1 /// @file htslib/sam.h
2 /// High-level SAM/BAM/CRAM sequence file operations.
3 /*
4     Copyright (C) 2008, 2009, 2013-2021 Genome Research Ltd.
5     Copyright (C) 2010, 2012, 2013 Broad Institute.
6 
7     Author: Heng Li <lh3@sanger.ac.uk>
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.  */
26 
27 module htslib.sam;
28 
29 import core.stdc.stddef;
30 import core.stdc.stdlib;
31 import std.format: format;
32 
33 import htslib.hts;
34 import htslib.hts_log;
35 import htslib.bgzf : BGZF;
36 import htslib.kstring : kstring_t, ssize_t;
37 
38 @system:
39 extern (C):
40 @nogc nothrow {
41 
42 /// Highest SAM format version supported by this library
43 enum SAM_FORMAT_VERSION = "1.6";
44 
45 /***************************
46  *** SAM/BAM/CRAM header ***
47  ***************************/
48 
49 /*! @typedef
50  * @abstract Header extension structure, grouping a collection
51  *  of hash tables that contain the parsed header data.
52  */
53 
54 struct sam_hrecs_t;
55 
56 /*! @typedef
57  @abstract Structure for the alignment header.
58  @field n_targets   number of reference sequences
59  @field l_text      length of the plain text in the header (may be zero if
60                     the header has been edited)
61  @field target_len  lengths of the reference sequences
62  @field target_name names of the reference sequences
63  @field text        plain text (may be NULL if the header has been edited)
64  @field sdict       header dictionary
65  @field hrecs       pointer to the extended header struct (internal use only)
66  @field ref_count   reference count
67 
68  @note The text and l_text fields are included for backwards compatibility.
69  These fields may be set to NULL and zero respectively as a side-effect
70  of calling some header API functions.  New code that needs to access the
71  header text should use the sam_hdr_str() and sam_hdr_length() functions
72  instead of these fields.
73  */
74 
75 struct sam_hdr_t
76 {
77     int n_targets;
78     int ignore_sam_err;
79     size_t l_text;
80     uint* target_len;
81     const(byte)* cigar_tab;
82     char** target_name;
83     char* text;
84     void* sdict;
85     sam_hrecs_t* hrecs;
86     uint ref_count;
87 }
88 
89 /*! @typedef
90  * @abstract Old name for compatibility with existing code.
91  */
92 alias bam_hdr_t = sam_hdr_t;
93 
94 /****************************
95  *** CIGAR related macros ***
96  ****************************/
97 
98 enum BAM_CMATCH = 0;
99 enum BAM_CINS = 1;
100 enum BAM_CDEL = 2;
101 enum BAM_CREF_SKIP = 3;
102 enum BAM_CSOFT_CLIP = 4;
103 enum BAM_CHARD_CLIP = 5;
104 enum BAM_CPAD = 6;
105 enum BAM_CEQUAL = 7;
106 enum BAM_CDIFF = 8;
107 enum BAM_CBACK = 9;
108 
109 enum BAM_CIGAR_STR = "MIDNSHP=XB";
110 enum BAM_CIGAR_SHIFT = 4;
111 enum BAM_CIGAR_MASK = 0xf;
112 enum BAM_CIGAR_TYPE = 0x3C1A7;
113 
114 /*! @abstract Table for converting a CIGAR operator character to BAM_CMATCH etc.
115 Result is operator code or -1. Be sure to cast the index if it is a plain char:
116     int op = bam_cigar_table[(unsigned char) ch];
117 */
118 extern __gshared const(byte)[256] bam_cigar_table;
119 
120 extern (D) auto bam_cigar_op(T)(auto ref T c)
121 {
122     return c & BAM_CIGAR_MASK;
123 }
124 
125 extern (D) auto bam_cigar_oplen(T)(auto ref T c)
126 {
127     return c >> BAM_CIGAR_SHIFT;
128 }
129 
130 // Note that BAM_CIGAR_STR is padded to length 16 bytes below so that
131 // the array look-up will not fall off the end.  '?' is chosen as the
132 // padding character so it's easy to spot if one is emitted, and will
133 // result in a parsing failure (in sam_parse1(), at least) if read.
134 extern (D) auto bam_cigar_gen(T0, T1)(auto ref T0 l, auto ref T1 o)
135 {
136     return l << BAM_CIGAR_SHIFT | o;
137 }
138 
139 /* bam_cigar_type returns a bit flag with:
140  *   bit 1 set if the cigar operation consumes the query
141  *   bit 2 set if the cigar operation consumes the reference
142  *
143  * For reference, the unobfuscated truth table for this function is:
144  * BAM_CIGAR_TYPE  QUERY  REFERENCE
145  * --------------------------------
146  * BAM_CMATCH      1      1
147  * BAM_CINS        1      0
148  * BAM_CDEL        0      1
149  * BAM_CREF_SKIP   0      1
150  * BAM_CSOFT_CLIP  1      0
151  * BAM_CHARD_CLIP  0      0
152  * BAM_CPAD        0      0
153  * BAM_CEQUAL      1      1
154  * BAM_CDIFF       1      1
155  * BAM_CBACK       0      0
156  * --------------------------------
157  */
158 extern (D) auto bam_cigar_type(T)(auto ref T o)
159 {
160     return BAM_CIGAR_TYPE >> (o << 1) & 3;
161 } // bit 1: consume query; bit 2: consume reference
162 
163 /*! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */
164 enum BAM_FPAIRED = 1;
165 /*! @abstract the read is mapped in a proper pair */
166 enum BAM_FPROPER_PAIR = 2;
167 /*! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */
168 enum BAM_FUNMAP = 4;
169 /*! @abstract the mate is unmapped */
170 enum BAM_FMUNMAP = 8;
171 /*! @abstract the read is mapped to the reverse strand */
172 enum BAM_FREVERSE = 16;
173 /*! @abstract the mate is mapped to the reverse strand */
174 enum BAM_FMREVERSE = 32;
175 /*! @abstract this is read1 */
176 enum BAM_FREAD1 = 64;
177 /*! @abstract this is read2 */
178 enum BAM_FREAD2 = 128;
179 /*! @abstract not primary alignment */
180 enum BAM_FSECONDARY = 256;
181 /*! @abstract QC failure */
182 enum BAM_FQCFAIL = 512;
183 /*! @abstract optical or PCR duplicate */
184 enum BAM_FDUP = 1024;
185 /*! @abstract supplementary alignment */
186 enum BAM_FSUPPLEMENTARY = 2048;
187 
188 /*************************
189  *** Alignment records ***
190  *************************/
191 
192 /*
193  * Assumptions made here.  While pos can be 64-bit, no sequence
194  * itself is that long, but due to ref skip CIGAR fields it
195  * may span more than that.  (CIGAR itself is 28-bit len + 4 bit
196  * type, but in theory we can combine multiples together.)
197  *
198  * Mate position and insert size also need to be 64-bit, but
199  * we won't accept more than 32-bit for tid.
200  *
201  * The bam_core_t structure is the *in memory* layout and not
202  * the same as the on-disk format.  64-bit changes here permit
203  * SAM to work with very long chromosomes and permit BAM and CRAM
204  * to seamlessly update in the future without further API/ABI
205  * revisions.
206  */
207 
208 /*! @typedef
209  @abstract Structure for core alignment information.
210  @field  pos     0-based leftmost coordinate
211  @field  tid     chromosome ID, defined by sam_hdr_t
212  @field  bin     bin calculated by bam_reg2bin()
213  @field  qual    mapping quality
214  @field  l_extranul length of extra NULs between qname & cigar (for alignment)
215  @field  flag    bitwise flag
216  @field  l_qname length of the query name
217  @field  n_cigar number of CIGAR operations
218  @field  l_qseq  length of the query sequence (read)
219  @field  mtid    chromosome ID of next read in template, defined by sam_hdr_t
220  @field  mpos    0-based leftmost coordinate of next read in template
221  @field  isize   observed template length ("insert size")
222  */
223 struct bam1_core_t
224 {
225     hts_pos_t pos;
226     int tid;
227     ushort bin; // NB: invalid on 64-bit pos
228     ubyte qual;
229     ubyte l_extranul;
230     ushort flag;
231     ushort l_qname;
232     uint n_cigar;
233     int l_qseq;
234     int mtid;
235     hts_pos_t mpos;
236     hts_pos_t isize;
237 }
238 
239 /*! @typedef
240  @abstract Structure for one alignment.
241  @field  core       core information about the alignment
242  @field  id
243  @field  data       all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux
244  @field  l_data     current length of bam1_t::data
245  @field  m_data     maximum length of bam1_t::data
246  @field  mempolicy  memory handling policy, see bam_set_mempolicy()
247 
248  @discussion Notes:
249 
250  1. The data blob should be accessed using bam_get_qname, bam_get_cigar,
251     bam_get_seq, bam_get_qual and bam_get_aux macros.  These returns pointers
252     to the start of each type of data.
253  2. qname is terminated by one to four NULs, so that the following
254     cigar data is 32-bit aligned; core.l_qname includes these trailing NULs,
255     while core.l_extranul counts the excess NULs (so 0 <= l_extranul <= 3).
256  3. Cigar data is encoded 4 bytes per CIGAR operation.
257     See the bam_cigar_* macros for manipulation.
258  4. seq is nibble-encoded according to bam_nt16_table.
259     See the bam_seqi macro for retrieving individual bases.
260  5. Per base qualities are stored in the Phred scale with no +33 offset.
261     Ie as per the BAM specification and not the SAM ASCII printable method.
262  */
263 struct bam1_t
264 {
265     import std.bitmanip : bitfields;
266 
267     bam1_core_t core;
268     ulong id;
269     ubyte* data;
270     int l_data;
271     uint m_data;
272 
273     mixin(bitfields!(
274         uint, "mempolicy", 2,
275         uint, "", 30));
276 
277     /* Reserved */
278 }
279 
280 /*! @function
281  @abstract  Get whether the query is on the reverse strand
282  @param  b  pointer to an alignment
283  @return    boolean true if query is on the reverse strand
284  */
285 extern (D) auto bam_is_rev(T)(auto ref T b)
286 {
287     return (b.core.flag & BAM_FREVERSE) != 0;
288 }
289 
290 /*! @function
291  @abstract  Get whether the query's mate is on the reverse strand
292  @param  b  pointer to an alignment
293  @return    boolean true if query's mate on the reverse strand
294  */
295 extern (D) auto bam_is_mrev(T)(auto ref T b)
296 {
297     return (b.core.flag & BAM_FMREVERSE) != 0;
298 }
299 
300 /*! @function
301  @abstract  Get the name of the query
302  @param  b  pointer to an alignment
303  @return    pointer to the name string, null terminated
304  */
305 extern (D) auto bam_get_qname(T)(auto ref T b)
306 {
307     return cast(char*) b.data;
308 }
309 
310 /*! @function
311  @abstract  Get the CIGAR array
312  @param  b  pointer to an alignment
313  @return    pointer to the CIGAR array
314 
315  @discussion In the CIGAR array, each element is a 32-bit integer. The
316  lower 4 bits gives a CIGAR operation and the higher 28 bits keep the
317  length of a CIGAR.
318  */
319 extern (D) auto bam_get_cigar(bam1_t * b)
320 {
321     return cast(uint*) ((*b).data + (*b).core.l_qname);
322 }
323 
324 /*! @function
325  @abstract  Get query sequence
326  @param  b  pointer to an alignment
327  @return    pointer to sequence
328 
329  @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G,
330  8 for T and 15 for N. Two bases are packed in one byte with the base
331  at the higher 4 bits having smaller coordinate on the read. It is
332  recommended to use bam_seqi() macro to get the base.
333  */
334 extern (D) auto bam_get_seq(T)(auto ref T b)
335 {
336     return b.data + (b.core.n_cigar << 2) + b.core.l_qname;
337 }
338 
339 /*! @function
340  @abstract  Get query quality
341  @param  b  pointer to an alignment
342  @return    pointer to quality string
343  */
344 extern (D) auto bam_get_qual(T)(auto ref T b)
345 {
346     return b.data + (b.core.n_cigar << 2) + b.core.l_qname + ((b.core.l_qseq + 1) >> 1);
347 }
348 
349 /*! @function
350  @abstract  Get auxiliary data
351  @param  b  pointer to an alignment
352  @return    pointer to the concatenated auxiliary data
353  */
354 extern (D) auto bam_get_aux(T)(auto ref T b)
355 {
356     return b.data + (b.core.n_cigar << 2) + b.core.l_qname + ((b.core.l_qseq + 1) >> 1) + b.core.l_qseq;
357 }
358 
359 /*! @function
360  @abstract  Get length of auxiliary data
361  @param  b  pointer to an alignment
362  @return    length of the concatenated auxiliary data
363  */
364 extern (D) auto bam_get_l_aux(T)(auto ref T b)
365 {
366     return b.l_data - (b.core.n_cigar << 2) - b.core.l_qname - b.core.l_qseq - ((b.core.l_qseq + 1) >> 1);
367 }
368 
369 /*! @function
370  @abstract  Get a base on read
371  @param  s  Query sequence returned by bam_get_seq()
372  @param  i  The i-th position, 0-based
373  @return    4-bit integer representing the base.
374  */
375 extern (D) auto bam_seqi(T0, T1)(auto ref T0 s, auto ref T1 i)
376 {
377     return s[i >> 1] >> ((~i & 1) << 2) & 0xf;
378 }
379 
380 /*!
381  @abstract  Modifies a single base in the bam structure.
382  @param s   Query sequence returned by bam_get_seq()
383  @param i   The i-th position, 0-based
384  @param b   Base in nt16 nomenclature (see seq_nt16_table)
385 */
386 
387 extern (D) void bam_set_seqi(T0, T1, T3)(auto ref T0 s, auto ref T1 i, auto ref T3 b)
388 {
389     s[i >> 1] = (s[i >> 1] & (0xf0 >> ((~i & 1) << 2))) | cast(ubyte)(b << ((~i & 1) << 2));
390 }
391 
392 /**************************
393  *** Exported functions ***
394  **************************/
395 
396 /***************
397  *** BAM I/O ***
398  ***************/
399 
400 /* Header */
401 
402 /// Generates a new unpopulated header structure.
403 /*!
404  *
405  * @return  A valid pointer to new header on success, NULL on failure
406  *
407  * The sam_hdr_t struct returned by a successful call should be freed
408  * via sam_hdr_destroy() when it is no longer needed.
409  */
410 sam_hdr_t* sam_hdr_init();
411 
412 /// Read the header from a BAM compressed file.
413 /*!
414  * @param fp  File pointer
415  * @return    A valid pointer to new header on success, NULL on failure
416  *
417  * This function only works with BAM files.  It is usually better to use
418  * sam_hdr_read(), which works on SAM, BAM and CRAM files.
419  *
420  * The sam_hdr_t struct returned by a successful call should be freed
421  * via sam_hdr_destroy() when it is no longer needed.
422  */
423 sam_hdr_t* bam_hdr_read(BGZF* fp);
424 
425 /// Writes the header to a BAM file.
426 /*!
427  * @param fp  File pointer
428  * @param h   Header pointer
429  * @return    0 on success, -1 on failure
430  *
431  * This function only works with BAM files.  Use sam_hdr_write() to
432  * write in any of the SAM, BAM or CRAM formats.
433  */
434 int bam_hdr_write(BGZF* fp, const(sam_hdr_t)* h);
435 
436 /*!
437  * Frees the resources associated with a header.
438  */
439 void sam_hdr_destroy(sam_hdr_t* h);
440 
441 /// Duplicate a header structure.
442 /*!
443  * @return  A valid pointer to new header on success, NULL on failure
444  *
445  * The sam_hdr_t struct returned by a successful call should be freed
446  * via sam_hdr_destroy() when it is no longer needed.
447  */
448 sam_hdr_t* sam_hdr_dup(const(sam_hdr_t)* h0);
449 
450 /*!
451  * @abstract Old names for compatibility with existing code.
452  */
453 pragma(inline,true) 
454 sam_hdr_t* bam_hdr_init() { return sam_hdr_init(); }
455 
456 pragma(inline,true) 
457 void bam_hdr_destroy(sam_hdr_t* h) { sam_hdr_destroy(h); }
458 
459 pragma(inline,true) 
460 sam_hdr_t* bam_hdr_dup(const(sam_hdr_t)* h0) { return sam_hdr_dup(h0); }
461 
462 alias samFile = htsFile;
463 
464 /// Create a header from existing text.
465 /*!
466  * @param l_text    Length of text
467  * @param text      Header text
468  * @return A populated sam_hdr_t structure on success; NULL on failure.
469  * @note The text field of the returned header will be NULL, and the l_text
470  * field will be zero.
471  *
472  * The sam_hdr_t struct returned by a successful call should be freed
473  * via sam_hdr_destroy() when it is no longer needed.
474  */
475 sam_hdr_t* sam_hdr_parse(size_t l_text, const(char)* text);
476 
477 /// Read a header from a SAM, BAM or CRAM file.
478 /*!
479  * @param fp    Pointer to a SAM, BAM or CRAM file handle
480  * @return  A populated sam_hdr_t struct on success; NULL on failure.
481  *
482  * The sam_hdr_t struct returned by a successful call should be freed
483  * via sam_hdr_destroy() when it is no longer needed.
484  */
485 sam_hdr_t* sam_hdr_read(samFile* fp);
486 
487 /// Write a header to a SAM, BAM or CRAM file.
488 /*!
489  * @param fp    SAM, BAM or CRAM file header
490  * @param h     Header structure to write
491  * @return  0 on success; -1 on failure
492  */
493 int sam_hdr_write(samFile* fp, const(sam_hdr_t)* h);
494 
495 /// Returns the current length of the header text.
496 /*!
497  * @return  >= 0 on success, SIZE_MAX on failure
498  */
499 size_t sam_hdr_length(sam_hdr_t* h);
500 
501 /// Returns the text representation of the header.
502 /*!
503  * @return  valid char pointer on success, NULL on failure
504  *
505  * The returned string is part of the header structure.  It will remain
506  * valid until a call to a header API function causes the string to be
507  * invalidated, or the header is destroyed.
508  *
509  * The caller should not attempt to free or realloc this pointer.
510  */
511 const(char)* sam_hdr_str(sam_hdr_t* h);
512 
513 /// Returns the number of references in the header.
514 /*!
515  * @return  >= 0 on success, -1 on failure
516  */
517 int sam_hdr_nref(const(sam_hdr_t)* h);
518 
519 /* ==== Line level methods ==== */
520 
521 /// Add formatted lines to an existing header.
522 /*!
523  * @param lines  Full SAM header record, eg "@SQ\tSN:foo\tLN:100", with
524  *               optional new-line. If it contains more than 1 line then
525  *               multiple lines will be added in order
526  * @param len    The maximum length of lines (if an early NUL is not
527  *               encountered). len may be 0 if unknown, in which case
528  *               lines must be NUL-terminated
529  * @return       0 on success, -1 on failure
530  *
531  * The lines will be appended to the end of the existing header
532  * (apart from HD, which always comes first).
533  */
534 int sam_hdr_add_lines(sam_hdr_t* h, const(char)* lines, size_t len);
535 
536 /// Adds a single line to an existing header.
537 /*!
538  * Specify type and one or more key,value pairs, ending with the NULL key.
539  * Eg. sam_hdr_add_line(h, "SQ", "ID", "foo", "LN", "100", NULL).
540  *
541  * @param type  Type of the added line. Eg. "SQ"
542  * @return      0 on success, -1 on failure
543  *
544  * The new line will be added immediately after any others of the same
545  * type, or at the end of the existing header if no lines of the
546  * given type currently exist.  The exception is HD lines, which always
547  * come first.  If an HD line already exists, it will be replaced.
548  */
549 int sam_hdr_add_line(sam_hdr_t* h, const(char)* type, ...);
550 
551 /// Returns a complete line of formatted text for a given type and ID.
552 /*!
553  * @param type      Type of the searched line. Eg. "SQ"
554  * @param ID_key    Tag key defining the line. Eg. "SN"
555  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
556  * @param ks        kstring to hold the result
557  * @return          0 on success;
558  *                 -1 if no matching line is found
559  *                 -2 on other failures
560  *
561  * Puts a complete line of formatted text for a specific header type/ID
562  * combination into @p ks. If ID_key is NULL then it returns the first line of
563  * the specified type.
564  *
565  * Any existing content in @p ks will be overwritten.
566  */
567 int sam_hdr_find_line_id(
568     sam_hdr_t* h,
569     const(char)* type,
570     const(char)* ID_key,
571     const(char)* ID_val,
572     kstring_t* ks);
573 
574 /// Returns a complete line of formatted text for a given type and index.
575 /*!
576  * @param type      Type of the searched line. Eg. "SQ"
577  * @param position  Index in lines of this type (zero-based)
578  * @param ks        kstring to hold the result
579  * @return          0 on success;
580  *                 -1 if no matching line is found
581  *                 -2 on other failures
582  *
583  * Puts a complete line of formatted text for a specific line into @p ks.
584  * The header line is selected using the @p type and @p position parameters.
585  *
586  * Any existing content in @p ks will be overwritten.
587  */
588 int sam_hdr_find_line_pos(
589     sam_hdr_t* h,
590     const(char)* type,
591     int pos,
592     kstring_t* ks);
593 
594 /// Remove a line with given type / id from a header
595 /*!
596  * @param type      Type of the searched line. Eg. "SQ"
597  * @param ID_key    Tag key defining the line. Eg. "SN"
598  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
599  * @return          0 on success, -1 on error
600  *
601  * Remove a line from the header by specifying a tag:value that uniquely
602  * identifies the line, i.e. the @SQ line containing "SN:ref1".
603  *
604  * \@SQ line is uniquely identified by the SN tag.
605  * \@RG line is uniquely identified by the ID tag.
606  * \@PG line is uniquely identified by the ID tag.
607  * Eg. sam_hdr_remove_line_id(h, "SQ", "SN", "ref1")
608  *
609  * If no key:value pair is specified, the type MUST be followed by a NULL argument and
610  * the first line of the type will be removed, if any.
611  * Eg. sam_hdr_remove_line_id(h, "SQ", NULL, NULL)
612  *
613  * @note Removing \@PG lines is currently unsupported.
614  */
615 int sam_hdr_remove_line_id(
616     sam_hdr_t* h,
617     const(char)* type,
618     const(char)* ID_key,
619     const(char)* ID_value);
620 
621 /// Remove nth line of a given type from a header
622 /*!
623  * @param type     Type of the searched line. Eg. "SQ"
624  * @param position Index in lines of this type (zero-based). E.g. 3
625  * @return         0 on success, -1 on error
626  *
627  * Remove a line from the header by specifying the position in the type
628  * group, i.e. 3rd @SQ line.
629  */
630 int sam_hdr_remove_line_pos(sam_hdr_t* h, const(char)* type, int position);
631 
632 /// Add or update tag key,value pairs in a header line.
633 /*!
634  * @param type      Type of the searched line. Eg. "SQ"
635  * @param ID_key    Tag key defining the line. Eg. "SN"
636  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
637  * @return          0 on success, -1 on error
638  *
639  * Adds or updates tag key,value pairs in a header line.
640  * Eg. for adding M5 tags to @SQ lines or updating sort order for the
641  * @HD line.
642  *
643  * Specify multiple key,value pairs ending in NULL. Eg.
644  * sam_hdr_update_line(h, "RG", "ID", "rg1", "DS", "description", "PG", "samtools", NULL)
645  *
646  * Attempting to update the record name (i.e. @SQ SN or @RG ID) will
647  * work as long as the new name is not already in use, however doing this
648  * on a file opened for reading may produce unexpected results.
649  *
650  * Renaming an @RG record in this way will only change the header.  Alignment
651  * records written later will not be updated automatically even if they
652  * reference the old read group name.
653  *
654  * Attempting to change an @PG ID tag is not permitted.
655  */
656 int sam_hdr_update_line(
657     sam_hdr_t* h,
658     const(char)* type,
659     const(char)* ID_key,
660     const(char)* ID_value,
661     ...);
662 
663 /// Remove all lines of a given type from a header, except the one matching an ID
664 /*!
665  * @param type      Type of the searched line. Eg. "SQ"
666  * @param ID_key    Tag key defining the line. Eg. "SN"
667  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
668  * @return          0 on success, -1 on failure
669  *
670  * Remove all lines of type <type> from the header, except the one
671  * specified by tag:value, i.e. the @SQ line containing "SN:ref1".
672  *
673  * If no line matches the key:value ID, all lines of the given type are removed.
674  * To remove all lines of a given type, use NULL for both ID_key and ID_value.
675  */
676 int sam_hdr_remove_except(
677     sam_hdr_t* h,
678     const(char)* type,
679     const(char)* ID_key,
680     const(char)* ID_value);
681 
682 /// Remove header lines of a given type, except those in a given ID set
683 /*!
684  * @param type  Type of the searched line. Eg. "RG"
685  * @param id    Tag key defining the line. Eg. "ID"
686  * @param rh    Hash set initialised by the caller with the values to be kept.
687  *              See description for how to create this. If @p rh is NULL, all
688  *              lines of this type will be removed.
689  * @return      0 on success, -1 on failure
690  *
691  * Remove all lines of type @p type from the header, except the ones
692  * specified in the hash set @p rh. If @p rh is NULL, all lines of
693  * this type will be removed.
694  * Declaration of @p rh is done using KHASH_SET_INIT_STR macro. Eg.
695  * @code{.c}
696  *              #include "htslib/khash.h"
697  *              KHASH_SET_INIT_STR(keep)
698  *              typedef khash_t(keep) *keephash_t;
699  *
700  *              void your_method() {
701  *                  samFile *sf = sam_open("alignment.bam", "r");
702  *                  sam_hdr_t *h = sam_hdr_read(sf);
703  *                  keephash_t rh = kh_init(keep);
704  *                  int ret = 0;
705  *                  kh_put(keep, rh, strdup("chr2"), &ret);
706  *                  kh_put(keep, rh, strdup("chr3"), &ret);
707  *                  if (sam_hdr_remove_lines(h, "SQ", "SN", rh) == -1)
708  *                      fprintf(stderr, "Error removing lines\n");
709  *                  khint_t k;
710  *                  for (k = 0; k < kh_end(rh); ++k)
711  *                     if (kh_exist(rh, k)) free((char*)kh_key(rh, k));
712  *                  kh_destroy(keep, rh);
713  *                  sam_hdr_destroy(h);
714  *                  sam_close(sf);
715  *              }
716  * @endcode
717  *
718  */
719 int sam_hdr_remove_lines(
720     sam_hdr_t* h,
721     const(char)* type,
722     const(char)* id,
723     void* rh);
724 
725 /// Count the number of lines for a given header type
726 /*!
727  * @param h     BAM header
728  * @param type  Header type to count. Eg. "RG"
729  * @return  Number of lines of this type on success; -1 on failure
730  */
731 int sam_hdr_count_lines(sam_hdr_t* h, const(char)* type);
732 
733 /// Index of the line for the types that have dedicated look-up tables (SQ, RG, PG)
734 /*!
735  * @param h     BAM header
736  * @param type  Type of the searched line. Eg. "RG"
737  * @param key   The value of the identifying key. Eg. "rg1"
738  * @return  0-based index on success; -1 if line does not exist; -2 on failure
739  */
740 int sam_hdr_line_index(sam_hdr_t* bh, const(char)* type, const(char)* key);
741 
742 /// Id key of the line for the types that have dedicated look-up tables (SQ, RG, PG)
743 /*!
744  * @param h     BAM header
745  * @param type  Type of the searched line. Eg. "RG"
746  * @param pos   Zero-based index inside the type group. Eg. 2 (for the third RG line)
747  * @return  Valid key string on success; NULL on failure
748  */
749 const(char)* sam_hdr_line_name(sam_hdr_t* bh, const(char)* type, int pos);
750 
751 /* ==== Key:val level methods ==== */
752 
753 /// Return the value associated with a key for a header line identified by ID_key:ID_val
754 /*!
755  * @param type      Type of the line to which the tag belongs. Eg. "SQ"
756  * @param ID_key    Tag key defining the line. Eg. "SN". Can be NULL, if looking for the first line.
757  * @param ID_value  Tag value associated with the key above. Eg. "ref1". Can be NULL, if ID_key is NULL.
758  * @param key       Key of the searched tag. Eg. "LN"
759  * @param ks        kstring where the value will be written
760  * @return          0 on success
761  *                 -1 if the requested tag does not exist
762  *                 -2 on other errors
763  *
764  * Looks for a specific key in a single SAM header line and writes the
765  * associated value into @p ks.  The header line is selected using the ID_key
766  * and ID_value parameters.  Any pre-existing content in @p ks will be
767  * overwritten.
768  */
769 int sam_hdr_find_tag_id(
770     sam_hdr_t* h,
771     const(char)* type,
772     const(char)* ID_key,
773     const(char)* ID_value,
774     const(char)* key,
775     kstring_t* ks);
776 
777 /// Return the value associated with a key for a header line identified by position
778 /*!
779  * @param type      Type of the line to which the tag belongs. Eg. "SQ"
780  * @param position  Index in lines of this type (zero-based). E.g. 3
781  * @param key       Key of the searched tag. Eg. "LN"
782  * @param ks        kstring where the value will be written
783  * @return          0 on success
784  *                 -1 if the requested tag does not exist
785  *                 -2 on other errors
786  *
787  * Looks for a specific key in a single SAM header line and writes the
788  * associated value into @p ks.  The header line is selected using the @p type
789  * and @p position parameters.  Any pre-existing content in @p ks will be
790  * overwritten.
791  */
792 int sam_hdr_find_tag_pos(
793     sam_hdr_t* h,
794     const(char)* type,
795     int pos,
796     const(char)* key,
797     kstring_t* ks);
798 
799 /// Remove the key from the line identified by type, ID_key and ID_value.
800 /*!
801  * @param type      Type of the line to which the tag belongs. Eg. "SQ"
802  * @param ID_key    Tag key defining the line. Eg. "SN"
803  * @param ID_value  Tag value associated with the key above. Eg. "ref1"
804  * @param key       Key of the targeted tag. Eg. "M5"
805  * @return          1 if the key was removed; 0 if it was not present; -1 on error
806  */
807 int sam_hdr_remove_tag_id(
808     sam_hdr_t* h,
809     const(char)* type,
810     const(char)* ID_key,
811     const(char)* ID_value,
812     const(char)* key);
813 
814 /// Get the target id for a given reference sequence name
815 /*!
816  * @param ref  Reference name
817  * @return     Positive value on success,
818  *             -1 if unknown reference,
819  *             -2 if the header could not be parsed
820  *
821  * Looks up a reference sequence by name in the reference hash table
822  * and returns the numerical target id.
823  */
824 int sam_hdr_name2tid(sam_hdr_t* h, const(char)* ref_);
825 
826 /// Get the reference sequence name from a target index
827 /*!
828  * @param tid  Target index
829  * @return     Valid reference name on success, NULL on failure
830  *
831  * Fetch the reference sequence name from the target name array,
832  * using the numerical target id.
833  */
834 const(char)* sam_hdr_tid2name(const(sam_hdr_t)* h, int tid);
835 
836 /// Get the reference sequence length from a target index
837 /*!
838  * @param tid  Target index
839  * @return     Strictly positive value on success, 0 on failure
840  *
841  * Fetch the reference sequence length from the target length array,
842  * using the numerical target id.
843  */
844 hts_pos_t sam_hdr_tid2len(const(sam_hdr_t)* h, int tid);
845 
846 /// Alias of sam_hdr_name2tid(), for backwards compatibility.
847 /*!
848  * @param ref  Reference name
849  * @return     Positive value on success,
850  *             -1 if unknown reference,
851  *             -2 if the header could not be parsed
852  */
853 pragma(inline,true)
854 int bam_name2id(sam_hdr_t* h, const(char)* ref_) { return sam_hdr_name2tid(h, ref_); }
855 
856 /// Generate a unique \@PG ID: value
857 /*!
858  * @param name  Name of the program. Eg. samtools
859  * @return      Valid ID on success, NULL on failure
860  *
861  * Returns a unique ID from a base name.  The string returned will remain
862  * valid until the next call to this function, or the header is destroyed.
863  * The caller should not attempt to free() or realloc() it.
864  */
865 const(char)* sam_hdr_pg_id(sam_hdr_t* h, const(char)* name);
866 
867 /// Add an \@PG line.
868 /*!
869  * @param name  Name of the program. Eg. samtools
870  * @return      0 on success, -1 on failure
871  *
872  * If we wish complete control over this use sam_hdr_add_line() directly. This
873  * function uses that, but attempts to do a lot of tedious house work for
874  * you too.
875  *
876  * - It will generate a suitable ID if the supplied one clashes.
877  * - It will generate multiple \@PG records if we have multiple PG chains.
878  *
879  * Call it as per sam_hdr_add_line() with a series of key,value pairs ending
880  * in NULL.
881  */
882 int sam_hdr_add_pg(sam_hdr_t* h, const(char)* name, ...);
883 
884 /*!
885  * A function to help with construction of CL tags in @PG records.
886  * Takes an argc, argv pair and returns a single space-separated string.
887  * This string should be deallocated by the calling function.
888  *
889  * @return
890  * Returns malloced char * on success;
891  *         NULL on failure
892  */
893 char* stringify_argv(int argc, char** argv);
894 
895 /// Increments the reference count on a header
896 /*!
897  * This permits multiple files to share the same header, all calling
898  * sam_hdr_destroy when done, without causing errors for other open files.
899  */
900 void sam_hdr_incr_ref(sam_hdr_t* h);
901 
902 /*
903  * Macros for changing the \@HD line. They eliminate the need to use NULL method arguments.
904  */
905 
906 /// Returns the SAM formatted text of the \@HD header line
907 extern (D) auto sam_hdr_find_hd(T0, T1)(auto ref T0 h, auto ref T1 ks)
908 {
909     return sam_hdr_find_line_id(h, "HD", NULL, NULL, ks);
910 }
911 
912 /// Returns the value associated with a given \@HD line tag
913 extern (D) auto sam_hdr_find_tag_hd(T0, T1, T2)(auto ref T0 h, auto ref T1 key, auto ref T2 ks)
914 {
915     return sam_hdr_find_tag_id(h, "HD", NULL, NULL, key, ks);
916 }
917 
918 /// Adds or updates tags on the header \@HD line
919 extern (D) auto sam_hdr_update_hd(T, A...)(auto ref T h, auto ref A a)
920 {
921     // NOTE: This macro was dropped by dstep due to variadic args
922     static assert (a.length %2 == 0);   // K-V pairs => even number of variadic args
923     return sam_hdr_update_line(h, "HD", null, null, a, null);
924 }
925 
926 /// Removes the \@HD line tag with the given key
927 extern (D) auto sam_hdr_remove_tag_hd(T0, T1)(auto ref T0 h, auto ref T1 key)
928 {
929     return sam_hdr_remove_tag_id(h, "HD", NULL, NULL, key);
930 }
931 
932 /* Alignment */
933 
934 /// Create a new bam1_t alignment structure
935 /**
936    @return An empty bam1_t structure on success, NULL on failure
937 
938    The bam1_t struct returned by a successful call should be freed
939    via bam_destroy1() when it is no longer needed.
940  */
941 bam1_t* bam_init1();
942 
943 /// Destroy a bam1_t structure
944 /**
945    @param b  structure to destroy
946 
947    Does nothing if @p b is NULL.  If not, all memory associated with @p b
948    will be freed, along with the structure itself.  @p b should not be
949    accessed after calling this function.
950  */
951 void bam_destroy1(bam1_t* b);
952 
953 enum BAM_USER_OWNS_STRUCT = 1;
954 enum BAM_USER_OWNS_DATA = 2;
955 
956 /// Set alignment record memory policy
957 /**
958    @param b       Alignment record
959    @param policy  Desired policy
960 
961    Allows the way HTSlib reallocates and frees bam1_t data to be
962    changed.  @policy can be set to the bitwise-or of the following
963    values:
964 
965    \li \c BAM_USER_OWNS_STRUCT
966    If this is set then bam_destroy1() will not try to free the bam1_t struct.
967 
968    \li \c BAM_USER_OWNS_DATA
969    If this is set, bam_destroy1() will not free the bam1_t::data pointer.
970    Also, functions which need to expand bam1_t::data memory will change
971    behaviour.  Instead of calling realloc() on the pointer, they will
972    allocate a new data buffer and copy any existing content in to it.
973    The existing memory will \b not be freed.  bam1_t::data will be
974    set to point to the new memory and the BAM_USER_OWNS_DATA flag will be
975    cleared.
976 
977    BAM_USER_OWNS_STRUCT allows bam_destroy1() to be called on bam1_t
978    structures that are members of an array.
979 
980    BAM_USER_OWNS_DATA can be used by applications that want more control
981    over where the variable-length parts of the bam record will be stored.
982    By preventing calls to free() and realloc(), it allows bam1_t::data
983    to hold pointers to memory that cannot be passed to those functions.
984 
985    Example:  Read a block of alignment records, storing the variable-length
986    data in a single buffer and the records in an array.  Stop when either
987    the array or the buffer is full.
988 
989    \code{.c}
990    #define MAX_RECS 1000
991    #define REC_LENGTH 400  // Average length estimate, to get buffer size
992    size_t bufsz = MAX_RECS * REC_LENGTH, nrecs, buff_used = 0;
993    bam1_t *recs = calloc(MAX_RECS, sizeof(bam1_t));
994    uint8_t *buffer = malloc(bufsz);
995    int res = 0, result = EXIT_FAILURE;
996    uint32_t new_m_data;
997 
998    if (!recs || !buffer) goto cleanup;
999    for (nrecs = 0; nrecs < MAX_RECS; nrecs++) {
1000       bam_set_mempolicy(&recs[nrecs], BAM_USER_OWNS_STRUCT|BAM_USER_OWNS_DATA);
1001 
1002       // Set data pointer to unused part of buffer
1003       recs[nrecs].data = &buffer[buff_used];
1004 
1005       // Set m_data to size of unused part of buffer.  On 64-bit platforms it
1006       // will be necessary to limit this to UINT32_MAX due to the size of
1007       // bam1_t::m_data (not done here as our buffer is only 400K).
1008       recs[nrecs].m_data = bufsz - buff_used;
1009 
1010       // Read the record
1011       res = sam_read1(file_handle, header, &recs[nrecs]);
1012       if (res <= 0) break; // EOF or error
1013 
1014       // Check if the record data didn't fit - if not, stop reading
1015       if ((bam_get_mempolicy(&recs[nrecs]) & BAM_USER_OWNS_DATA) == 0) {
1016          nrecs++; // Include last record in count
1017          break;
1018       }
1019 
1020       // Adjust m_data to the space actually used.  If space is available,
1021       // round up to eight bytes so the next record aligns nicely.
1022       new_m_data = ((uint32_t) recs[nrecs].l_data + 7) & (~7U);
1023       if (new_m_data < recs[nrecs].m_data) recs[nrecs].m_data = new_m_data;
1024 
1025       buff_used += recs[nrecs].m_data;
1026    }
1027    if (res < 0) goto cleanup;
1028    result = EXIT_SUCCESS;
1029 
1030    // ... use data ...
1031 
1032  cleanup:
1033    for (size_t i = 0; i < nrecs; i++)
1034      bam_destroy1(i);
1035    free(buffer);
1036    free(recs);
1037 
1038    \endcode
1039 */
1040 pragma(inline,true)
1041 void bam_set_mempolicy(bam1_t* b, uint policy) {
1042     b.mempolicy = policy;
1043 }
1044 
1045 /// Get alignment record memory policy
1046 /** @param b    Alignment record
1047 
1048     See bam_set_mempolicy()
1049  */
1050 pragma(inline,true)
1051 uint bam_get_mempolicy(bam1_t* b) {
1052     return b.mempolicy;
1053 }
1054 
1055 /// Read a BAM format alignment record
1056 /**
1057    @param fp   BGZF file being read
1058    @param b    Destination for the alignment data
1059    @return number of bytes read on success
1060            -1 at end of file
1061            < -1 on failure
1062 
1063    This function can only read BAM format files.  Most code should use
1064    sam_read1() instead, which can be used with BAM, SAM and CRAM formats.
1065 */
1066 int bam_read1(BGZF* fp, bam1_t* b);
1067 
1068 /// Write a BAM format alignment record
1069 /**
1070    @param fp  BGZF file being written
1071    @param b   Alignment record to write
1072    @return number of bytes written on success
1073            -1 on error
1074 
1075    This function can only write BAM format files.  Most code should use
1076    sam_write1() instead, which can be used with BAM, SAM and CRAM formats.
1077 */
1078 int bam_write1(BGZF* fp, const(bam1_t)* b);
1079 
1080 /// Copy alignment record data
1081 /**
1082    @param bdst  Destination alignment record
1083    @param bsrc  Source alignment record
1084    @return bdst on success; NULL on failure
1085  */
1086 bam1_t* bam_copy1(bam1_t* bdst, const(bam1_t)* bsrc);
1087 
1088 /// Create a duplicate alignment record
1089 /**
1090    @param bsrc  Source alignment record
1091    @return Pointer to a new alignment record on success; NULL on failure
1092 
1093    The bam1_t struct returned by a successful call should be freed
1094    via bam_destroy1() when it is no longer needed.
1095  */
1096 bam1_t* bam_dup1(const(bam1_t)* bsrc);
1097 
1098 /// Sets all components of an alignment structure
1099 /**
1100    @param bam      Target alignment structure. Must be initialized by a call to bam_init1().
1101                    The data field will be reallocated automatically as needed.
1102    @param l_qname  Length of the query name. If set to 0, the placeholder query name "*" will be used.
1103    @param qname    Query name, may be NULL if l_qname = 0
1104    @param flag     Bitwise flag, a combination of the BAM_F* constants.
1105    @param tid      Chromosome ID, defined by sam_hdr_t (a.k.a. RNAME).
1106    @param pos      0-based leftmost coordinate.
1107    @param mapq     Mapping quality.
1108    @param n_cigar  Number of CIGAR operations.
1109    @param cigar    CIGAR data, may be NULL if n_cigar = 0.
1110    @param mtid     Chromosome ID of next read in template, defined by sam_hdr_t (a.k.a. RNEXT).
1111    @param mpos     0-based leftmost coordinate of next read in template (a.k.a. PNEXT).
1112    @param isize    Observed template length ("insert size") (a.k.a. TLEN).
1113    @param l_seq    Length of the query sequence (read) and sequence quality string.
1114    @param seq      Sequence, may be NULL if l_seq = 0.
1115    @param qual     Sequence quality, may be NULL.
1116    @param l_aux    Length to be reserved for auxiliary field data, may be 0.
1117 
1118    @return >= 0 on success (number of bytes written to bam->data), negative (with errno set) on failure.
1119 */
1120 int bam_set1(
1121     bam1_t* bam,
1122     size_t l_qname,
1123     const(char)* qname,
1124     ushort flag,
1125     int tid,
1126     hts_pos_t pos,
1127     ubyte mapq,
1128     size_t n_cigar,
1129     const(uint)* cigar,
1130     int mtid,
1131     hts_pos_t mpos,
1132     hts_pos_t isize,
1133     size_t l_seq,
1134     const(char)* seq,
1135     const(char)* qual,
1136     size_t l_aux);
1137 
1138 /// Calculate query length from CIGAR data
1139 /**
1140    @param n_cigar   Number of items in @p cigar
1141    @param cigar     CIGAR data
1142    @return Query length
1143 
1144    CIGAR data is stored as in the BAM format, i.e. (op_len << 4) | op
1145    where op_len is the length in bases and op is a value between 0 and 8
1146    representing one of the operations "MIDNSHP=X" (M = 0; X = 8)
1147 
1148    This function returns the sum of the lengths of the M, I, S, = and X
1149    operations in @p cigar (these are the operations that "consume" query
1150    bases).  All other operations (including invalid ones) are ignored.
1151 
1152    @note This return type of this function is hts_pos_t so that it can
1153    correctly return the length of CIGAR sequences including many long
1154    operations without overflow. However, other restrictions (notably the sizes
1155    of bam1_core_t::l_qseq and bam1_t::data) limit the maximum query sequence
1156    length supported by HTSlib to fewer than INT_MAX bases.
1157  */
1158 hts_pos_t bam_cigar2qlen(int n_cigar, const(uint)* cigar);
1159 
1160 /// Calculate reference length from CIGAR data
1161 /**
1162    @param n_cigar   Number of items in @p cigar
1163    @param cigar     CIGAR data
1164    @return Reference length
1165 
1166    CIGAR data is stored as in the BAM format, i.e. (op_len << 4) | op
1167    where op_len is the length in bases and op is a value between 0 and 8
1168    representing one of the operations "MIDNSHP=X" (M = 0; X = 8)
1169 
1170    This function returns the sum of the lengths of the M, D, N, = and X
1171    operations in @p cigar (these are the operations that "consume" reference
1172    bases).  All other operations (including invalid ones) are ignored.
1173  */
1174 hts_pos_t bam_cigar2rlen(int n_cigar, const(uint)* cigar);
1175 
1176 /*!
1177       @abstract Calculate the rightmost base position of an alignment on the
1178       reference genome.
1179 
1180       @param  b  pointer to an alignment
1181       @return    the coordinate of the first base after the alignment, 0-based
1182 
1183       @discussion For a mapped read, this is just b->core.pos + bam_cigar2rlen.
1184       For an unmapped read (either according to its flags or if it has no cigar
1185       string) or a read whose cigar string consumes no reference bases at all,
1186       we return b->core.pos + 1 by convention.
1187  */
1188 hts_pos_t bam_endpos(const(bam1_t)* b);
1189 
1190 int bam_str2flag(const(char)* str); /** returns negative value on error */
1191 
1192 char* bam_flag2str(int flag); /** The string must be freed by the user */
1193 
1194 /*! @function
1195  @abstract  Set the name of the query
1196  @param  b  pointer to an alignment
1197  @return    0 on success, -1 on failure
1198  */
1199 int bam_set_qname(bam1_t* b, const(char)* qname);
1200 
1201 /*! @function
1202  @abstract  Parse a CIGAR string into a uint32_t array
1203  @param  in      [in]  pointer to the source string
1204  @param  end     [out] address of the pointer to the new end of the input string
1205                        can be NULL
1206  @param  a_cigar [in/out]  address of the destination uint32_t buffer
1207  @param  a_mem   [in/out]  address of the allocated number of buffer elements
1208  @return         number of processed CIGAR operators; -1 on error
1209  */
1210 ssize_t sam_parse_cigar(
1211     const(char)* in_,
1212     char** end,
1213     uint** a_cigar,
1214     size_t* a_mem);
1215 
1216 /*! @function
1217  @abstract  Parse a CIGAR string into a bam1_t struct
1218  @param  in      [in]  pointer to the source string
1219  @param  end     [out] address of the pointer to the new end of the input string
1220                        can be NULL
1221  @param  b       [in/out]  address of the destination bam1_t struct
1222  @return         number of processed CIGAR operators; -1 on error
1223  */
1224 ssize_t bam_parse_cigar(const(char)* in_, char** end, bam1_t* b);
1225 
1226 /*************************
1227  *** BAM/CRAM indexing ***
1228  *************************/
1229 
1230 // These BAM iterator functions work only on BAM files.  To work with either
1231 // BAM or CRAM files use the sam_index_load() & sam_itr_*() functions.
1232 alias bam_itr_destroy = hts_itr_destroy;
1233 alias bam_itr_queryi = sam_itr_queryi;
1234 alias bam_itr_querys = sam_itr_querys;
1235 
1236 pragma(inline, true)
1237 extern (D) auto bam_itr_next(T0, T1, T2)(auto ref T0 htsfp, auto ref T1 itr, auto ref T2 r)
1238 {
1239     return hts_itr_next(htsfp.fp.bgzf, itr, r, 0);
1240 }
1241 
1242 // Load/build .csi or .bai BAM index file.  Does not work with CRAM.
1243 // It is recommended to use the sam_index_* functions below instead.
1244 extern (D) auto bam_index_load(T)(auto ref T fn)
1245 {
1246     return hts_idx_load(fn, HTS_FMT_BAI);
1247 }
1248 
1249 extern (D) auto bam_index_build(T0, T1)(auto ref T0 fn, auto ref T1 min_shift)
1250 {
1251     return sam_index_build(fn, min_shift);
1252 }
1253 
1254 /// Initialise fp->idx for the current format type for SAM, BAM and CRAM types .
1255 /** @param fp        File handle for the data file being written.
1256     @param h         Bam header structured (needed for BAI and CSI).
1257     @param min_shift 0 for BAI, or larger for CSI (CSI defaults to 14).
1258     @param fnidx     Filename to write index to.  This pointer must remain valid
1259                      until after sam_idx_save is called.
1260     @return          0 on success, <0 on failure.
1261 
1262     @note This must be called after the header has been written, but before
1263           any other data.
1264 */
1265 int sam_idx_init(htsFile* fp, sam_hdr_t* h, int min_shift, const(char)* fnidx);
1266 
1267 /// Writes the index initialised with sam_idx_init to disk.
1268 /** @param fp        File handle for the data file being written.
1269     @return          0 on success, <0 on failure.
1270 */
1271 int sam_idx_save(htsFile* fp);
1272 
1273 /// Load a BAM (.csi or .bai) or CRAM (.crai) index file
1274 /** @param fp  File handle of the data file whose index is being opened
1275     @param fn  BAM/CRAM/etc filename to search alongside for the index file
1276     @return  The index, or NULL if an error occurred.
1277 
1278 Equivalent to sam_index_load3(fp, fn, NULL, HTS_IDX_SAVE_REMOTE);
1279 */
1280 hts_idx_t* sam_index_load(htsFile* fp, const(char)* fn);
1281 
1282 /// Load a specific BAM (.csi or .bai) or CRAM (.crai) index file
1283 /** @param fp     File handle of the data file whose index is being opened
1284     @param fn     BAM/CRAM/etc data file filename
1285     @param fnidx  Index filename, or NULL to search alongside @a fn
1286     @return  The index, or NULL if an error occurred.
1287 
1288 Equivalent to sam_index_load3(fp, fn, fnidx, HTS_IDX_SAVE_REMOTE);
1289 */
1290 hts_idx_t* sam_index_load2(htsFile* fp, const(char)* fn, const(char)* fnidx);
1291 
1292 /// Load or stream a BAM (.csi or .bai) or CRAM (.crai) index file
1293 /** @param fp     File handle of the data file whose index is being opened
1294     @param fn     BAM/CRAM/etc data file filename
1295     @param fnidx  Index filename, or NULL to search alongside @a fn
1296     @param flags  Flags to alter behaviour (see description)
1297     @return  The index, or NULL if an error occurred.
1298 
1299 The @p flags parameter can be set to a combination of the following values:
1300 
1301         HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
1302         HTS_IDX_SILENT_FAIL   Fail silently if the index is not present
1303 
1304 Note that HTS_IDX_SAVE_REMOTE has no effect for remote CRAM indexes.  They
1305 are always downloaded and never cached locally.
1306 
1307 The index struct returned by a successful call should be freed
1308 via hts_idx_destroy() when it is no longer needed.
1309 */
1310 hts_idx_t* sam_index_load3(
1311     htsFile* fp,
1312     const(char)* fn,
1313     const(char)* fnidx,
1314     int flags);
1315 
1316 /// Generate and save an index file
1317 /** @param fn        Input BAM/etc filename, to which .csi/etc will be added
1318     @param min_shift Positive to generate CSI, or 0 to generate BAI
1319     @return  0 if successful, or negative if an error occurred (usually -1; or
1320              -2: opening fn failed; -3: format not indexable; -4:
1321              failed to create and/or save the index)
1322 */
1323 int sam_index_build(const(char)* fn, int min_shift);
1324 
1325 /// Generate and save an index to a specific file
1326 /** @param fn        Input BAM/CRAM/etc filename
1327     @param fnidx     Output filename, or NULL to add .bai/.csi/etc to @a fn
1328     @param min_shift Positive to generate CSI, or 0 to generate BAI
1329     @return  0 if successful, or negative if an error occurred (see
1330              sam_index_build for error codes)
1331 */
1332 int sam_index_build2(const(char)* fn, const(char)* fnidx, int min_shift);
1333 
1334 /// Generate and save an index to a specific file
1335 /** @param fn        Input BAM/CRAM/etc filename
1336     @param fnidx     Output filename, or NULL to add .bai/.csi/etc to @a fn
1337     @param min_shift Positive to generate CSI, or 0 to generate BAI
1338     @param nthreads  Number of threads to use when building the index
1339     @return  0 if successful, or negative if an error occurred (see
1340              sam_index_build for error codes)
1341 */
1342 int sam_index_build3(
1343     const(char)* fn,
1344     const(char)* fnidx,
1345     int min_shift,
1346     int nthreads);
1347 
1348 /// Free a SAM iterator
1349 /// @param iter     Iterator to free
1350 alias sam_itr_destroy = hts_itr_destroy;
1351 
1352 /// Create a BAM/CRAM iterator
1353 /** @param idx     Index
1354     @param tid     Target id
1355     @param beg     Start position in target
1356     @param end     End position in target
1357     @return An iterator on success; NULL on failure
1358 
1359 The following special values (defined in htslib/hts.h)can be used for @p tid.
1360 When using one of these values, @p beg and @p end are ignored.
1361 
1362   HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file
1363   HTS_IDX_START  iterates over the entire file
1364   HTS_IDX_REST   iterates from the current position to the end of the file
1365   HTS_IDX_NONE   always returns "no more alignment records"
1366 
1367 When using HTS_IDX_REST or HTS_IDX_NONE, NULL can be passed in to @p idx.
1368  */
1369 hts_itr_t* sam_itr_queryi(
1370     const(hts_idx_t)* idx,
1371     int tid,
1372     hts_pos_t beg,
1373     hts_pos_t end);
1374 
1375 /// Create a SAM/BAM/CRAM iterator
1376 /** @param idx     Index
1377     @param hdr     Header
1378     @param region  Region specification
1379     @return An iterator on success; NULL on failure
1380 
1381 Regions are parsed by hts_parse_reg(), and take one of the following forms:
1382 
1383 region          | Outputs
1384 --------------- | -------------
1385 REF             | All reads with RNAME REF
1386 REF:            | All reads with RNAME REF
1387 REF:START       | Reads with RNAME REF overlapping START to end of REF
1388 REF:-END        | Reads with RNAME REF overlapping start of REF to END
1389 REF:START-END   | Reads with RNAME REF overlapping START to END
1390 .               | All reads from the start of the file
1391 *               | Unmapped reads at the end of the file (RNAME '*' in SAM)
1392 
1393 The form `REF:` should be used when the reference name itself contains a colon.
1394 
1395 Note that SAM files must be bgzf-compressed for iterators to work.
1396  */
1397 hts_itr_t* sam_itr_querys(
1398     const(hts_idx_t)* idx,
1399     sam_hdr_t* hdr,
1400     const(char)* region);
1401 
1402 /// Create a multi-region iterator
1403 /** @param idx       Index
1404     @param hdr       Header
1405     @param reglist   Array of regions to iterate over
1406     @param regcount  Number of items in reglist
1407 
1408 Each @p reglist entry should have the reference name in the `reg` field, an
1409 array of regions for that reference in `intervals` and the number of items
1410 in `intervals` should be stored in `count`.  No other fields need to be filled
1411 in.
1412 
1413 The iterator will return all reads overlapping the given regions.  If a read
1414 overlaps more than one region, it will only be returned once.
1415  */
1416 hts_itr_t* sam_itr_regions(
1417     const(hts_idx_t)* idx,
1418     sam_hdr_t* hdr,
1419     hts_reglist_t* reglist,
1420     uint regcount);
1421 
1422 /// Create a multi-region iterator
1423 /** @param idx       Index
1424     @param hdr       Header
1425     @param regarray  Array of ref:interval region specifiers
1426     @param regcount  Number of items in regarray
1427 
1428 Each @p regarray entry is parsed by hts_parse_reg(), and takes one of the
1429 following forms:
1430 
1431 region          | Outputs
1432 --------------- | -------------
1433 REF             | All reads with RNAME REF
1434 REF:            | All reads with RNAME REF
1435 REF:START       | Reads with RNAME REF overlapping START to end of REF
1436 REF:-END        | Reads with RNAME REF overlapping start of REF to END
1437 REF:START-END   | Reads with RNAME REF overlapping START to END
1438 .               | All reads from the start of the file
1439 *               | Unmapped reads at the end of the file (RNAME '*' in SAM)
1440 
1441 The form `REF:` should be used when the reference name itself contains a colon.
1442 
1443 The iterator will return all reads overlapping the given regions.  If a read
1444 overlaps more than one region, it will only be returned once.
1445  */
1446 hts_itr_t* sam_itr_regarray(
1447     const(hts_idx_t)* idx,
1448     sam_hdr_t* hdr,
1449     char** regarray,
1450     uint regcount);
1451 
1452 } /// closing @nogc
1453 
1454 /// Get the next read from a SAM/BAM/CRAM iterator
1455 /** @param htsfp       Htsfile pointer for the input file
1456     @param itr         Iterator
1457     @param r           Pointer to a bam1_t struct
1458     @return >= 0 on success; -1 when there is no more data; < -1 on error
1459  */
1460 pragma(inline,true)
1461 int sam_itr_next(htsFile* htsfp, hts_itr_t* itr, bam1_t* r) {
1462     if (!htsfp.is_bgzf && !htsfp.is_cram) {
1463         hts_log_error(__FUNCTION__, format("%s not BGZF compressed", htsfp.fn ? htsfp.fn : "File"));
1464         return -2;
1465     }
1466     if (!itr) {
1467         hts_log_error(__FUNCTION__,"Null iterator");
1468         return -2;
1469     }
1470 
1471     if (itr.multi)
1472         return hts_itr_multi_next(htsfp, itr, r);
1473     else
1474         return hts_itr_next(htsfp.is_bgzf ? htsfp.fp.bgzf : null, itr, r, htsfp);
1475 }
1476 
1477 @nogc nothrow:
1478 
1479 /// Get the next read from a BAM/CRAM multi-iterator
1480 /** @param htsfp       Htsfile pointer for the input file
1481     @param itr         Iterator
1482     @param r           Pointer to a bam1_t struct
1483     @return >= 0 on success; -1 when there is no more data; < -1 on error
1484  */
1485 alias sam_itr_multi_next = sam_itr_next;
1486 
1487 const(char)* sam_parse_region(
1488     sam_hdr_t* h,
1489     const(char)* s,
1490     int* tid,
1491     hts_pos_t* beg,
1492     hts_pos_t* end,
1493     int flags);
1494 
1495 /***************
1496  *** SAM I/O ***
1497  ***************/
1498 
1499 alias sam_open = hts_open;
1500 alias sam_open_format = hts_open_format;
1501 
1502 alias sam_close = hts_close;
1503 
1504 int sam_open_mode(char* mode, const(char)* fn, const(char)* format);
1505 
1506 // A version of sam_open_mode that can handle ,key=value options.
1507 // The format string is allocated and returned, to be freed by the caller.
1508 // Prefix should be "r" or "w",
1509 char* sam_open_mode_opts(
1510     const(char)* fn,
1511     const(char)* mode,
1512     const(char)* format);
1513 
1514 int sam_hdr_change_HD(sam_hdr_t* h, const(char)* key, const(char)* val);
1515 
1516 int sam_parse1(kstring_t* s, sam_hdr_t* h, bam1_t* b);
1517 int sam_format1(const(sam_hdr_t)* h, const(bam1_t)* b, kstring_t* str);
1518 
1519 /// sam_read1 - Read a record from a file
1520 /** @param fp   Pointer to the source file
1521  *  @param h    Pointer to the header previously read (fully or partially)
1522  *  @param b    Pointer to the record placeholder
1523  *  @return >= 0 on successfully reading a new record, -1 on end of stream, < -1 on error
1524  */
1525 int sam_read1(samFile* fp, sam_hdr_t* h, bam1_t* b);
1526 /// sam_write1 - Write a record to a file
1527 /** @param fp    Pointer to the destination file
1528  *  @param h     Pointer to the header structure previously read
1529  *  @param b     Pointer to the record to be written
1530  *  @return >= 0 on successfully writing the record, -1 on error
1531  */
1532 int sam_write1(samFile* fp, const(sam_hdr_t)* h, const(bam1_t)* b);
1533 
1534 // Forward declaration, see hts_expr.h for full.
1535 struct hts_filter_t;
1536 
1537 /// sam_passes_filter - Checks whether a record passes an hts_filter.
1538 /** @param h      Pointer to the header structure previously read
1539  *  @param b      Pointer to the BAM record to be checked
1540  *  @param filt   Pointer to the filter, created from hts_filter_init.
1541  *  @return       1 if passes, 0 if not, and <0 on error.
1542  */
1543 int sam_passes_filter(
1544     const(sam_hdr_t)* h,
1545     const(bam1_t)* b,
1546     hts_filter_t* filt);
1547 
1548 /*************************************
1549  *** Manipulating auxiliary fields ***
1550  *************************************/
1551 
1552 /// Converts a BAM aux tag to SAM format
1553 /*
1554  * @param b    Pointer to the bam record
1555  * @param key  Two letter tag key
1556  * @param type Single letter type code: ACcSsIifHZB.
1557  * @param tag  Tag data pointer, in BAM format
1558  * @param end  Pointer to end of bam record (largest extent of tag)
1559  * @param ks   kstring to write the formatted tag to
1560  *
1561  * @return pointer to end of tag on success,
1562  *         NULL on failure.
1563  *
1564  * @discussion The three separate parameters key, type, tag may be
1565  * derived from a s=bam_aux_get() query as s-2, *s and s+1.  However
1566  * it is recommended to use bam_aux_get_str in this situation.
1567  * The desire to split these parameters up is for potential processing
1568  * of non-BAM formats that encode using a BAM type mechanism
1569  * (such as the internal CRAM representation).
1570  */
1571 
1572 // brevity and consistency with other code.
1573 
1574 // NB: "d" is not an official type in the SAM spec.
1575 // However for unknown reasons samtools has always supported this.
1576 // We believe, HOPE, it is not in general usage and we do not
1577 // encourage it.
1578 
1579 // or externalise sam.c's aux_type2size function?
1580 
1581 // now points to the start of the array
1582 
1583 // write the type
1584 
1585 // Unknown type
1586 const(ubyte)* sam_format_aux1(
1587     const(ubyte)* key,
1588     const ubyte type,
1589     const(ubyte)* tag,
1590     const(ubyte)* end,
1591     kstring_t* ks);
1592 
1593 /// Return a pointer to an aux record
1594 /** @param b   Pointer to the bam record
1595     @param tag Desired aux tag
1596     @return Pointer to the tag data, or NULL if tag is not present or on error
1597     If the tag is not present, this function returns NULL and sets errno to
1598     ENOENT.  If the bam record's aux data is corrupt (either a tag has an
1599     invalid type, or the last record is incomplete) then errno is set to
1600     EINVAL and NULL is returned.
1601  */
1602 ubyte* bam_aux_get(const(bam1_t)* b, ref const(char)[2] tag);
1603 
1604 /// Return a SAM formatting string containing a BAM tag
1605 /** @param b   Pointer to the bam record
1606     @param tag Desired aux tag
1607     @param s   The kstring to write to.
1608 
1609     @return 1 on success,
1610             0 on no tag found with errno = ENOENT,
1611            -1 on error (errno will be either EINVAL or ENOMEM).
1612  */
1613 int bam_aux_get_str(const(bam1_t)* b, ref const(char)[2] tag, kstring_t* s);
1614 
1615 /// Get an integer aux value
1616 /** @param s Pointer to the tag data, as returned by bam_aux_get()
1617     @return The value, or 0 if the tag was not an integer type
1618     If the tag is not an integer type, errno is set to EINVAL.  This function
1619     will not return the value of floating-point tags.
1620 */
1621 long bam_aux2i(const(ubyte)* s);
1622 
1623 /// Get an integer aux value
1624 /** @param s Pointer to the tag data, as returned by bam_aux_get()
1625     @return The value, or 0 if the tag was not an integer type
1626     If the tag is not an numeric type, errno is set to EINVAL.  The value of
1627     integer flags will be returned cast to a double.
1628 */
1629 double bam_aux2f(const(ubyte)* s);
1630 
1631 /// Get a character aux value
1632 /** @param s Pointer to the tag data, as returned by bam_aux_get().
1633     @return The value, or 0 if the tag was not a character ('A') type
1634     If the tag is not a character type, errno is set to EINVAL.
1635 */
1636 char bam_aux2A(const(ubyte)* s);
1637 
1638 /// Get a string aux value
1639 /** @param s Pointer to the tag data, as returned by bam_aux_get().
1640     @return Pointer to the string, or NULL if the tag was not a string type
1641     If the tag is not a string type ('Z' or 'H'), errno is set to EINVAL.
1642 */
1643 char* bam_aux2Z(const(ubyte)* s);
1644 
1645 /// Get the length of an array-type ('B') tag
1646 /** @param s Pointer to the tag data, as returned by bam_aux_get().
1647     @return The length of the array, or 0 if the tag is not an array type.
1648     If the tag is not an array type, errno is set to EINVAL.
1649  */
1650 uint bam_auxB_len(const(ubyte)* s);
1651 
1652 /// Get an integer value from an array-type tag
1653 /** @param s   Pointer to the tag data, as returned by bam_aux_get().
1654     @param idx 0-based Index into the array
1655     @return The idx'th value, or 0 on error.
1656     If the array is not an integer type, errno is set to EINVAL.  If idx
1657     is greater than or equal to  the value returned by bam_auxB_len(s),
1658     errno is set to ERANGE.  In both cases, 0 will be returned.
1659  */
1660 long bam_auxB2i(const(ubyte)* s, uint idx);
1661 
1662 /// Get a floating-point value from an array-type tag
1663 /** @param s   Pointer to the tag data, as returned by bam_aux_get().
1664     @param idx 0-based Index into the array
1665     @return The idx'th value, or 0.0 on error.
1666     If the array is not a numeric type, errno is set to EINVAL.  This can
1667     only actually happen if the input record has an invalid type field.  If
1668     idx is greater than or equal to  the value returned by bam_auxB_len(s),
1669     errno is set to ERANGE.  In both cases, 0.0 will be returned.
1670  */
1671 double bam_auxB2f(const(ubyte)* s, uint idx);
1672 
1673 /// Append tag data to a bam record
1674 /* @param b    The bam record to append to.
1675    @param tag  Tag identifier
1676    @param type Tag data type
1677    @param len  Length of the data in bytes
1678    @param data The data to append
1679    @return 0 on success; -1 on failure.
1680 If there is not enough space to store the additional tag, errno is set to
1681 ENOMEM.  If the type is invalid, errno may be set to EINVAL.  errno is
1682 also set to EINVAL if the bam record's aux data is corrupt.
1683 */
1684 int bam_aux_append(
1685     bam1_t* b,
1686     ref const(char)[2] tag,
1687     char type,
1688     int len,
1689     const(ubyte)* data);
1690 
1691 /// Delete tag data from a bam record
1692 /* @param b The bam record to update
1693    @param s Pointer to the tag to delete, as returned by bam_aux_get().
1694    @return 0 on success; -1 on failure
1695    If the bam record's aux data is corrupt, errno is set to EINVAL and this
1696    function returns -1;
1697 */
1698 int bam_aux_del(bam1_t* b, ubyte* s);
1699 
1700 /// Update or add a string-type tag
1701 /* @param b    The bam record to update
1702    @param tag  Tag identifier
1703    @param len  The length of the new string
1704    @param data The new string
1705    @return 0 on success, -1 on failure
1706    This function will not change the ordering of tags in the bam record.
1707    New tags will be appended to any existing aux records.
1708 
1709    If @p len is less than zero, the length of the input string will be
1710    calculated using strlen().  Otherwise exactly @p len bytes will be
1711    copied from @p data to make the new tag.  If these bytes do not
1712    include a terminating NUL character, one will be added.  (Note that
1713    versions of HTSlib up to 1.10.2 had different behaviour here and
1714    simply copied @p len bytes from data.  To generate a valid tag it
1715    was necessary to ensure the last character was a NUL, and include
1716    it in @p len.)
1717 
1718    On failure, errno may be set to one of the following values:
1719 
1720    EINVAL: The bam record's aux data is corrupt or an existing tag with the
1721    given ID is not of type 'Z'.
1722 
1723    ENOMEM: The bam data needs to be expanded and either the attempt to
1724    reallocate the data buffer failed or the resulting buffer would be
1725    longer than the maximum size allowed in a bam record (2Gbytes).
1726 */
1727 int bam_aux_update_str(
1728     bam1_t* b,
1729     ref const(char)[2] tag,
1730     int len,
1731     const(char)* data);
1732 
1733 /// Update or add an integer tag
1734 /* @param b    The bam record to update
1735    @param tag  Tag identifier
1736    @param val  The new value
1737    @return 0 on success, -1 on failure
1738    This function will not change the ordering of tags in the bam record.
1739    New tags will be appended to any existing aux records.
1740 
1741    On failure, errno may be set to one of the following values:
1742 
1743    EINVAL: The bam record's aux data is corrupt or an existing tag with the
1744    given ID is not of an integer type (c, C, s, S, i or I).
1745 
1746    EOVERFLOW (or ERANGE on systems that do not have EOVERFLOW): val is
1747    outside the range that can be stored in an integer bam tag (-2147483647
1748    to 4294967295).
1749 
1750    ENOMEM: The bam data needs to be expanded and either the attempt to
1751    reallocate the data buffer failed or the resulting buffer would be
1752    longer than the maximum size allowed in a bam record (2Gbytes).
1753 */
1754 int bam_aux_update_int(bam1_t* b, ref const(char)[2] tag, long val);
1755 
1756 /// Update or add a floating-point tag
1757 /* @param b    The bam record to update
1758    @param tag  Tag identifier
1759    @param val  The new value
1760    @return 0 on success, -1 on failure
1761    This function will not change the ordering of tags in the bam record.
1762    New tags will be appended to any existing aux records.
1763 
1764    On failure, errno may be set to one of the following values:
1765 
1766    EINVAL: The bam record's aux data is corrupt or an existing tag with the
1767    given ID is not of a float type.
1768 
1769    ENOMEM: The bam data needs to be expanded and either the attempt to
1770    reallocate the data buffer failed or the resulting buffer would be
1771    longer than the maximum size allowed in a bam record (2Gbytes).
1772 */
1773 int bam_aux_update_float(bam1_t* b, ref const(char)[2] tag, float val);
1774 
1775 /// Update or add an array tag
1776 /* @param b     The bam record to update
1777    @param tag   Tag identifier
1778    @param type  Data type (one of c, C, s, S, i, I or f)
1779    @param items Number of items
1780    @param data  Pointer to data
1781    @return 0 on success, -1 on failure
1782    The type parameter indicates the how the data is interpreted:
1783 
1784    Letter code | Data type | Item Size (bytes)
1785    ----------- | --------- | -----------------
1786    c           | int8_t    | 1
1787    C           | uint8_t   | 1
1788    s           | int16_t   | 2
1789    S           | uint16_t  | 2
1790    i           | int32_t   | 4
1791    I           | uint32_t  | 4
1792    f           | float     | 4
1793 
1794    This function will not change the ordering of tags in the bam record.
1795    New tags will be appended to any existing aux records.  The bam record
1796    will grow or shrink in order to accommodate the new data.
1797 
1798    The data parameter must not point to any data in the bam record itself or
1799    undefined behaviour may result.
1800 
1801    On failure, errno may be set to one of the following values:
1802 
1803    EINVAL: The bam record's aux data is corrupt, an existing tag with the
1804    given ID is not of an array type or the type parameter is not one of
1805    the values listed above.
1806 
1807    ENOMEM: The bam data needs to be expanded and either the attempt to
1808    reallocate the data buffer failed or the resulting buffer would be
1809    longer than the maximum size allowed in a bam record (2Gbytes).
1810 */
1811 int bam_aux_update_array(
1812     bam1_t* b,
1813     ref const(char)[2] tag,
1814     ubyte type,
1815     uint items,
1816     void* data);
1817 
1818 /**************************
1819  *** Pileup and Mpileup ***
1820  **************************/
1821 
1822 /*! @typedef
1823  @abstract Generic pileup 'client data'.
1824 
1825  @discussion The pileup iterator allows setting a constructor and
1826  destructor function, which will be called every time a sequence is
1827  fetched and discarded.  This permits caching of per-sequence data in
1828  a tidy manner during the pileup process.  This union is the cached
1829  data to be manipulated by the "client" (the caller of pileup).
1830 */
1831 union bam_pileup_cd
1832 {
1833     void* p;
1834     long i;
1835     double f;
1836 }
1837 
1838 /*! @typedef
1839  @abstract Structure for one alignment covering the pileup position.
1840  @field  b          pointer to the alignment
1841  @field  qpos       position of the read base at the pileup site, 0-based
1842  @field  indel      indel length; 0 for no indel, positive for ins and negative for del
1843  @field  level      the level of the read in the "viewer" mode
1844  @field  is_del     1 iff the base on the padded read is a deletion
1845  @field  is_head    1 iff this is the first base in the query sequence
1846  @field  is_tail    1 iff this is the last base in the query sequence
1847  @field  is_refskip 1 iff the base on the padded read is part of CIGAR N op
1848  @field  aux        (used by bcf_call_gap_prep())
1849  @field  cigar_ind  index of the CIGAR operator that has just been processed
1850 
1851  @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The
1852  difference between the two functions is that the former does not
1853  set bam_pileup1_t::level, while the later does. Level helps the
1854  implementation of alignment viewers, but calculating this has some
1855  overhead.
1856  */
1857 struct bam_pileup1_t
1858 {
1859     import std.bitmanip : bitfields;
1860 
1861     bam1_t* b;
1862     int qpos;
1863     int indel;
1864     int level;
1865 
1866     mixin(bitfields!(
1867         uint, "is_del", 1,
1868         uint, "is_head", 1,
1869         uint, "is_tail", 1,
1870         uint, "is_refskip", 1,
1871         uint, "", 1,
1872         uint, "aux", 27));
1873 
1874     /* reserved */
1875     bam_pileup_cd cd; // generic per-struct data, owned by caller.
1876     int cigar_ind;
1877 }
1878 
1879 alias bam_plp_auto_f = int function(void* data, bam1_t* b);
1880 
1881 struct bam_plp_s;
1882 alias bam_plp_t = bam_plp_s*;
1883 
1884 struct bam_mplp_s;
1885 alias bam_mplp_t = bam_mplp_s*;
1886 
1887 /**
1888  *  bam_plp_init() - sets an iterator over multiple
1889  *  @func:      see mplp_func in bam_plcmd.c in samtools for an example. Expected return
1890  *              status: 0 on success, -1 on end, < -1 on non-recoverable errors
1891  *  @data:      user data to pass to @func
1892  *
1893  *  The struct returned by a successful call should be freed
1894  *  via bam_plp_destroy() when it is no longer needed.
1895  */
1896 bam_plp_t bam_plp_init(bam_plp_auto_f func, void* data);
1897 
1898 void bam_plp_destroy(bam_plp_t iter);
1899 
1900 int bam_plp_push(bam_plp_t iter, const(bam1_t)* b);
1901 
1902 const(bam_pileup1_t)* bam_plp_next(
1903     bam_plp_t iter,
1904     int* _tid,
1905     int* _pos,
1906     int* _n_plp);
1907 
1908 const(bam_pileup1_t)* bam_plp_auto(
1909     bam_plp_t iter,
1910     int* _tid,
1911     int* _pos,
1912     int* _n_plp);
1913 
1914 const(bam_pileup1_t)* bam_plp64_next(
1915     bam_plp_t iter,
1916     int* _tid,
1917     hts_pos_t* _pos,
1918     int* _n_plp);
1919 
1920 const(bam_pileup1_t)* bam_plp64_auto(
1921     bam_plp_t iter,
1922     int* _tid,
1923     hts_pos_t* _pos,
1924     int* _n_plp);
1925 
1926 void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
1927 
1928 void bam_plp_reset(bam_plp_t iter);
1929 
1930 /**
1931  *  bam_plp_constructor() - sets a callback to initialise any per-pileup1_t fields.
1932  *  @plp:       The bam_plp_t initialised using bam_plp_init.
1933  *  @func:      The callback function itself.  When called, it is given the
1934  *              data argument (specified in bam_plp_init), the bam structure and
1935  *              a pointer to a locally allocated bam_pileup_cd union.  This union
1936  *              will also be present in each bam_pileup1_t created.
1937  */
1938 void bam_plp_constructor(
1939     bam_plp_t plp,
1940     int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func);
1941 void bam_plp_destructor(
1942     bam_plp_t plp,
1943     int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func);
1944 
1945 /// Get pileup padded insertion sequence
1946 /**
1947  * @param p       pileup data
1948  * @param ins     the kstring where the insertion sequence will be written
1949  * @param del_len location for deletion length
1950  * @return the length of insertion string on success; -1 on failure.
1951  *
1952  * Fills out the kstring with the padded insertion sequence for the current
1953  * location in 'p'.  If this is not an insertion site, the string is blank.
1954  *
1955  * If del_len is not NULL, the location pointed to is set to the length of
1956  * any deletion immediately following the insertion, or zero if none.
1957  */
1958 int bam_plp_insertion(const(bam_pileup1_t)* p, kstring_t* ins, int* del_len);
1959 
1960 /// Create a new bam_mplp_t structure
1961 /** The struct returned by a successful call should be freed
1962  *  via bam_mplp_destroy() when it is no longer needed.
1963  */
1964 bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void** data);
1965 
1966 /// Set up mpileup overlap detection
1967 /**
1968  * @param iter    mpileup iterator
1969  * @return 0 on success; a negative value on error
1970  *
1971  *  If called, mpileup will detect overlapping
1972  *  read pairs and for each base pair set the base quality of the
1973  *  lower-quality base to zero, thus effectively discarding it from
1974  *  calling. If the two bases are identical, the quality of the other base
1975  *  is increased to the sum of their qualities (capped at 200), otherwise
1976  *  it is multiplied by 0.8.
1977  */
1978 int bam_mplp_init_overlaps(bam_mplp_t iter);
1979 
1980 void bam_mplp_destroy(bam_mplp_t iter);
1981 
1982 void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
1983 
1984 int bam_mplp_auto(
1985     bam_mplp_t iter,
1986     int* _tid,
1987     int* _pos,
1988     int* n_plp,
1989     const(bam_pileup1_t*)* plp);
1990 
1991 int bam_mplp64_auto(
1992     bam_mplp_t iter,
1993     int* _tid,
1994     hts_pos_t* _pos,
1995     int* n_plp,
1996     const(bam_pileup1_t*)* plp);
1997 
1998 void bam_mplp_reset(bam_mplp_t iter);
1999 
2000 void bam_mplp_constructor(
2001     bam_mplp_t iter,
2002     int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func);
2003 
2004 void bam_mplp_destructor(
2005     bam_mplp_t iter,
2006     int function(void* data, const(bam1_t)* b, bam_pileup_cd* cd) func);
2007 
2008 // ~!defined(BAM_NO_PILEUP)
2009 
2010 /***********************************
2011  * BAQ calculation and realignment *
2012  ***********************************/
2013 
2014 int sam_cap_mapq(bam1_t* b, const(char)* ref_, hts_pos_t ref_len, int thres);
2015 
2016 /// Used as flag parameter in sam_prob_realn.
2017 enum htsRealnFlags {
2018     BAQ_APPLY = 1,
2019     BAQ_EXTEND = 2,
2020     BAQ_REDO = 4,
2021 
2022     // Platform subfield, in bit position 3 onwards
2023     BAQ_AUTO = 0<<3,
2024     BAQ_ILLUMINA = 1<<3,
2025     BAQ_PACBIOCCS = 2<<3,
2026     BAQ_PACBIO = 3<<3,
2027     BAQ_ONT = 4<<3,
2028     BAQ_GENAPSYS = 5<<3
2029 }
2030 
2031 /// Calculate BAQ scores
2032 /** @param b   BAM record
2033     @param ref     Reference sequence
2034     @param ref_len Reference sequence length
2035     @param flag    Flags, see description
2036     @return 0 on success \n
2037            -1 if the read was unmapped, zero length, had no quality values, did not have at least one M, X or = CIGAR operator, or included a reference skip. \n
2038            -3 if BAQ alignment has already been done and does not need to be applied, or has already been applied. \n
2039            -4 if alignment failed (most likely due to running out of memory)
2040 
2041 This function calculates base alignment quality (BAQ) values using the method
2042 described in "Improving SNP discovery by base alignment quality", Heng Li,
2043 Bioinformatics, Volume 27, Issue 8 (https://doi.org/10.1093/bioinformatics/btr076).
2044 
2045 The @param flag value can be generated using the htsRealnFlags enum, but for
2046 backwards compatibilty reasons is retained as an "int".  An example usage
2047 of the enum could be this, equivalent to flag 19:
2048 
2049     sam_prob_realn(b, ref, len, BAQ_APPLY | BAQ_EXTEND | BAQ_PACBIOCCS);
2050 
2051 
2052 The following @param flag bits can be used:
2053 
2054 Bit 0 (BAQ_APPLY): Adjust the quality values using the BAQ values
2055 
2056  If set, the data in the BQ:Z tag is used to adjust the quality values, and
2057  the BQ:Z tag is renamed to ZQ:Z.
2058 
2059  If clear, and a ZQ:Z tag is present, the quality values are reverted using
2060  the data in the tag, and the tag is renamed to BQ:Z.
2061 
2062 Bit 1 (BAQ_EXTEND): Use "extended" BAQ.
2063 
2064  Changes the BAQ calculation to increase sensitivity at the expense of
2065  reduced specificity.
2066 
2067 Bit 2 (BAQ_REDO): Recalculate BAQ, even if a BQ tag is present.
2068 
2069  Force BAQ to be recalculated.  Note that a ZQ:Z tag will always disable
2070  recalculation.
2071 
2072 Bits 3-10: Choose parameters tailored to a specific instrument type.
2073 
2074  One of BAQ_AUTO, BAQ_ILLUMINA, BAQ_PACBIOCCS, BAQ_PACBIO, BAQ_ONT and
2075  BAQ_GENAPSYS.  The BAQ parameter tuning are still a work in progress and
2076  at the time of writing mainly consist of Illumina vs long-read technology
2077  adjustments.
2078 
2079 
2080 @bug
2081 If the input read has both BQ:Z and ZQ:Z tags, the ZQ:Z one will be removed.
2082 Depending on what previous processing happened, this may or may not be the
2083 correct thing to do.  It would be wise to avoid this situation if possible.
2084 */
2085 
2086 int sam_prob_realn(bam1_t* b, const(char)* ref_, hts_pos_t ref_len, int flag);
2087