1 // htslib-1.9 sam.h as D module
2 // Changes include:
3 // Removed if(n)defs
4 // 	including BAM_NO_ID which is probably archaic
5 // Changed #defines to const/immutable
6 // Removed all HTS_RESULT_USED (__attribute__ ((__warn_unused_result__)))
7 // Do not #include "hts_defs.h"
8 // Change numeric #defines to enum int
9 // Changed string #define to 8-bit (char) string literal "xxx"c
10 // typedef struct to alias
11 // removed typedefs
12 // modified bitfields in struct and aligned(1)
13 // removed redundant struct declarations when declaring struct pointers
14 // ref is a reserved keyword in D; changed 'ref' to '_ref'
15 // Function prototypes taking fixed size array (e.g. ..., const char tag[2], ) should include ref in the D prototype 
16 // const char *p -> const(char) *p
17 // Replace localal definition with import kstring
18 /*  
19 Aliased function pointer typedefs:
20 
21     typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
22     alias bam_plp_auto_f = int *function(void *data, bam1_t *b);
23 
24 Updated parameter function pointers:
25     int (*func)(void *data, const bam1_t *b, bam_pileup_cd *cd));
26     int function(void *data, const bam1_t *b, bam_pileup_cd *cd) func);
27 
28 Functions returning const must be rewritten as const(type)*func_name
29 */
30 module dhtslib.htslib.sam;
31 
32 import std.bitmanip;
33 
34 extern (C):
35 /// @file htslib/sam.h
36 /// High-level SAM/BAM/CRAM sequence file operations.
37 /*
38     Copyright (C) 2008, 2009, 2013-2017 Genome Research Ltd.
39     Copyright (C) 2010, 2012, 2013 Broad Institute.
40 
41     Author: Heng Li <lh3@sanger.ac.uk>
42 
43 Permission is hereby granted, free of charge, to any person obtaining a copy
44 of this software and associated documentation files (the "Software"), to deal
45 in the Software without restriction, including without limitation the rights
46 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
47 copies of the Software, and to permit persons to whom the Software is
48 furnished to do so, subject to the following conditions:
49 
50 The above copyright notice and this permission notice shall be included in
51 all copies or substantial portions of the Software.
52 
53 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
54 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
55 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
56 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
57 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
58 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
59 DEALINGS IN THE SOFTWARE.  */
60 
61 import core.stdc.stdint;
62 import dhtslib.htslib.hts;
63 import dhtslib.htslib.bgzf:BGZF;
64 import dhtslib.htslib.kstring: __kstring_t, kstring_t;
65 
66 /// Highest SAM format version supported by this library
67 auto SAM_FORMAT_VERSION = "1.6"c;
68 
69 /**********************
70  *** SAM/BAM header ***
71  **********************/
72 
73 /**! @typedef
74  @abstract Structure for the alignment header.
75  @field n_targets   number of reference sequences
76  @field l_text      length of the plain text in the header
77  @field target_len  lengths of the reference sequences
78  @field target_name names of the reference sequences
79  @field text        plain text
80  @field sdict       header dictionary
81  */
82 
83 struct bam_hdr_t { // @suppress(dscanner.style.phobos_naming_convention)
84     int  n_targets;         /// number of targets (contigs)
85     int ignore_sam_err; /// ? Ignore SAM format errors?
86     uint l_text;        /// ? text data length
87     uint *target_len;   /// ? number of targets (contigs)
88     byte *cigar_tab;    /// ? CIGAR table
89     char **target_name; /// ? C-style array of target (contig) names
90     char *text;         /// ?
91     void *sdict;        /// ?
92 }
93 
94 /****************************
95  *** CIGAR related macros ***
96  ****************************/
97 
98 enum {
99     BAM_CMATCH      = 0,    /// CIGAR M, match
100     BAM_CINS        = 1,    /// CIGAR I, insertion
101     BAM_CDEL        = 2,    /// CIGAR D, deletion
102     BAM_CREF_SKIP   = 3,    /// CIGAR N, refskip
103     BAM_CSOFT_CLIP  = 4,    /// CIGAR S, soft clip
104     BAM_CHARD_CLIP  = 5,    /// CIGAR H, hard clip
105     BAM_CPAD        = 6,    /// CIGAR P, padding
106     BAM_CEQUAL      = 7,    /// CIGAR =, equal
107     BAM_CDIFF       = 8,    /// CIGAR X, differs
108     BAM_CBACK       = 9     /// CIGAR B, back(?)
109 }
110 
111 auto BAM_CIGAR_STR       = "MIDNSHP=XB"c;   /// internal
112 enum int BAM_CIGAR_SHIFT = 4;       /// internal
113 enum int BAM_CIGAR_MASK  = 0xf;     /// internal
114 enum int BAM_CIGAR_TYPE  = 0x3C1A7; /// internal magic const for bam_cigar_type()
115 
116 pragma(inline, true)
117 {
118 /// CIGAR opcode for CIGAR uint (4 LSB)
119 auto bam_cigar_op(uint c)    { return ( c & BAM_CIGAR_MASK); }
120 /// CIGAR operation length for CIGAR uint (28 MSB >> 4)
121 auto bam_cigar_oplen(uint c) { return ( c >> BAM_CIGAR_SHIFT ); }
122 // Note that BAM_CIGAR_STR is padded to length 16 bytes below so that
123 // the array look-up will not fall off the end.  '?' is chosen as the
124 // padding character so it's easy to spot if one is emitted, and will
125 // result in a parsing failure (in sam_parse1(), at least) if read.
126 /// CIGAR opcode character for CIGAR uint
127 auto bam_cigar_opchr(uint c)    { return (BAM_CIGAR_STR ~ "??????"[bam_cigar_op(c)]); }
128 /// Generate CIGAR uint from length and operator
129 auto bam_cigar_gen(uint l, uint o) { return (l << BAM_CIGAR_SHIFT | o); }
130 
131 /** bam_cigar_type returns a bit flag with:
132  *   bit 1 set if the cigar operation consumes the query
133  *   bit 2 set if the cigar operation consumes the reference
134  *
135  * For reference, the unobfuscated truth table for this function is:
136  * BAM_CIGAR_TYPE  QUERY  REFERENCE
137  * --------------------------------
138  * BAM_CMATCH      1      1
139  * BAM_CINS        1      0
140  * BAM_CDEL        0      1
141  * BAM_CREF_SKIP   0      1
142  * BAM_CSOFT_CLIP  1      0
143  * BAM_CHARD_CLIP  0      0
144  * BAM_CPAD        0      0
145  * BAM_CEQUAL      1      1
146  * BAM_CDIFF       1      1
147  * BAM_CBACK       0      0
148  * --------------------------------
149  */
150 auto bam_cigar_type(uint o) { return (BAM_CIGAR_TYPE >> (o << 1) & 3); }    // bit 1: consume query; bit 2: consume reference
151 } // end pragma(inline)
152 
153 /**! @abstract the read is paired in sequencing, no matter whether it is mapped in a pair */
154 enum int BAM_FPAIRED       = 1;
155 /**! @abstract the read is mapped in a proper pair */
156 enum int BAM_FPROPER_PAIR  = 2;
157 /**! @abstract the read itself is unmapped; conflictive with BAM_FPROPER_PAIR */
158 enum int BAM_FUNMAP        = 4;
159 /**! @abstract the mate is unmapped */
160 enum int BAM_FMUNMAP       = 8;
161 /**! @abstract the read is mapped to the reverse strand */
162 enum int BAM_FREVERSE      =16;
163 /**! @abstract the mate is mapped to the reverse strand */
164 enum int BAM_FMREVERSE     =32;
165 /**! @abstract this is read1 */
166 enum int BAM_FREAD1        =64;
167 /**! @abstract this is read2 */
168 enum int BAM_FREAD2        =128;
169 /**! @abstract not primary alignment */
170 enum int BAM_FSECONDARY    =256;
171 /**! @abstract QC failure */
172 enum int BAM_FQCFAIL       =512;
173 /**! @abstract optical or PCR duplicate */
174 enum int BAM_FDUP          =1024;
175 /**! @abstract supplementary alignment */
176 enum int BAM_FSUPPLEMENTARY=2048;
177 
178 /*************************
179  *** Alignment records ***
180  *************************/
181 
182 /**! @typedef
183  @abstract Structure for core alignment information.
184  @field  tid     chromosome ID, defined by bam_hdr_t
185  @field  pos     0-based leftmost coordinate
186  @field  bin     bin calculated by bam_reg2bin()
187  @field  qual    mapping quality
188  @field  l_qname length of the query name
189  @field  flag    bitwise flag
190  @field  l_extranul length of extra NULs between qname & cigar (for alignment)
191  @field  n_cigar number of CIGAR operations
192  @field  l_qseq  length of the query sequence (read)
193  @field  mtid    chromosome ID of next read in template, defined by bam_hdr_t
194  @field  mpos    0-based leftmost coordinate of next read in template
195  */
196 struct bam1_core_t { // @suppress(dscanner.style.phobos_naming_convention)
197     int32_t  tid;       /// chromosome ID, defined by bam_hdr_t
198     int32_t  pos;       /// 0-based leftmost coordinate
199     uint16_t bin;       /// bin calculated by bam_reg2bin()
200     uint8_t  qual;      /// mapping quality
201     uint8_t  l_qname;   /// length of the query name
202     uint16_t flag;      /// bitwise flag
203     uint8_t  unused1;   /// padding
204     uint8_t  l_extranul;/// length of extra NULs between qname & cigar (for alignment)
205     uint32_t n_cigar;   /// number of CIGAR operations
206     int32_t  l_qseq;    /// length of the query sequence (read)
207     int32_t  mtid;      /// chromosome ID of next read in template, defined by bam_hdr_t
208     int32_t  mpos;      /// 0-based leftmost coordinate of next read in template (mate)
209     int32_t  isize;     /// ? template length
210 }
211 
212 /**! @typedef
213  @abstract Structure for one alignment.
214  @field  core       core information about the alignment
215  @field  l_data     current length of bam1_t::data
216  @field  m_data     maximum length of bam1_t::data
217  @field  data       all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux
218 
219  @discussion Notes:
220 
221  1. qname is terminated by one to four NULs, so that the following
222  cigar data is 32-bit aligned; core.l_qname includes these trailing NULs,
223  while core.l_extranul counts the excess NULs (so 0 <= l_extranul <= 3).
224  2. l_qseq is calculated from the total length of an alignment block
225  on reading or from CIGAR.
226  3. cigar data is encoded 4 bytes per CIGAR operation.
227  4. seq is nybble-encoded according to bam_nt16_table.
228  */
229 struct bam1_t { // @suppress(dscanner.style.phobos_naming_convention)
230     bam1_core_t core;   /// core information about the alignment
231     int         l_data; /// current length of bam1_t::data
232     uint32_t    m_data; /// maximum length of bam1_t::data
233     uint8_t     *data;  /// all variable-length data, concatenated; structure: qname-cigar-seq-qual-aux
234     uint64_t    id;     /// ???
235 }
236 
237 pragma(inline, true) {
238 /**! @function
239  @abstract  Get whether the query is on the reverse strand
240  @param  b  pointer to an alignment
241  @return    boolean true if query is on the reverse strand
242  */
243 bool bam_is_rev(bam1_t *b) { return ( ((*b).core.flag & BAM_FREVERSE) != 0 ); }
244 /**! @function
245  @abstract  Get whether the query's mate is on the reverse strand
246  @param  b  pointer to an alignment
247  @return    boolean true if query's mate on the reverse strand
248  */
249 bool bam_is_mrev(bam1_t *b) { return( ((*b).core.flag & BAM_FMREVERSE) != 0); }
250 /**! @function
251  @abstract  Get the name of the query
252  @param  b  pointer to an alignment
253  @return    pointer to the name string, null terminated
254  */
255 auto bam_get_qname(bam1_t *b) { return (cast(char*)(*b).data); }
256 /**! @function
257  @abstract  Get the CIGAR array
258  @param  b  pointer to an alignment
259  @return    pointer to the CIGAR array
260 
261  @discussion In the CIGAR array, each element is a 32-bit integer. The
262  lower 4 bits gives a CIGAR operation and the higher 28 bits keep the
263  length of a CIGAR.
264  */
265 auto bam_get_cigar(bam1_t *b) { return cast(uint *)((*b).data + (*b).core.l_qname); }
266 /**! @function
267  @abstract  Get query sequence
268  @param  b  pointer to an alignment
269  @return    pointer to sequence
270 
271  @discussion Each base is encoded in 4 bits: 1 for A, 2 for C, 4 for G,
272  8 for T and 15 for N. Two bases are packed in one byte with the base
273  at the higher 4 bits having smaller coordinate on the read. It is
274  recommended to use bam_seqi() macro to get the base.
275  */
276 auto bam_get_seq(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + (*b).core.l_qname); }
277 /**! @function
278  @abstract  Get query quality
279  @param  b  pointer to an alignment
280  @return    pointer to quality string
281  */
282 ////#define bam_get_qual(b)  ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1))
283 auto bam_get_qual(bam1_t *b) { return (*b).data + ((*b).core.n_cigar<<2) + 
284                                     (*b).core.l_qname + 
285                                         (((*b).core.l_qseq + 1)>>1); }
286 
287 /**! @function
288  @abstract  Get auxiliary data
289  @param  b  pointer to an alignment
290  @return    pointer to the concatenated auxiliary data
291  */
292 ////#define bam_get_aux(b)   ((b)->data + ((b)->core.n_cigar<<2) + (b)->core.l_qname + (((b)->core.l_qseq + 1)>>1) + (b)->core.l_qseq)
293 auto bam_get_aux(bam1_t *b) { return ((*b).data + ((*b).core.n_cigar<<2) + 
294                                     (*b).core.l_qname + 
295                                         (((*b).core.l_qseq + 1)>>1) + 
296                                             (*b).core.l_qseq); }
297 /**! @function
298  @abstract  Get length of auxiliary data
299  @param  b  pointer to an alignment
300  @return    length of the concatenated auxiliary data
301  */
302 ////#define bam_get_l_aux(b) ((b)->l_data - ((b)->core.n_cigar<<2) - (b)->core.l_qname - (b)->core.l_qseq - (((b)->core.l_qseq + 1)>>1))
303 auto bam_get_l_aux(bam1_t *b) { return ((*b).l_data - 
304                                 ((*b).core.n_cigar<<2) - 
305                                     (*b).core.l_qname - 
306                                         (*b).core.l_qseq - 
307                                             (((*b).core.l_qseq + 1)>>1)); }
308 /**! @function
309  @abstract  Get a base on read
310  @param  s  Query sequence returned by bam_get_seq()
311  @param  i  The i-th position, 0-based
312  @return    4-bit integer representing the base.
313  */
314 ////#define bam_seqi(s, i) ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf)
315 auto bam_seqi(ubyte *s, uint i) { return ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf);   }
316 //auto bam_seqi(char *s, uint i) { return ((s)[(i)>>1] >> ((~(i)&1)<<2) & 0xf); }
317 } // end pragma(inline)
318 
319 /**************************
320  *** Exported functions ***
321  **************************/
322 
323     /***************
324      *** BAM I/O ***
325      ***************/
326 
327     /// init a BAM header
328     bam_hdr_t *bam_hdr_init();
329     /// read BAM header from fp
330     bam_hdr_t *bam_hdr_read(BGZF *fp);
331     /// write BAM header to fp
332     int bam_hdr_write(BGZF *fp, const(bam_hdr_t) *h);
333     /// destroy a BAM header
334     void bam_hdr_destroy(bam_hdr_t *h);
335     /// Get contig tid 
336     int bam_name2id(bam_hdr_t *h, const(char) *_ref);
337     /// duplicate a BAM header
338     bam_hdr_t* bam_hdr_dup(const(bam_hdr_t) *h0);
339 
340     /// init a BAM record
341     bam1_t *bam_init1();
342     /// destroy a BAM record
343     void bam_destroy1(bam1_t *b);
344     /// read BAM record from fp
345     int bam_read1(BGZF *fp, bam1_t *b);
346     /// write BAM record to fp
347     int bam_write1(BGZF *fp, const(bam1_t) *b);
348     /// copy a BAM record from dst to src
349     bam1_t *bam_copy1(bam1_t *bdst, const(bam1_t) *bsrc);
350     /// duplicate a BAM record
351     bam1_t *bam_dup1(const(bam1_t) *bsrc);
352 
353     /// How much query is consumed by CIGAR op(s)?
354     int bam_cigar2qlen(int n_cigar, const(uint32_t) *cigar);
355     /// How much reference is consumed by CIGAR op(s)?
356     int bam_cigar2rlen(int n_cigar, const(uint32_t) *cigar);
357 
358     /**!
359       @abstract Calculate the rightmost base position of an alignment on the
360       reference genome.
361 
362       @param  b  pointer to an alignment
363       @return    the coordinate of the first base after the alignment, 0-based
364 
365       @discussion For a mapped read, this is just b->core.pos + bam_cigar2rlen.
366       For an unmapped read (either according to its flags or if it has no cigar
367       string), we return b->core.pos + 1 by convention.
368     */
369     int32_t bam_endpos(const(bam1_t) *b);
370 
371     /// returns negative value on error
372     int   bam_str2flag(const(char) *str);    /** returns negative value on error */
373     /// The string must be freed by the user
374     char *bam_flag2str(int flag);   /** The string must be freed by the user */
375 
376     /*************************
377      *** BAM/CRAM indexing ***
378      *************************/
379 
380     // These BAM iterator functions work only on BAM files.  To work with either
381     // BAM or CRAM files use the sam_index_load() & sam_itr_*() functions.
382     deprecated("These BAM iterator functions work only on BAM files. "
383         ~ "To work with either BAM or CRAM files use the sam_index_load() & sam_itr_*() functions.")
384     {
385     ////#define bam_itr_destroy(iter) hts_itr_destroy(iter)
386     alias bam_itr_destroy = hts_itr_destroy;
387     ////#define bam_itr_queryi(idx, tid, beg, end) sam_itr_queryi(idx, tid, beg, end)
388     alias bam_itr_queryi = sam_itr_queryi;
389     ////#define bam_itr_querys(idx, hdr, region) sam_itr_querys(idx, hdr, region)
390     alias bam_itr_querys = sam_itr_querys;
391     ////#define bam_itr_next(htsfp, itr, r) hts_itr_next((htsfp)->fp.bgzf, (itr), (r), 0)
392     pragma(inline, true)
393     auto bam_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) { return hts_itr_next(htsfp.fp.bgzf, itr, r, null); }
394     }
395 
396 // Load/build .csi or .bai BAM index file.  Does not work with CRAM.
397 // It is recommended to use the sam_index_* functions below instead.
398 deprecated("Load/build .csi or .bai BAM index file.  Does not work with CRAM. "
399     ~ "It is recommended to use the sam_index_* functions below instead.")
400 {
401 ////#define bam_index_load(fn) hts_idx_load((fn), HTS_FMT_BAI)
402 pragma(inline, true) auto bam_index_load(const(char) *fn) { return hts_idx_load(fn, HTS_FMT_BAI); }
403 ////#define bam_index_build(fn, min_shift) (sam_index_build((fn), (min_shift)))
404 alias bam_index_build = sam_index_build;
405 }
406 
407 /// Load a BAM (.csi or .bai) or CRAM (.crai) index file
408 /** @param fp  File handle of the data file whose index is being opened
409     @param fn  BAM/CRAM/etc filename to search alongside for the index file
410     @return  The index, or NULL if an error occurred.
411 */
412 hts_idx_t *sam_index_load(htsFile *fp, const(char) *fn);
413 
414 /// Load a specific BAM (.csi or .bai) or CRAM (.crai) index file
415 /** @param fp     File handle of the data file whose index is being opened
416     @param fn     BAM/CRAM/etc data file filename
417     @param fnidx  Index filename, or NULL to search alongside @a fn
418     @return  The index, or NULL if an error occurred.
419 */
420 hts_idx_t *sam_index_load2(htsFile *fp, const(char) *fn, const(char) *fnidx);
421 
422 /// Generate and save an index file
423 /** @param fn        Input BAM/etc filename, to which .csi/etc will be added
424     @param min_shift Positive to generate CSI, or 0 to generate BAI
425     @return  0 if successful, or negative if an error occurred (usually -1; or
426              -2: opening fn failed; -3: format not indexable; -4:
427              failed to create and/or save the index)
428 */
429 int sam_index_build(const(char) *fn, int min_shift);
430 
431 /// Generate and save an index to a specific file
432 /** @param fn        Input BAM/CRAM/etc filename
433     @param fnidx     Output filename, or NULL to add .bai/.csi/etc to @a fn
434     @param min_shift Positive to generate CSI, or 0 to generate BAI
435     @return  0 if successful, or negative if an error occurred (see
436              sam_index_build for error codes)
437 */
438 int sam_index_build2(const(char) *fn, const(char) *fnidx, int min_shift);
439 /// ditto
440 int sam_index_build3(const(char) *fn, const(char) *fnidx, int min_shift, int nthreads);
441 
442     ////#define sam_itr_destroy(iter) hts_itr_destroy(iter)
443     alias sam_itr_destroy = hts_itr_destroy;
444     /// SAM/BAM/CRAM iterator query by integer tid/start/end
445     hts_itr_t *sam_itr_queryi(const(hts_idx_t) *idx, int tid, int beg, int end);
446     /// SAM/BAM/CRAM iterator query by string ("chr:start-end")
447     hts_itr_t *sam_itr_querys(const(hts_idx_t) *idx, bam_hdr_t *hdr, const(char) *region);
448     /// SAM/BAM/CRAM iterator query by region list
449     hts_itr_multi_t *sam_itr_regions(const(hts_idx_t) *idx, bam_hdr_t *hdr, hts_reglist_t *reglist, uint regcount);
450 
451     ////#define sam_itr_next(htsfp, itr, r) hts_itr_next((htsfp)->fp.bgzf, (itr), (r), (htsfp))
452     pragma(inline, true)
453     auto sam_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) { return hts_itr_next(htsfp.fp.bgzf, itr, r, htsfp); }
454     ////#define sam_itr_multi_next(htsfp, itr, r) hts_itr_multi_next((htsfp), (itr), (r))
455     //auto sam_itr_multi_next(htsFile *htsfp, hts_itr_t *itr, void *r) { return hts_itr_multi_next(htsfp, itr, r); }
456     alias sam_itr_multi_next = hts_itr_multi_next;
457 
458     /***************
459      *** SAM I/O ***
460      ***************/
461 
462     //#define sam_open(fn, mode) (hts_open((fn), (mode)))
463     alias sam_open = hts_open;
464     //#define sam_open_format(fn, mode, fmt) (hts_open_format((fn), (mode), (fmt)))
465     alias sam_open_format = hts_open_format;
466     //#define sam_close(fp) hts_close(fp)
467     alias sam_close = hts_close;
468 
469     /// open SAM/BAM/CRAM
470     int sam_open_mode(char *mode, const(char) *fn, const(char) *format);
471 
472     /// A version of sam_open_mode that can handle ,key=value options.
473     /// The format string is allocated and returned, to be freed by the caller.
474     /// Prefix should be "r" or "w",
475     char *sam_open_mode_opts(const(char) *fn,
476                              const(char) *mode,
477                              const(char) *format);
478 
479     alias samFile = htsFile;
480     /// parse SAM/BAM/CRAM header from text
481     bam_hdr_t *sam_hdr_parse(int l_text, const(char) *text);
482     /// read SAM/BAM/CRAM header frpm fp
483     bam_hdr_t *sam_hdr_read(samFile *fp);
484     /// write SAM/BAM/CRAM header to fp
485     int sam_hdr_write(samFile *fp, const(bam_hdr_t) *h);
486     /// edit SAM/BAM/CRAM header @HD line by key/value (see SAM specs section 1.3)
487     int sam_hdr_change_HD(bam_hdr_t *h, const(char) *key, const(char) *val);
488 
489     /// parse text SAM line
490     int sam_parse1(kstring_t *s, bam_hdr_t *h, bam1_t *b);
491     /// emit text SAM line
492     int sam_format1(const(bam_hdr_t) *h, const(bam1_t) *b, kstring_t *str);
493 
494     /**!
495      *  @return >= 0 on successfully reading a new record, -1 on end of stream, < -1 on error
496      **/
497     int sam_read1(samFile *fp, bam_hdr_t *h, bam1_t *b);
498     /// Return >= 0 on successfully writing a new record; <= -1 on error
499     int sam_write1(samFile *fp, const(bam_hdr_t) *h, const(bam1_t) *b);
500 
501     /*************************************
502      *** Manipulating auxiliary fields ***
503      *************************************/
504 
505 /// Return a pointer to an aux record
506 /** @param b   Pointer to the bam record
507     @param tag Desired aux tag
508     @return Pointer to the tag data, or NULL if tag is not present or on error
509     If the tag is not present, this function returns NULL and sets errno to
510     ENOENT.  If the bam record's aux data is corrupt (either a tag has an
511     invalid type, or the last record is incomplete) then errno is set to
512     EINVAL and NULL is returned.
513  */
514 uint8_t *bam_aux_get(const(bam1_t) *b, const ref char[2] tag);
515 
516 /// Get an integer aux value
517 /** @param s Pointer to the tag data, as returned by bam_aux_get()
518     @return The value, or 0 if the tag was not an integer type
519     If the tag is not an integer type, errno is set to EINVAL.  This function
520     will not return the value of floating-point tags.
521 */
522 int64_t bam_aux2i(const(uint8_t) *s);
523 
524 /// Get an integer aux value
525 /** @param s Pointer to the tag data, as returned by bam_aux_get()
526     @return The value, or 0 if the tag was not an integer type
527     If the tag is not an numeric type, errno is set to EINVAL.  The value of
528     integer flags will be returned cast to a double.
529 */
530 double bam_aux2f(const(uint8_t) *s);
531 
532 /// Get a character aux value
533 /** @param s Pointer to the tag data, as returned by bam_aux_get().
534     @return The value, or 0 if the tag was not a character ('A') type
535     If the tag is not a character type, errno is set to EINVAL.
536 */
537 char bam_aux2A(const(uint8_t) *s);
538 
539 /// Get a string aux value
540 /** @param s Pointer to the tag data, as returned by bam_aux_get().
541     @return Pointer to the string, or NULL if the tag was not a string type
542     If the tag is not a string type ('Z' or 'H'), errno is set to EINVAL.
543 */
544 char *bam_aux2Z(const(uint8_t) *s);
545 
546 /// Get the length of an array-type ('B') tag
547 /** @param s Pointer to the tag data, as returned by bam_aux_get().
548     @return The length of the array, or 0 if the tag is not an array type.
549     If the tag is not an array type, errno is set to EINVAL.
550  */
551 uint32_t bam_auxB_len(const(uint8_t) *s);
552 
553 /// Get an integer value from an array-type tag
554 /** @param s   Pointer to the tag data, as returned by bam_aux_get().
555     @param idx 0-based Index into the array
556     @return The idx'th value, or 0 on error.
557     If the array is not an integer type, errno is set to EINVAL.  If idx
558     is greater than or equal to  the value returned by bam_auxB_len(s),
559     errno is set to ERANGE.  In both cases, 0 will be returned.
560  */
561 int64_t bam_auxB2i(const(uint8_t) *s, uint32_t idx);
562 
563 /// Get a floating-point value from an array-type tag
564 /** @param s   Pointer to the tag data, as returned by bam_aux_get().
565     @param idx 0-based Index into the array
566     @return The idx'th value, or 0.0 on error.
567     If the array is not a numeric type, errno is set to EINVAL.  This can
568     only actually happen if the input record has an invalid type field.  If
569     idx is greater than or equal to  the value returned by bam_auxB_len(s),
570     errno is set to ERANGE.  In both cases, 0.0 will be returned.
571  */
572 double bam_auxB2f(const(uint8_t) *s, uint32_t idx);
573 
574 /// Append tag data to a bam record
575 /* @param b    The bam record to append to.
576    @param tag  Tag identifier
577    @param type Tag data type
578    @param len  Length of the data in bytes
579    @param data The data to append
580    @return 0 on success; -1 on failure.
581 If there is not enough space to store the additional tag, errno is set to
582 ENOMEM.  If the type is invalid, errno may be set to EINVAL.  errno is
583 also set to EINVAL if the bam record's aux data is corrupt.
584 */
585 int bam_aux_append(bam1_t *b, const ref char[2] tag, char type, int len, const(uint8_t) *data);
586 
587 /// Delete tag data from a bam record
588 /* @param b The bam record to update
589    @param s Pointer to the tag to delete, as returned by bam_aux_get().
590    @return 0 on success; -1 on failure
591    If the bam record's aux data is corrupt, errno is set to EINVAL and this
592    function returns -1;
593 */
594 int bam_aux_del(bam1_t *b, uint8_t *s);
595 
596 /// Update or add a string-type tag
597 /* @param b    The bam record to update
598    @param tag  Tag identifier
599    @param len  The length of the new string
600    @param data The new string
601    @return 0 on success, -1 on failure
602    This function will not change the ordering of tags in the bam record.
603    New tags will be appended to any existing aux records.
604 
605    On failure, errno may be set to one of the following values:
606 
607    EINVAL: The bam record's aux data is corrupt or an existing tag with the
608    given ID is not of type 'Z'.
609 
610    ENOMEM: The bam data needs to be expanded and either the attempt to
611    reallocate the data buffer failed or the resulting buffer would be
612    longer than the maximum size allowed in a bam record (2Gbytes).
613 */
614 int bam_aux_update_str(bam1_t *b, const ref char[2] tag, int len, const(char) *data);
615 
616 /// Update or add an integer tag
617 /* @param b    The bam record to update
618    @param tag  Tag identifier
619    @param val  The new value
620    @return 0 on success, -1 on failure
621    This function will not change the ordering of tags in the bam record.
622    New tags will be appended to any existing aux records.
623 
624    On failure, errno may be set to one of the following values:
625 
626    EINVAL: The bam record's aux data is corrupt or an existing tag with the
627    given ID is not of an integer type (c, C, s, S, i or I).
628 
629    EOVERFLOW (or ERANGE on systems that do not have EOVERFLOW): val is
630    outside the range that can be stored in an integer bam tag (-2147483647
631    to 4294967295).
632 
633    ENOMEM: The bam data needs to be expanded and either the attempt to
634    reallocate the data buffer failed or the resulting buffer would be
635    longer than the maximum size allowed in a bam record (2Gbytes).
636 */
637 int bam_aux_update_int(bam1_t *b, const ref char[2] tag, int64_t val);
638 
639 /// Update or add a floating-point tag
640 /* @param b    The bam record to update
641    @param tag  Tag identifier
642    @param val  The new value
643    @return 0 on success, -1 on failure
644    This function will not change the ordering of tags in the bam record.
645    New tags will be appended to any existing aux records.
646 
647    On failure, errno may be set to one of the following values:
648 
649    EINVAL: The bam record's aux data is corrupt or an existing tag with the
650    given ID is not of a float type.
651 
652    ENOMEM: The bam data needs to be expanded and either the attempt to
653    reallocate the data buffer failed or the resulting buffer would be
654    longer than the maximum size allowed in a bam record (2Gbytes).
655 */
656 int bam_aux_update_float(bam1_t *b, const ref char[2] tag, float val);
657 
658 /// Update or add an array tag
659 /* @param b     The bam record to update
660    @param tag   Tag identifier
661    @param type  Data type (one of c, C, s, S, i, I or f)
662    @param items Number of items
663    @param data  Pointer to data
664    @return 0 on success, -1 on failure
665    The type parameter indicates the how the data is interpreted:
666 
667    Letter code | Data type | Item Size (bytes)
668    ----------- | --------- | -----------------
669    c           | int8_t    | 1
670    C           | uint8_t   | 1
671    s           | int16_t   | 2
672    S           | uint16_t  | 2
673    i           | int32_t   | 4
674    I           | uint32_t  | 4
675    f           | float     | 4
676 
677    This function will not change the ordering of tags in the bam record.
678    New tags will be appended to any existing aux records.  The bam record
679    will grow or shrink in order to accomodate the new data.
680 
681    The data parameter must not point to any data in the bam record itself or
682    undefined behaviour may result.
683 
684    On failure, errno may be set to one of the following values:
685 
686    EINVAL: The bam record's aux data is corrupt, an existing tag with the
687    given ID is not of an array type or the type parameter is not one of
688    the values listed above.
689 
690    ENOMEM: The bam data needs to be expanded and either the attempt to
691    reallocate the data buffer failed or the resulting buffer would be
692    longer than the maximum size allowed in a bam record (2Gbytes).
693 */
694 int bam_aux_update_array(bam1_t *b, const ref char[2] tag,
695                          uint8_t type, uint32_t items, void *data);
696 
697 /**************************
698  *** Pileup and Mpileup ***
699  **************************/
700 
701 /*! @typedef
702  @abstract Generic pileup 'client data'.
703 
704  @discussion The pileup iterator allows setting a constructor and
705  destructor function, which will be called every time a sequence is
706  fetched and discarded.  This permits caching of per-sequence data in
707  a tidy manner during the pileup process.  This union is the cached
708  data to be manipulated by the "client" (the caller of pileup).
709 */
710 union bam_pileup_cd {
711     void    *p; /// data ptr
712     int64_t i;  /// ?
713     double  f;  /// ?
714 }
715 
716 /**! @typedef
717  @abstract Structure for one alignment covering the pileup position.
718  @field  b          pointer to the alignment
719  @field  qpos       position of the read base at the pileup site, 0-based
720  @field  indel      indel length; 0 for no indel, positive for ins and negative for del
721  @field  level      the level of the read in the "viewer" mode
722  @field  is_del     1 iff the base on the padded read is a deletion
723  @field  is_head    ???
724  @field  is_tail    ???
725  @field  is_refskip ???
726  @field  aux        ???
727 
728  @discussion See also bam_plbuf_push() and bam_lplbuf_push(). The
729  difference between the two functions is that the former does not
730  set bam_pileup1_t::level, while the later does. Level helps the
731  implementation of alignment viewers, but calculating this has some
732  overhead.
733  */
734 struct bam_pileup1_t {
735     bam1_t  *b;     /// bam record
736     int32_t qpos;   /// query position
737     /// ???
738     int indel, level;
739     pragma(msg, "bam_pileup1_t: bitfield order assumed starting with LSB");
740     //uint32_t is_del:1, is_head:1, is_tail:1, is_refskip:1, aux:28;
741     mixin(bitfields!(
742         bool, "is_del",  1,
743         bool, "is_head", 1,
744         bool, "is_tail", 1,
745         bool, "is_refskip", 1,
746         uint, "aux", 28 ));
747     bam_pileup_cd cd; /// generic per-struct data, owned by caller.
748 }
749 
750 ///typedef int (*bam_plp_auto_f)(void *data, bam1_t *b);
751 alias bam_plp_auto_f = int *function(void *data, bam1_t *b);
752 
753 /// opaque pileup data defined in sam.c
754 struct __bam_plp_t;
755 ///typedef struct __bam_plp_t *bam_plp_t;
756 alias bam_plp_t = __bam_plp_t*;
757 
758 /// opaque mpileup data defined in sam.c
759 struct __bam_mplp_t;
760 ///typedef struct __bam_mplp_t *bam_mplp_t;
761 alias bam_mplp_t = __bam_mplp_t*;
762 
763     /**
764      *  bam_plp_init() - sets an iterator over multiple
765      *  @func:      see mplp_func in bam_plcmd.c in samtools for an example. Expected return
766      *              status: 0 on success, -1 on end, < -1 on non-recoverable errors
767      *  @data:      user data to pass to @func
768      */
769     /// NB: maxcnt records default is 8000
770     bam_plp_t bam_plp_init(bam_plp_auto_f func, void *data);
771     /// destroy pileup iterator
772     void bam_plp_destroy(bam_plp_t iter);
773     /// add bam record to pileup iterator
774     int bam_plp_push(bam_plp_t iter, const(bam1_t) *b);
775     /// Prepares next pileup position in bam records collected by bam_plp_auto -> user func -> bam_plp_push. Returns
776     /// pointer to the piled records if next position is ready or NULL if there is not enough records in the
777     /// buffer yet (the current position is still the maximum position across all buffered reads).
778     const(bam_pileup1_t)*bam_plp_next(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
779     /// ???
780     const(bam_pileup1_t)*bam_plp_auto(bam_plp_t iter, int *_tid, int *_pos, int *_n_plp);
781     /// set maximum pileup records returned (init default is 8000)
782     void bam_plp_set_maxcnt(bam_plp_t iter, int maxcnt);
783     /// reset pileup
784     void bam_plp_reset(bam_plp_t iter);
785 
786     /**
787      *  bam_plp_constructor() - sets a callback to initialise any per-pileup1_t fields.
788      *  @plp:       The bam_plp_t initialised using bam_plp_init.
789      *  @func:      The callback function itself.  When called, it is given the
790      *              data argument (specified in bam_plp_init), the bam structure and
791      *              a pointer to a locally allocated bam_pileup_cd union.  This union
792      *              will also be present in each bam_pileup1_t created.
793      */
794     void bam_plp_constructor(bam_plp_t plp,
795                              int function(void *data, const(bam1_t) *b, bam_pileup_cd *cd) func);
796     /// per-pileup1_t field destructor (may be needed if used bam_plp_constructor())
797     void bam_plp_destructor(bam_plp_t plp,
798                             int function(void *data, const(bam1_t) *b, bam_pileup_cd *cd) func);
799 
800     /// initialize new mpileup iterator
801     bam_mplp_t bam_mplp_init(int n, bam_plp_auto_f func, void **data);
802     /**
803      *  bam_mplp_init_overlaps() - if called, mpileup will detect overlapping
804      *  read pairs and for each base pair set the base quality of the
805      *  lower-quality base to zero, thus effectively discarding it from
806      *  calling. If the two bases are identical, the quality of the other base
807      *  is increased to the sum of their qualities (capped at 200), otherwise
808      *  it is multiplied by 0.8.
809      */
810     void bam_mplp_init_overlaps(bam_mplp_t iter);
811     /// destroy mpileup iterator
812     void bam_mplp_destroy(bam_mplp_t iter);
813     /// set maximum mpileup records returned per subpileup (init default of each pileup in the mpileup is 8000)
814     void bam_mplp_set_maxcnt(bam_mplp_t iter, int maxcnt);
815     /// ???
816     int bam_mplp_auto(bam_mplp_t iter, int *_tid, int *_pos, int *n_plp, const bam_pileup1_t **plp);
817     /// reset mpileup
818     void bam_mplp_reset(bam_mplp_t iter);
819     /// see bam_plp_constructor
820     void bam_mplp_constructor(bam_mplp_t iter,
821                               int function(void *data, const(bam1_t) *b, bam_pileup_cd *cd) func);
822     /// see bam_plp_destructor
823     void bam_mplp_destructor(bam_mplp_t iter,
824                              int function(void *data, const(bam1_t) *b, bam_pileup_cd *cd) func);
825 
826 
827 /***********************************
828  * BAQ calculation and realignment *
829  ***********************************/
830 
831 int sam_cap_mapq(bam1_t *b, const(char) *_ref, int ref_len, int thres);
832 
833 /// Calculate BAQ scores
834 /** @param b   BAM record
835     @param ref     Reference sequence
836     @param ref_len Reference sequence length
837     @param flag    Flags, see description
838     @return 0 on success \n
839            -1 if the read was unmapped, zero length, had no quality values, did not have at least one M, X or = CIGAR operator, or included a reference skip. \n
840            -3 if BAQ alignment has already been done and does not need to be applied, or has already been applied. \n
841            -4 if alignment failed (most likely due to running out of memory)
842 
843 This function calculates base alignment quality (BAQ) values using the method
844 described in "Improving SNP discovery by base alignment quality", Heng Li,
845 Bioinformatics, Volume 27, Issue 8 (https://doi.org/10.1093/bioinformatics/btr076).
846 
847 The following @param flag bits can be used:
848 
849 Bit 0: Adjust the quality values using the BAQ values
850 
851  If set, the data in the BQ:Z tag is used to adjust the quality values, and
852  the BQ:Z tag is renamed to ZQ:Z.
853 
854  If clear, and a ZQ:Z tag is present, the quality values are reverted using
855  the data in the tag, and the tag is renamed to BQ:Z.
856 
857 Bit 1: Use "extended" BAQ.
858 
859  Changes the BAQ calculation to increase sensitivity at the expense of
860  reduced specificity.
861 
862 Bit 2: Recalculate BAQ, even if a BQ tag is present.
863 
864  Force BAQ to be recalculated.  Note that a ZQ:Z tag will always disable
865  recalculation.
866 
867 @bug
868 If the input read has both BQ:Z and ZQ:Z tags, the ZQ:Z one will be removed.
869 Depending on what previous processing happened, this may or may not be the
870 correct thing to do.  It would be wise to avoid this situation if possible.
871 */
872 
873 int sam_prob_realn(bam1_t *b, const(char) *_ref, int ref_len, int flag);