1 /// D bindings to htslib-1.9 vcf
2 /// Copyright 2018 James S Blachly, MD
3 /// Changes are MIT licensed
4 /// Section numbers refer to VCF Specification v4.2: https://samtools.github.io/hts-specs/VCFv4.2.pdf
5 module dhtslib.htslib.vcf;
6 
7 import std.bitmanip;
8 import std.string: toStringz;
9 
10 extern (C):
11 
12 /// @file htslib/vcf.h
13 /// High-level VCF/BCF variant calling file operations.
14 /*
15     Copyright (C) 2012, 2013 Broad Institute.
16     Copyright (C) 2012-2014 Genome Research Ltd.
17 
18     Author: Heng Li <lh3@sanger.ac.uk>
19 
20 Permission is hereby granted, free of charge, to any person obtaining a copy
21 of this software and associated documentation files (the "Software"), to deal
22 in the Software without restriction, including without limitation the rights
23 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
24 copies of the Software, and to permit persons to whom the Software is
25 furnished to do so, subject to the following conditions:
26 
27 The above copyright notice and this permission notice shall be included in
28 all copies or substantial portions of the Software.
29 
30 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
31 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
32 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
33 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
34 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
35 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
36 DEALINGS IN THE SOFTWARE.  */
37 
38 /*
39     todo:
40         - make the function names consistent
41         - provide calls to abstract away structs as much as possible
42  */
43 
44 import core.stdc.stdint;
45 import core.stdc.limits;
46 import core.stdc.assert_;
47 import dhtslib.htslib.hts;
48 import dhtslib.htslib.kstring: __kstring_t, kstring_t;
49 import dhtslib.htslib.bgzf: BGZF; // normally typedefed as opaque struct in hts.h
50 //#include "hts_defs.h"
51 //#include "hts_endian.h"
52 
53 /*****************
54  * Header struct *
55  *****************/
56 enum {
57     BCF_HL_FLT  = 0, /// header line: FILTER
58     BCF_HL_INFO = 1, /// header line: INFO
59     BCF_HL_FMT  = 2, /// header line: FORMAT
60     BCF_HL_CTG  = 3, /// header line: contig
61     BCF_HL_STR  = 4, /// header line: structured header line TAG=<A=..,B=..>
62     BCF_HL_GEN  = 5, /// header line: generic header line
63 
64     BCF_HT_FLAG = 0, /// header type: FLAG
65     BCF_HT_INT  = 1, /// header type: INTEGER
66     BCF_HT_REAL = 2, /// header type: REAL
67     BCF_HT_STR  = 3, /// header type: STRING
68 
69     BCF_VL_FIXED=  0, /// variable length: fixed (?)
70     BCF_VL_VAR  =  1, /// variable length: variable
71     BCF_VL_A    =  2, /// variable length: ?
72     BCF_VL_G    =  3, /// variable length: ?
73     BCF_VL_R    =  4  /// variable length: ?
74 }
75 
76 /* === Dictionary ===
77 
78    The header keeps three dictonaries. The first keeps IDs in the
79    "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths
80    in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[]
81    is the actual hash table, which is opaque to the end users. In the hash
82    table, the key is the ID or sample name as a C string and the value is a
83    bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash
84    table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the
85    size of the hash table or, equivalently, the length of the id[] arrays.
86 */
87 
88 enum int BCF_DT_ID     = 0; /// dictionary type: ID
89 enum int BCF_DT_CTG    = 1; /// dictionary type: CONTIG
90 enum int BCF_DT_SAMPLE = 2; /// dictionary type: SAMPLE
91 
92 /// Structured representation of a header line (§1.2)
93 struct bcf_hrec_t { // @suppress(dscanner.style.phobos_naming_convention)
94     int type;       /// One of the BCF_HL_* type
95     char *key;      /// The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc.
96     char *value;    /// Set only for generic lines, NULL for FILTER/INFO, etc.
97     int nkeys;      /// Number of structured fields
98     char **keys;    /// The key=value pairs
99     char **vals;    /// The key=value pairs
100 }
101 
102 ///  ID Dictionary entry
103 struct bcf_idinfo_t {   // @suppress(dscanner.style.phobos_naming_convention)
104     uint32_t[3] info;   /** stores Number:20, var:4, Type:4, ColType:4 in info[0..2]
105                             for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG */
106     bcf_hrec_t *[3] hrec;   /// pointers to header lines for [FILTER, INFO, FORMAT] in order
107     int id;                 /// primary key
108 }
109 
110 /// ID Dictionary k/v
111 struct bcf_idpair_t { // @suppress(dscanner.style.phobos_naming_convention)
112     const(char) *key;        /// header dictionary FILTER/INFO/FORMAT ID key
113     const bcf_idinfo_t *val;/// header dictionary FILTER/INFO/FORMAT ID entry
114 }
115 
116 /// Structured repreentation of VCF header (§1.2)
117 /// Note that bcf_hdr_t structs must always be created via bcf_hdr_init()
118 struct bcf_hdr_t { // @suppress(dscanner.style.phobos_naming_convention)
119     int32_t[3] n;           /// n:the size of the dictionary block in use, (allocated size, m, is below to preserve ABI)
120     bcf_idpair_t *[3] id;   /// ID dictionary {FILTER/INFO/FORMAT, contig, sample} ID key/entry
121     void *[3] dict;         /// hash table
122     char **samples;         /// ?list of samples
123     bcf_hrec_t **hrec;      /// Structured representation of this header line
124     int nhrec;              /// # of header records
125     int dirty;              /// ?
126     int ntransl;            /// for bcf_translate()
127     int *[2] transl;        /// for bcf_translate()
128     int nsamples_ori;       /// for bcf_hdr_set_samples()
129     uint8_t *keep_samples;  /// ?
130     kstring_t mem;          /// ?
131     int32_t[3] m;           /// m: allocated size of the dictionary block in use (see n above)
132 }
133 
134 /// Lookup table used in bcf_record_check
135 /// MAINTAINER: in C header is []
136 extern __gshared uint8_t[16] bcf_type_shift;
137 
138 /**************
139  * VCF record *
140  **************/
141 
142 enum int BCF_BT_NULL   = 0; /// null
143 enum int BCF_BT_INT8   = 1; /// int8
144 enum int BCF_BT_INT16  = 2; /// int16
145 enum int BCF_BT_INT32  = 3; /// int32
146 enum int BCF_BT_FLOAT  = 5; /// float (32?)
147 enum int BCF_BT_CHAR   = 7; /// char (8 bit)
148 
149 enum int VCF_REF  =0;   /// ref (e.g. in a gVCF)
150 enum int VCF_SNP  =1;   /// SNP 
151 enum int VCF_MNP  =2;   /// MNP
152 enum int VCF_INDEL=4;   /// INDEL
153 enum int VCF_OTHER=8;   /// other (e.g. SV)
154 enum int VCF_BND  =16;  /// breakend
155 
156 /// variant type record embedded in bcf_dec_t
157 /// variant type and the number of bases affected, negative for deletions
158 struct variant_t { // @suppress(dscanner.style.phobos_naming_convention)
159     int type;   /// variant type and the number of bases affected, negative for deletions
160     int n;      /// variant type and the number of bases affected, negative for deletions
161 }
162 
163 /// FORMAT field data (§1.4.2 Genotype fields)
164 struct bcf_fmt_t { // @suppress(dscanner.style.phobos_naming_convention)
165     int id;         /// id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key
166     int n;          /// n: number of values per-sample; size: number of bytes per-sample; type: one of BCF_BT_* types
167     int size;       /// size: number of bytes per-sample; type: one of BCF_BT_* types
168     int type;       /// type: one of BCF_BT_* types
169     uint8_t *p;     /// same as vptr and vptr_* in bcf_info_t below
170     uint32_t p_len;                 /// ?
171     
172     /// ???
173     mixin(bitfields!(
174         uint, "p_off", 31,
175         bool, "p_free", 1));        /// ?
176 }
177 
178 /// INFO field data (§1.4.1 Fixed fields, (8) INFO)
179 struct bcf_info_t { // @suppress(dscanner.style.phobos_naming_convention)
180     int key;        /// key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key
181     int type;       /// type: one of BCF_BT_* types; len: vector length, 1 for scalars
182     int len;        /// type: one of BCF_BT_* types; len: vector length, 1 for scalars
183     /// Stores a numeric value iff this INFO field is a scalar
184     union V1 {
185         int32_t i;  /// integer value
186         float f;    /// float value
187     }
188     V1 v1;          /// only set if $len==1; for easier access
189     uint8_t *vptr;          /// pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes
190     uint32_t vptr_len;      /// length of the vptr block or, when set, of the vptr_mod block, excluding offset
191     
192     /// vptr offset, i.e., the size of the INFO key plus size+type bytes
193     /// indicates that vptr-vptr_off must be freed; set only when modified and the new
194     ///    data block is bigger than the original
195     mixin(bitfields!(
196         uint, "vptr_off", 31,   
197         bool, "vptr_free", 1));
198 }
199 
200 
201 enum int BCF1_DIRTY_ID  = 1;    /// ID was edited
202 enum int BCF1_DIRTY_ALS = 2;    /// Allele(s) was edited
203 enum int BCF1_DIRTY_FLT = 4;    /// FILTER was edited
204 enum int BCF1_DIRTY_INF = 8;    /// INFO was edited
205 
206 /// Variable-length data from a VCF record
207 struct bcf_dec_t { // @suppress(dscanner.style.phobos_naming_convention)
208     /// allocated size (high-water mark); do not change 
209     int m_fmt, m_info, m_id, m_als, m_allele, m_flt; 
210     int n_flt;          /// Number of FILTER fields
211     int *flt;           /// FILTER keys in the dictionary
212     char *id;           /// ID
213     char *als;          /// REF+ALT block (\0-seperated)
214     char **allele;      /// allele[0] is the REF (allele[] pointers to the als block); all null terminated
215     bcf_info_t *info;   /// INFO
216     bcf_fmt_t *fmt;     /// FORMAT and individual sample
217     variant_t *var;     /// $var and $var_type set only when set_variant_types called
218     int n_var;          /// variant number(???)
219     int var_type;       /// variant type (TODO: make enum)
220     int shared_dirty;   /// if set, shared.s must be recreated on BCF output (TODO: make enum)
221     int indiv_dirty;    /// if set, indiv.s must be recreated on BCF output (TODO: make enum)
222 }
223 
224 
225 enum int BCF_ERR_CTG_UNDEF   =  1;  /// BCF error: undefined contig
226 enum int BCF_ERR_TAG_UNDEF   =  2;  /// BCF error: undefined tag
227 enum int BCF_ERR_NCOLS       =  4;  /// BCF error: 
228 enum int BCF_ERR_LIMITS      =  8;  /// BCF error: 
229 enum int BCF_ERR_CHAR        = 16;  /// BCF error: 
230 enum int BCF_ERR_CTG_INVALID = 32;  /// BCF error: 
231 enum int BCF_ERR_TAG_INVALID = 64;  /// BCF error: 
232 
233 /**
234     The bcf1_t structure corresponds to one VCF/BCF line. Reading from VCF file
235     is slower because the string is first to be parsed, packed into BCF line
236     (done in vcf_parse), then unpacked into internal bcf1_t structure. If it
237     is known in advance that some of the fields will not be required (notably
238     the sample columns), parsing of these can be skipped by setting max_unpack
239     appropriately.
240     Similarly, it is fast to output a BCF line because the columns (kept in
241     shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF
242     line must be formatted in vcf_format.
243  */
244 struct bcf1_t { // @suppress(dscanner.style.phobos_naming_convention)
245     int32_t rid;  /// CHROM
246     int32_t pos;  /// POS
247     int32_t rlen; /// length of REF
248     float qual;   /// QUAL
249 
250     mixin(bitfields!(
251         uint, "n_info", 16,     /// For whatever reason, windows doesn't like uint32_t
252         uint, "n_allele", 16));
253 
254     mixin(bitfields!(
255         uint, "n_fmt", 8,       /// For whatever reason, windows doesn't like uint32_t
256         uint, "n_sample", 24));
257 
258     kstring_t _shared;      /// ??? (name mangled due to D reserved keyword shared)
259     kstring_t indiv;        /// ???
260     bcf_dec_t d;            /// lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack()
261     int max_unpack;         /// Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed
262     int unpacked;           /// remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work
263     int[3] unpack_size;     /// the original block size of ID, REF+ALT and FILTER
264     int errcode;    /// one of BCF_ERR_* codes (TODO: make enum)
265 }
266 
267 /*******
268  * API *
269  *******/
270 
271     /***********************************************************************
272      *  BCF and VCF I/O
273      *
274      *  A note about naming conventions: htslib internally represents VCF
275      *  records as bcf1_t data structures, therefore most functions are
276      *  prefixed with bcf_. There are a few exceptions where the functions must
277      *  be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In
278      *  these cases, functions prefixed with bcf_ are more general and work
279      *  with both BCF and VCF.
280      *
281      ***********************************************************************/
282 
283     /** These macros are defined only for consistency with other parts of htslib */
284     alias bcf_init1     = bcf_init;
285     alias bcf_read1     = bcf_read;
286     alias vcf_read1     = vcf_read;
287     alias bcf_write1    = bcf_write;
288     alias vcf_write1    = vcf_write;
289     alias bcf_destroy1  = bcf_destroy;
290     alias bcf_empty1    = bcf_empty;
291     alias vcf_parse1    = vcf_parse;
292     alias bcf_clear1    = bcf_clear;
293     alias vcf_format1   = vcf_format;
294 
295     /**
296      *  bcf_hdr_init() - create an empty BCF header.
297      *  @param mode    "r" or "w"
298      *
299      *  When opened for writing, the mandatory fileFormat and
300      *  FILTER=PASS lines are added automatically.
301      */
302     bcf_hdr_t *bcf_hdr_init(const(char) *mode);
303 
304     /** Destroy a BCF header struct */
305     void bcf_hdr_destroy(bcf_hdr_t *h);
306 
307     /** Initialize a bcf1_t object; equivalent to calloc(1, sizeof(bcf1_t)) */
308     bcf1_t *bcf_init();
309 
310     /** Deallocate a bcf1_t object */
311     void bcf_destroy(bcf1_t *v);
312 
313     /**
314      *  Same as bcf_destroy() but frees only the memory allocated by bcf1_t,
315      *  not the bcf1_t object itself.
316      */
317     void bcf_empty(bcf1_t *v);
318 
319     /**
320      *  Make the bcf1_t object ready for next read. Intended mostly for
321      *  internal use, the user should rarely need to call this function
322      *  directly.
323      */
324     void bcf_clear(bcf1_t *v);
325 
326 
327     /** bcf_open and vcf_open mode: please see hts_open() in hts.h */
328     alias vcfFile = htsFile;
329     alias bcf_open = hts_open;
330     alias vcf_open = hts_open;
331     alias bcf_close = hts_close;
332     alias vcf_close = hts_close;
333 
334     /** Reads VCF or BCF header */
335     bcf_hdr_t *bcf_hdr_read(htsFile *fp);
336 
337     /**
338      *  bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed
339      *  @samples: samples to include or exclude from file or as a comma-separated string.
340      *              LIST|FILE   .. select samples in list/file
341      *              ^LIST|FILE  .. exclude samples from list/file
342      *              -           .. include all samples
343      *              NULL        .. exclude all samples
344      *  @is_file: @samples is a file (1) or a comma-separated list (0)
345      *
346      *  The bottleneck of VCF reading is parsing of genotype fields. If the
347      *  reader knows in advance that only subset of samples is needed (possibly
348      *  no samples at all), the performance of bcf_read() can be significantly
349      *  improved by calling bcf_hdr_set_samples after bcf_hdr_read().
350      *  The function bcf_read() will subset the VCF/BCF records automatically
351      *  with the notable exception when reading records via bcf_itr_next().
352      *  In this case, bcf_subset_format() must be called explicitly, because
353      *  bcf_readrec() does not see the header.
354      *
355      *  Returns 0 on success, -1 on error or a positive integer if the list
356      *  contains samples not present in the VCF header. In such a case, the
357      *  return value is the index of the offending sample.
358      */
359     int bcf_hdr_set_samples(bcf_hdr_t *hdr, const(char) *samples, int is_file);
360     /// ditto
361     int bcf_subset_format(const(bcf_hdr_t) *hdr, bcf1_t *rec);
362 
363 
364     /** Writes VCF or BCF header */
365     int bcf_hdr_write(htsFile *fp, bcf_hdr_t *h);
366 
367     /**
368      * Parse VCF line contained in kstring and populate the bcf1_t struct
369      * The line must not end with \n or \r characters.
370      */
371     int vcf_parse(kstring_t *s, const(bcf_hdr_t) *h, bcf1_t *v);
372 
373     /** The opposite of vcf_parse. It should rarely be called directly, see vcf_write */
374     int vcf_format(const(bcf_hdr_t) *h, const(bcf1_t) *v, kstring_t *s);
375 
376     /**
377      *  bcf_read() - read next VCF or BCF record
378      *
379      *  Returns -1 on critical errors, 0 otherwise. On errors which are not
380      *  critical for reading, such as missing header definitions, v->errcode is
381      *  set to one of BCF_ERR* code and must be checked before calling
382      *  vcf_write().
383      */
384     int bcf_read(htsFile *fp, const(bcf_hdr_t) *h, bcf1_t *v);
385 
386     /**
387      *  bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field)
388      *
389      *  Note that bcf_unpack() must be called even when reading VCF. It is safe
390      *  to call the function repeatedly, it will not unpack the same field
391      *  twice.
392      */
393     int bcf_unpack(bcf1_t *b, int which);
394     enum int BCF_UN_STR  = 1;       /// up to ALT inclusive
395     enum int BCF_UN_FLT  = 2;       /// up to FILTER
396     enum int BCF_UN_INFO = 4;       /// up to INFO
397     enum int BCF_UN_SHR  = (BCF_UN_STR|BCF_UN_FLT|BCF_UN_INFO); /// all shared information
398     enum int BCF_UN_FMT  = 8;       /// unpack format and each sample
399     alias BCF_UN_IND = BCF_UN_FMT;  // a synonymo of BCF_UN_FMT
400     enum int BCF_UN_ALL  = (BCF_UN_SHR|BCF_UN_FMT);             /// everything
401 
402     /**
403      *  bcf_dup() - create a copy of BCF record.
404      *
405      *  Note that bcf_unpack() must be called on the returned copy as if it was
406      *  obtained from bcf_read(). Also note that bcf_dup() calls bcf_sync1(src)
407      *  internally to reflect any changes made by bcf_update_* functions.
408      */
409     bcf1_t *bcf_dup(bcf1_t *src);
410     /// ditto
411     bcf1_t *bcf_copy(bcf1_t *dst, bcf1_t *src);
412 
413     /**
414      *  bcf_write() - write one VCF or BCF record. The type is determined at the open() call.
415      */
416     int bcf_write(htsFile *fp, bcf_hdr_t *h, bcf1_t *v);
417 
418     /**
419      *  The following functions work only with VCFs and should rarely be called
420      *  directly. Usually one wants to use their bcf_* alternatives, which work
421      *  transparently with both VCFs and BCFs.
422      */
423     bcf_hdr_t *vcf_hdr_read(htsFile *fp);
424     /// ditto
425     int vcf_hdr_write(htsFile *fp, const(bcf_hdr_t) *h);
426     /// ditto
427     int vcf_read(htsFile *fp, const(bcf_hdr_t) *h, bcf1_t *v);
428     /// ditto
429     int vcf_write(htsFile *fp, const(bcf_hdr_t) *h, bcf1_t *v);
430 
431     /** Helper function for the bcf_itr_next() macro; internal use, ignore it */
432     /** NOTE: C API second parameter called "null", mangled here as _null */
433     int bcf_readrec(BGZF *fp, void *_null, void *v, int *tid, int *beg, int *end);
434 
435 
436 
437     /**************************************************************************
438      *  Header querying and manipulation routines
439      **************************************************************************/
440 
441     /** Create a new header using the supplied template */
442     bcf_hdr_t *bcf_hdr_dup(const(bcf_hdr_t) *hdr);
443 
444     /**
445      *  Copy header lines from src to dst if not already present in dst. See also bcf_translate().
446      *  Returns 0 on success or sets a bit on error:
447      *      1 .. conflicting definitions of tag length
448      *      // todo
449      */
450     deprecated("Please use bcf_hdr_merge instead")
451     int bcf_hdr_combine(bcf_hdr_t *dst, const(bcf_hdr_t) *src);
452 
453     /**
454      *  bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate()
455      *  @param dst: the destination header to be merged into, NULL on the first pass
456      *  @param src: the source header
457      *
458      *  Notes:
459      *      - use as:
460      *          bcf_hdr_t *dst = NULL;
461      *          for (i=0; i<nsrc; i++) dst = bcf_hdr_merge(dst,src[i]);
462      *
463      *      - bcf_hdr_merge() replaces bcf_hdr_combine() which had a problem when
464      *      combining multiple BCF headers. The current bcf_hdr_combine()
465      *      does not have this problem, but became slow when used for many files.
466      */
467     bcf_hdr_t *bcf_hdr_merge(bcf_hdr_t *dst, const(bcf_hdr_t) *src);
468 
469     /**
470      *  bcf_hdr_add_sample() - add a new sample.
471      *  @param sample:  sample name to be added
472      */
473     int bcf_hdr_add_sample(bcf_hdr_t *hdr, const(char) *sample);
474 
475     /** Read VCF header from a file and update the header */
476     int bcf_hdr_set(bcf_hdr_t *hdr, const(char) *fname);
477 
478     /// Appends formatted header text to _str_.
479     /** If _is_bcf_ is zero, `IDX` fields are discarded.
480      *  @return 0 if successful, or negative if an error occurred
481      *  @since 1.4
482      */
483     int bcf_hdr_format(const(bcf_hdr_t) *hdr, int is_bcf, kstring_t *str);
484 
485     /** Returns formatted header (newly allocated string) and its length,
486      *  excluding the terminating \0. If is_bcf parameter is unset, IDX
487      *  fields are discarded.
488      *  @deprecated Use bcf_hdr_format() instead as it can handle huge headers.
489      */
490     deprecated("use bcf_hdr_format() instead")
491     char *bcf_hdr_fmt_text(const(bcf_hdr_t) *hdr, int is_bcf, int *len);
492 
493     /** Append new VCF header line, returns 0 on success */
494     int bcf_hdr_append(bcf_hdr_t *h, const(char) *line);
495     /// ditto
496     int bcf_hdr_printf(bcf_hdr_t *h, const(char) *format, ...);
497 
498     /** VCF version, e.g. VCFv4.2 */
499     const(char) *bcf_hdr_get_version(const(bcf_hdr_t) *hdr);
500     /// Ditto
501     /// NB: mangled second parameter to _version
502     void bcf_hdr_set_version(bcf_hdr_t *hdr, const(char) *_version);
503 
504     /**
505      *  bcf_hdr_remove() - remove VCF header tag
506      *  @param type:      one of BCF_HL_*
507      *  @param key:       tag name or NULL to remove all tags of the given type
508      */
509     void bcf_hdr_remove(bcf_hdr_t *h, int type, const(char) *key);
510 
511     /**
512      *  bcf_hdr_subset() - creates a new copy of the header removing unwanted samples
513      *  @param n:        number of samples to keep
514      *  @param samples:  names of the samples to keep
515      *  @param imap:     mapping from index in @samples to the sample index in the original file
516      *
517      *  Sample names not present in h0 are ignored. The number of unmatched samples can be checked
518      *  by comparing n and bcf_hdr_nsamples(out_hdr).
519      *  This function can be used to reorder samples.
520      *  See also bcf_subset() which subsets individual records.
521      */
522     /// NOTE: char *const* samples really exmplifies what I hate about C pointers
523     /// My interpretation of this is it is equivalent to char **samples, but that the outer pointer is const
524     /// which in D would be const(char *)*samples. I don't know what it implies about constancy of *samples or samples.
525     bcf_hdr_t *bcf_hdr_subset(const(bcf_hdr_t) *h0, int n, const(char *)*samples, int *imap);
526     //bcf_hdr_t *bcf_hdr_subset(const(bcf_hdr_t) *h0, int n, char *const* samples, int *imap);
527 
528     /** Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names) */
529     const(char) **bcf_hdr_seqnames(const(bcf_hdr_t) *h, int *nseqs);
530 
531     /** Get number of samples */
532     pragma(inline, true) auto bcf_hdr_nsamples(bcf_hdr_t *hdr) { return hdr.n[BCF_DT_SAMPLE]; }
533     //#define bcf_hdr_nsamples(hdr) (hdr)->n[BCF_DT_SAMPLE]
534 
535 
536     /** The following functions are for internal use and should rarely be called directly */
537     int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt);
538     /// ditto
539     int bcf_hdr_sync(bcf_hdr_t *h);
540     /// ditto
541     bcf_hrec_t *bcf_hdr_parse_line(const(bcf_hdr_t) *h, const(char) *line, int *len);
542     /// ditto
543     void bcf_hrec_format(const(bcf_hrec_t) *hrec, kstring_t *str);
544     /// ditto
545     int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec);
546 
547     /**
548      *  bcf_hdr_get_hrec() - get header line info
549      *  @param type:  one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN
550      *  @param key:   the header key for generic lines (e.g. "fileformat"), any field
551      *                  for structured lines, typically "ID".
552      *  @param value: the value which pairs with key. Can be be NULL for BCF_HL_GEN
553      *  @param str_class: the class of BCF_HL_STR line (e.g. "ALT" or "SAMPLE"), otherwise NULL
554      */
555     bcf_hrec_t *bcf_hdr_get_hrec(const(bcf_hdr_t) *hdr, int type, const(char) *key, const(char) *value,
556                                                                                     const(char) *str_class);
557     /// Duplicate header record
558     bcf_hrec_t *bcf_hrec_dup(bcf_hrec_t *hrec);
559     /// Add key to header record
560     void bcf_hrec_add_key(bcf_hrec_t *hrec, const(char) *str, int len);
561     /// Set key's value for header record
562     void bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const(char) *str, int len, int is_quoted);
563     /// Lookup header record by key
564     int bcf_hrec_find_key(bcf_hrec_t *hrec, const(char) *key);
565     /// Index header record
566     void hrec_add_idx(bcf_hrec_t *hrec, int idx);
567     /// Deallocate header record
568     void bcf_hrec_destroy(bcf_hrec_t *hrec);
569 
570 
571 
572     /**************************************************************************
573      *  Individual record querying and manipulation routines
574      **************************************************************************/
575 
576     /** See the description of bcf_hdr_subset() */
577     int bcf_subset(const(bcf_hdr_t) *h, bcf1_t *v, int n, int *imap);
578 
579     /**
580      *  bcf_translate() - translate tags ids to be consistent with different header. This function
581      *                    is useful when lines from multiple VCF need to be combined.
582      *  @dst_hdr:   the destination header, to be used in bcf_write(), see also bcf_hdr_combine()
583      *  @src_hdr:   the source header, used in bcf_read()
584      *  @src_line:  line obtained by bcf_read()
585      */
586     int bcf_translate(const(bcf_hdr_t) *dst_hdr, bcf_hdr_t *src_hdr, bcf1_t *src_line);
587 
588     /**
589      *  bcf_get_variant_type[s]()  - returns one of VCF_REF, VCF_SNP, etc
590      */
591     int bcf_get_variant_types(bcf1_t *rec);
592     /// ditto
593     int bcf_get_variant_type(bcf1_t *rec, int ith_allele);
594     /// ditto (TODO: what is this? should this be bool?)
595     int bcf_is_snp(bcf1_t *v);
596 
597     /**
598      *  bcf_update_filter() - sets the FILTER column
599      *  @flt_ids:  The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
600      *  @n:        Number of filters. If n==0, all filters are removed
601      *  Returns:    zero
602      */
603     int bcf_update_filter(const(bcf_hdr_t) *hdr, bcf1_t *line, int *flt_ids, int n);
604     /**
605      *  bcf_add_filter() - adds to the FILTER column
606      *  @flt_id:   filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
607      *
608      *  If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed.
609      */
610     int bcf_add_filter(const(bcf_hdr_t) *hdr, bcf1_t *line, int flt_id);
611     /**
612      *  bcf_remove_filter() - removes from the FILTER column
613      *  @flt_id:   filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
614      *  @pass:     when set to 1 and no filters are present, set to PASS
615      */
616     int bcf_remove_filter(const(bcf_hdr_t) *hdr, bcf1_t *line, int flt_id, int pass);
617     /**
618      *  Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably.
619      */
620     int bcf_has_filter(const(bcf_hdr_t) *hdr, bcf1_t *line, char *filter);
621     /**
622      *  bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALLT column
623      *  @alleles:           Array of alleles
624      *  @nals:              Number of alleles
625      *  @alleles_string:    Comma-separated alleles, starting with the REF allele
626      */
627     int bcf_update_alleles(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) **alleles, int nals);
628     /// ditto
629     int bcf_update_alleles_str(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *alleles_string);
630 
631     /**
632       *  bcf_update_id() - sets new ID string
633       *  bcf_add_id() - adds to the ID string checking for duplicates
634       */
635     int bcf_update_id(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *id);
636     /// ditto
637     int bcf_add_id(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *id);
638 
639     /**
640      *  bcf_update_info_*() - functions for updating INFO fields
641      *  @hdr:       the BCF header
642      *  @line:      VCF line to be edited
643      *  @key:       the INFO tag to be updated
644      *  @values:    pointer to the array of values. Pass NULL to remove the tag.
645      *  @n:         number of values in the array. When set to 0, the INFO tag is removed
646      *
647      *  The @string in bcf_update_info_flag() is optional, @n indicates whether
648      *  the flag is set or removed.
649      *
650      *  Returns 0 on success or negative value on error.
651      */
652     int bcf_update_info(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n, int type);
653     // TODO: Write D template
654     pragma(inline, true) {
655         auto bcf_update_info_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
656             { return bcf_update_info(hdr, line, key, values, n, BCF_HT_INT); }
657         auto bcf_update_info_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
658             { return bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL); }
659         auto bcf_update_info_flag(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
660             { return bcf_update_info(hdr, line, key, values, n, BCF_HT_FLAG); }
661         auto bcf_update_info_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values) // @suppress(dscanner.style.undocumented_declaration)
662             { return bcf_update_info(hdr, line, key, values, 1, BCF_HT_STR); }
663     }
664 
665     /**
666      *  bcf_update_format_*() - functions for updating FORMAT fields
667      *  @values:    pointer to the array of values, the same number of elements
668      *              is expected for each sample. Missing values must be padded
669      *              with bcf_*_missing or bcf_*_vector_end values.
670      *  @n:         number of values in the array. If n==0, existing tag is removed.
671      *
672      *  The function bcf_update_format_string() is a higher-level (slower) variant of
673      *  bcf_update_format_char(). The former accepts array of \0-terminated strings
674      *  whereas the latter requires that the strings are collapsed into a single array
675      *  of fixed-length strings. In case of strings with variable length, shorter strings
676      *  can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char()
677      *  are not \0-terminated.
678      *
679      *  Returns 0 on success or negative value on error.
680      */
681     int bcf_update_format_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(char) **values, int n);
682     /// ditto
683     int bcf_update_format(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n, int type);
684     // TODO: Write D template
685     pragma(inline, true) {
686         auto bcf_update_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(int) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
687             { return bcf_update_format(hdr, line, key, values, n, BCF_HT_INT); }
688         auto bcf_update_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(float) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
689             { return bcf_update_format(hdr, line, key, values, n, BCF_HT_REAL); }
690         auto bcf_update_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(char) **values, int n) // @suppress(dscanner.style.undocumented_declaration)
691             { return bcf_update_format(hdr, line, key, values, n, BCF_HT_STR); }
692         auto bcf_update_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) **gts, int n) // @suppress(dscanner.style.undocumented_declaration)
693             { return bcf_update_format(hdr, line, toStringz("GT"c), gts, n, BCF_HT_INT); }
694     }
695 
696 
697     /// Macros for setting genotypes correctly, for use with bcf_update_genotypes only; idx corresponds
698     /// to VCF's GT (1-based index to ALT or 0 for the reference allele) and val is the opposite, obtained
699     /// from bcf_get_genotypes() below.
700     // TODO: is int appropriate?
701     pragma(inline, true) {
702         auto bcf_gt_phased(int idx)     { return (((idx)+1)<<1|1);  }
703         /// ditto
704         auto bcf_gt_unphased(int idx)   { return (((idx)+1)<<1);    }
705         /// ditto
706         auto bcf_gt_is_missing(int val) { return ((val)>>1 ? 0 : 1);}
707         /// ditto
708         auto bcf_gt_is_phased(int idx)  { return ((idx)&1);         }
709         /// ditto
710         auto bcf_gt_allele(int val)     { return (((val)>>1)-1);    }
711     }
712     /// ditto
713     enum int bcf_gt_missing = 0;
714 
715     /** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */
716     pragma(inline, true) {
717         auto bcf_alleles2gt(int a, int b) { return ((a)>(b)?((a)*((a)+1)/2+(b)):((b)*((b)+1)/2+(a))); }
718         /// ditto
719         void bcf_gt2alleles(int igt, int *a, int *b)
720         {
721             int k = 0, dk = 1; // @suppress(dscanner.useless-initializer)
722             while ( k<igt ) { dk++; k += dk; }
723             *b = dk - 1; *a = igt - k + *b;
724         }
725     }
726 
727     /**
728      * bcf_get_fmt() - returns pointer to FORMAT's field data
729      * @header: for access to BCF_DT_ID dictionary
730      * @line:   VCF line obtained from vcf_parse1
731      * @fmt:    one of GT,PL,...
732      *
733      * Returns bcf_fmt_t* if the call succeeded, or returns NULL when the field
734      * is not available.
735      */
736     bcf_fmt_t *bcf_get_fmt(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key);
737     /// ditto
738     bcf_info_t *bcf_get_info(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key);
739 
740     /**
741      * bcf_get_*_id() - returns pointer to FORMAT/INFO field data given the header index instead of the string ID
742      * @line: VCF line obtained from vcf_parse1
743      * @id:  The header index for the tag, obtained from bcf_hdr_id2int()
744      *
745      * Returns bcf_fmt_t* / bcf_info_t*. These functions do not check if the index is valid
746      * as their goal is to avoid the header lookup.
747      */
748     bcf_fmt_t *bcf_get_fmt_id(bcf1_t *line, const int id);
749     /// ditto
750     bcf_info_t *bcf_get_info_id(bcf1_t *line, const int id);
751 
752     /**
753      *  bcf_get_info_*() - get INFO values, integers or floats
754      *  @hdr:       BCF header
755      *  @line:      BCF record
756      *  @tag:       INFO tag to retrieve
757      *  @dst:       *dst is pointer to a memory location, can point to NULL
758      *  @ndst:      pointer to the size of allocated memory
759      *
760      *  Returns negative value on error or the number of written values
761      *  (including missing values) on success. bcf_get_info_string() returns
762      *  on success the number of characters written excluding the null-
763      *  terminating byte. bcf_get_info_flag() returns 1 when flag is set or 0
764      *  if not.
765      *
766      *  List of return codes:
767      *      -1 .. no such INFO tag defined in the header
768      *      -2 .. clash between types defined in the header and encountered in the VCF record
769      *      -3 .. tag is not present in the VCF record
770      */
771     int bcf_get_info_values(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst, int type);
772     pragma(inline, true) {
773         auto bcf_get_info_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration)
774             { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_INT); }
775         auto bcf_get_info_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration)
776             { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_REAL); }
777         auto bcf_get_info_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration)
778             { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_STR); }
779         auto bcf_get_info_flag(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration)
780             { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_FLAG); }
781     }
782 
783     /**
784      *  bcf_get_format_*() - same as bcf_get_info*() above
785      *
786      *  The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char().
787      *  see the description of bcf_update_format_string() and bcf_update_format_char() above.
788      *  Unlike other bcf_get_format__*() functions, bcf_get_format_string() allocates two arrays:
789      *  a single block of \0-terminated strings collapsed into a single array and an array of pointers
790      *  to these strings. Both arrays must be cleaned by the user.
791      *
792      *  Returns negative value on error or the number of written values on success.
793      *
794      *  Use the returned number of written values for accessing valid entries of dst, as ndst is only a
795      *  watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over
796      *  values from previous calls to bcf_get_format_*() on lines with more values per sample.
797      *
798      *  Example:
799      *      int ndst = 0; char **dst = NULL;
800      *      if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 )
801      *          for (i=0; i<bcf_hdr_nsamples(hdr); i++) printf("%s\n", dst[i]);
802      *      free(dst[0]); free(dst);
803      *
804      *  Example:
805      *      int i, j, ngt, nsmpl = bcf_hdr_nsamples(hdr);
806      *      int32_t *gt_arr = NULL, ngt_arr = 0;
807      *
808      *      ngt = bcf_get_genotypes(hdr, line, &gt_arr, &ngt_arr);
809      *      if ( ngt<=0 ) return; // GT not present
810      *
811      *      int max_ploidy = ngt/nsmpl;
812      *      for (i=0; i<nsmpl; i++)
813      *      {
814      *        int32_t *ptr = gt + i*max_ploidy;
815      *        for (j=0; j<max_ploidy; j++)
816      *        {
817      *           // if true, the sample has smaller ploidy
818      *           if ( ptr[j]==bcf_int32_vector_end ) break;
819      *
820      *           // missing allele
821      *           if ( bcf_gt_is_missing(ptr[j]) ) continue;
822      *
823      *           // the VCF 0-based allele index
824      *           int allele_index = bcf_gt_allele(ptr[j]);
825      *
826      *           // is phased?
827      *           int is_phased = bcf_gt_is_phased(ptr[j]);
828      *
829      *           // .. do something ..
830      *         }
831      *      }
832      *      free(gt_arr);
833      *
834      */
835     int bcf_get_format_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, char ***dst, int *ndst);
836     /// ditto
837     int bcf_get_format_values(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst, int type);
838     pragma(inline, true) {
839         auto bcf_get_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line)
840             { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_INT); }
841         auto bcf_get_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line)
842             { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_REAL); }
843         auto bcf_get_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line)
844             { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_STR); }
845         auto bcf_get_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line)
846             { return bcf_get_format_values(hdr, line, toStringz("GT"c), cast(void**) dst, ndst, BCF_HT_INT); }
847     }
848 
849 
850 
851     /**************************************************************************
852      *  Helper functions
853      **************************************************************************/
854 
855     /**
856      *  bcf_hdr_id2int() - Translates string into numeric ID
857      *  bcf_hdr_int2id() - Translates numeric ID into string
858      *  @type:     one of BCF_DT_ID, BCF_DT_CTG, BCF_DT_SAMPLE
859      *  @id:       tag name, such as: PL, DP, GT, etc.
860      *
861      *  Returns -1 if string is not in dictionary, otherwise numeric ID which identifies
862      *  fields in BCF records.
863      */
864     int bcf_hdr_id2int(const(bcf_hdr_t) *hdr, int type, const(char) *id);
865     /// ditto
866     pragma(inline, true)
867     auto bcf_hdr_int2id(const(bcf_hdr_t) *hdr, int type, int int_id)
868         { return hdr.id[type][int_id].key; }
869     //#define bcf_hdr_int2id(hdr,type,int_id) ((hdr)->id[type][int_id].key)
870 
871     /**
872      *  bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID
873      *  bcf_hdr_id2name() - Translates numeric ID to sequence name
874      */
875     pragma(inline, true) int bcf_hdr_name2id(const(bcf_hdr_t) *hdr, const(char) *id) { return bcf_hdr_id2int(hdr, BCF_DT_CTG, id); } // @suppress(dscanner.style.long_line)
876     /// ditto
877     pragma(inline, true) const(char) *bcf_hdr_id2name(const(bcf_hdr_t) *hdr, int rid) { return hdr.id[BCF_DT_CTG][rid].key; } // @suppress(dscanner.style.long_line)
878     /// ditto
879     pragma(inline, true) const(char) *bcf_seqname(const(bcf_hdr_t) *hdr, bcf1_t *rec) { return hdr.id[BCF_DT_CTG][rec.rid].key; } // @suppress(dscanner.style.long_line)
880 
881     /**
882      *  bcf_hdr_id2*() - Macros for accessing bcf_idinfo_t
883      *  @type:      one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT
884      *  @int_id:    return value of bcf_hdr_id2int, must be >=0
885      *
886      *  The returned values are:
887      *     bcf_hdr_id2length   ..  whether the number of values is fixed or variable, one of BCF_VL_*
888      *     bcf_hdr_id2number   ..  the number of values, 0xfffff for variable length fields
889      *     bcf_hdr_id2type     ..  the field type, one of BCF_HT_*
890      *     bcf_hdr_id2coltype  ..  the column type, one of BCF_HL_*
891      *
892      *  Notes: Prior to using the macros, the presence of the info should be
893      *  tested with bcf_hdr_idinfo_exists().
894      */
895     // TODO: for dict_type and col_type use ENUMs
896     pragma(inline, true) {
897         auto bcf_hdr_id2length(const(bcf_hdr_t) *hdr, int type, int int_id) { return ((hdr).id[BCF_DT_ID][int_id].val.info[type]>>8 & 0xf); } // @suppress(dscanner.style.long_line)
898         /// ditto
899         auto bcf_hdr_id2number(const(bcf_hdr_t) *hdr, int type, int int_id) { return ((hdr).id[BCF_DT_ID][int_id].val.info[type]>>12);    } // @suppress(dscanner.style.long_line)
900         /// ditto
901         auto bcf_hdr_id2type(const(bcf_hdr_t) *hdr, int type, int int_id)   { return ((hdr).id[BCF_DT_ID][int_id].val.info[type]>>4 & 0xf); } // @suppress(dscanner.style.long_line)
902         /// ditto
903         auto bcf_hdr_id2coltype(const(bcf_hdr_t) *hdr, int type, int int_id){ return ((hdr).id[BCF_DT_ID][int_id].val.info[type] & 0xf); } // @suppress(dscanner.style.long_line)
904         /// ditto
905         auto bcf_hdr_idinfo_exists(const(bcf_hdr_t) *hdr, int type, int int_id) { return ((int_id<0 || bcf_hdr_id2coltype(hdr,type,int_id)==0xf) ? 0 : 1); } // @suppress(dscanner.style.long_line)
906         /// ditto
907         auto bcf_hdr_id2hrc(const(bcf_hdr_t) *hdr, int dict_type, int col_type, int int_id)
908             { return ((hdr).id[(dict_type)==BCF_DT_CTG?BCF_DT_CTG:BCF_DT_ID][int_id].val.hrec[(dict_type)==BCF_DT_CTG?0:(col_type)]); } // @suppress(dscanner.style.long_line)
909     }
910 
911     /// Undocumented
912     void bcf_fmt_array(kstring_t *s, int n, int type, void *data);
913     /// ditto
914     uint8_t *bcf_fmt_sized_array(kstring_t *s, uint8_t *ptr);
915 
916     /// ditto
917     void bcf_enc_vchar(kstring_t *s, int l, const(char) *a);
918     /// ditto
919     void bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize);
920     /// ditto
921     void bcf_enc_vfloat(kstring_t *s, int n, float *a);
922 
923 
924     /**************************************************************************
925      *  BCF index
926      *
927      *  Note that these functions work with BCFs only. See synced_bcf_reader.h
928      *  which provides (amongst other things) an API to work transparently with
929      *  both indexed BCFs and VCFs.
930      **************************************************************************/
931 
932     alias bcf_itr_destroy = hts_itr_destroy;
933 
934 pragma(inline, true) {
935     /// Generate an iterator for an integer-based range query
936     auto bcf_itr_queryi(const(hts_idx_t) *idx, int tid, int beg, int end)
937         { return hts_itr_query(idx, tid, beg, end, &bcf_readrec); }
938     
939     /// Generate an iterator for a string-based range query
940     auto bcf_itr_querys(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, const(char) *s)
941         { return hts_itr_querys(idx, s, cast(hts_name2id_f) &bcf_hdr_name2id, cast(void *) hdr,
942                                 &hts_itr_query, &bcf_readrec); }
943 
944     /// Iterate through the range
945     /// r should (probably) point to your VCF (BCF) row structure
946     /// TODO: attempt to define parameter r as bcf1_t *, which is what I think it should be
947     auto bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r)
948         { return hts_itr_next(htsfp.fp.bgzf, itr, r, null); }   // last param was 0
949 
950     /// Load a BCF file's CSI index 
951     auto bcf_index_load(const(char) *fn) { return hts_idx_load(fn, HTS_FMT_CSI); }
952 
953     /// Get a list (char **) of sequence names from the index -- free only the array, not the values
954     auto bcf_index_seqnames(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, int *nptr)
955         { return hts_idx_seqnames(idx, nptr, cast(hts_id2name_f) &bcf_hdr_id2name, cast(void *) hdr); }
956 }
957 
958     /** Load BCF index with explicit fn and explicit index fn */
959     hts_idx_t *bcf_index_load2(const(char) *fn, const(char) *fnidx);
960 
961     /**
962      *  bcf_index_build() - Generate and save an index file
963      *  @fn:         Input VCF(compressed)/BCF filename
964      *  @min_shift:  log2(width of the smallest bin), e.g. a value of 14
965      *  imposes a 16k base lower limit on the width of index bins.
966      *  Positive to generate CSI, or 0 to generate TBI. However, a small
967      *  value of min_shift would create a large index, which would lead to
968      *  reduced performance when using the index. A recommended value is 14.
969      *  For BCF files, only the CSI index can be generated.
970      *
971      *  Returns 0 if successful, or negative if an error occurred.
972      *
973      *  List of error codes:
974      *      -1 .. indexing failed
975      *      -2 .. opening @fn failed
976      *      -3 .. format not indexable
977      *      -4 .. failed to create and/or save the index
978      */
979     int bcf_index_build(const(char) *fn, int min_shift);
980 
981     /**
982      *  bcf_index_build2() - Generate and save an index to a specific file
983      *  @fn:         Input VCF/BCF filename
984      *  @fnidx:      Output filename, or NULL to add .csi/.tbi to @fn
985      *  @min_shift:  Positive to generate CSI, or 0 to generate TBI
986      *
987      *  Returns 0 if successful, or negative if an error occurred.
988      *
989      *  List of error codes:
990      *      -1 .. indexing failed
991      *      -2 .. opening @fn failed
992      *      -3 .. format not indexable
993      *      -4 .. failed to create and/or save the index
994      */
995     int bcf_index_build2(const(char) *fn, const(char) *fnidx, int min_shift);
996 
997     /**
998      *  bcf_index_build3() - Generate and save an index to a specific file
999      *  @fn:         Input VCF/BCF filename
1000      *  @fnidx:      Output filename, or NULL to add .csi/.tbi to @fn
1001      *  @min_shift:  Positive to generate CSI, or 0 to generate TBI
1002      *  @n_threads:  Number of VCF/BCF decoder threads
1003      *
1004      *  Returns 0 if successful, or negative if an error occurred.
1005      *
1006      *  List of error codes:
1007      *      -1 .. indexing failed
1008      *      -2 .. opening @fn failed
1009      *      -3 .. format not indexable
1010      *      -4 .. failed to create and/or save the index
1011      */
1012      int bcf_index_build3(const(char) *fn, const(char) *fnidx, int min_shift, int n_threads);
1013 
1014 /*******************
1015  * Typed value I/O *
1016  *******************/
1017 
1018 /**
1019     Note that in contrast with BCFv2.1 specification, HTSlib implementation
1020     allows missing values in vectors. For integer types, the values 0x80,
1021     0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001,
1022     0x80000001 as end-of-vector indicators.  Similarly for floats, the value of
1023     0x7F800001 is interpreted as a missing value and 0x7F800002 as an
1024     end-of-vector indicator.
1025     Note that the end-of-vector byte is not part of the vector.
1026 
1027     This trial BCF version (v2.2) is compatible with the VCF specification and
1028     enables to handle correctly vectors with different ploidy in presence of
1029     missing values.
1030  */
1031 enum int8_t  bcf_int8_vector_end  = (INT8_MIN+1);
1032 /// ditto
1033 enum int16_t bcf_int16_vector_end = (INT16_MIN+1);
1034 /// ditto
1035 enum int32_t bcf_int32_vector_end = (INT32_MIN+1);
1036 /// ditto
1037 enum char bcf_str_vector_end = 0;   //#define bcf_str_vector_end   0
1038 /// ditto
1039 enum int8_t  bcf_int8_missing  = INT8_MIN;
1040 /// ditto
1041 enum int16_t bcf_int16_missing = INT16_MIN;
1042 /// ditto
1043 enum int32_t bcf_int32_missing = INT32_MIN;
1044 /// ditto
1045 enum char bcf_str_missing = 0x07;   // #define bcf_str_missing      0x07
1046 extern __gshared uint32_t bcf_float_vector_end;   /// ditto
1047 extern __gshared uint32_t bcf_float_missing;      /// ditto
1048 
1049 version(LDC) pragma(inline, true):
1050 version(GNU) pragma(inline, true):
1051 /** u wot */
1052 void bcf_float_set(float *ptr, uint32_t value)
1053 {
1054     union U { uint32_t i; float f; }
1055     U u;
1056     u.i = value;
1057     *ptr = u.f;
1058 }
1059 
1060 /// float vector macros
1061 void bcf_float_set_vector_end(float x) { bcf_float_set(&x, bcf_float_vector_end); }
1062 /// ditto
1063 void bcf_float_set_missing(float x) { bcf_float_set(&x, bcf_float_missing); }
1064 
1065 /** u wot */
1066 int bcf_float_is_missing(float f)
1067 {
1068     union U { uint32_t i; float f; }
1069     U u;
1070     u.f = f;
1071     return u.i==bcf_float_missing ? 1 : 0;
1072 }
1073 /// ditto
1074 int bcf_float_is_vector_end(float f)
1075 {
1076     union U { uint32_t i; float f; }
1077     U u;
1078     u.f = f;
1079     return u.i==bcf_float_vector_end ? 1 : 0;
1080 }
1081 /// ditto
1082 /+void bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
1083 {
1084     #define BRANCH(type_t, missing, vector_end) { \
1085         type_t *ptr = (type_t*) (fmt->p + isample*fmt->size); \
1086         int i; \
1087         for (i=0; i<fmt->n && ptr[i]!=vector_end; i++) \
1088         { \
1089             if ( i ) kputc("/|"[ptr[i]&1], str); \
1090             if ( !(ptr[i]>>1) ) kputc('.', str); \
1091             else kputw((ptr[i]>>1) - 1, str); \
1092         } \
1093         if (i == 0) kputc('.', str); \
1094     }
1095     switch (fmt->type) {
1096         case BCF_BT_INT8:  BRANCH(int8_t,  bcf_int8_missing, bcf_int8_vector_end); break;
1097         case BCF_BT_INT16: BRANCH(int16_t, bcf_int16_missing, bcf_int16_vector_end); break;
1098         case BCF_BT_INT32: BRANCH(int32_t, bcf_int32_missing, bcf_int32_vector_end); break;
1099         case BCF_BT_NULL:  kputc('.', str); break;
1100         default: hts_log_error("Unexpected type %d", fmt->type); abort(); break;
1101     }
1102     #undef BRANCH
1103 }+/
1104 /// ditto
1105 /+void bcf_enc_size(kstring_t *s, int size, int type)
1106 {
1107     if (size >= 15) {
1108         kputc(15<<4|type, s);
1109         if (size >= 128) {
1110             if (size >= 32768) {
1111                 int32_t x = size;
1112                 kputc(1<<4|BCF_BT_INT32, s);
1113                 kputsn(cast(char*)&x, 4, s);
1114             } else {
1115                 int16_t x = size;
1116                 kputc(1<<4|BCF_BT_INT16, s);
1117                 kputsn(cast(char*)&x, 2, s);
1118             }
1119         } else {
1120             kputc(1<<4|BCF_BT_INT8, s);
1121             kputc(size, s);
1122         }
1123     } else kputc(size<<4|type, s);
1124 }+/
1125 /// ditto
1126 int bcf_enc_inttype(long x)
1127 {
1128     if (x <= INT8_MAX && x > bcf_int8_missing) return BCF_BT_INT8;
1129     if (x <= INT16_MAX && x > bcf_int16_missing) return BCF_BT_INT16;
1130     return BCF_BT_INT32;
1131 }
1132 /// ditto
1133 /+void bcf_enc_int1(kstring_t *s, int32_t x)
1134 {
1135     if (x == bcf_int32_vector_end) {
1136         bcf_enc_size(s, 1, BCF_BT_INT8);
1137         kputc(bcf_int8_vector_end, s);
1138     } else if (x == bcf_int32_missing) {
1139         bcf_enc_size(s, 1, BCF_BT_INT8);
1140         kputc(bcf_int8_missing, s);
1141     } else if (x <= INT8_MAX && x > bcf_int8_missing) {
1142         bcf_enc_size(s, 1, BCF_BT_INT8);
1143         kputc(x, s);
1144     } else if (x <= INT16_MAX && x > bcf_int16_missing) {
1145         int16_t z = x;
1146         bcf_enc_size(s, 1, BCF_BT_INT16);
1147         kputsn(cast(char*)&z, 2, s);
1148     } else {
1149         int32_t z = x;
1150         bcf_enc_size(s, 1, BCF_BT_INT32);
1151         kputsn(cast(char*)&z, 4, s);
1152     }
1153 }+/
1154 /// Need hts_endian
1155 /+ int32_t bcf_dec_int1(const uint8_t *p, int type, uint8_t **q)
1156 {
1157     if (type == BCF_BT_INT8) {
1158         *q = cast(uint8_t*)p + 1;
1159         return le_to_i8(p);
1160     } else if (type == BCF_BT_INT16) {
1161         *q = cast(uint8_t*)p + 2;
1162         return le_to_i16(p);
1163     } else {
1164         *q = cast(uint8_t*)p + 4;
1165         return le_to_i32(p);
1166     }
1167 }+/
1168 /// TODO
1169 /+int32_t bcf_dec_typed_int1(const uint8_t *p, uint8_t **q)
1170 {
1171     return bcf_dec_int1(p + 1, *p&0xf, q);
1172 }+/
1173 /// TODO
1174 /+int32_t bcf_dec_size(const uint8_t *p, uint8_t **q, int *type)
1175 {
1176     *type = *p & 0xf;
1177     if (*p>>4 != 15) {
1178         *q = cast(uint8_t*)p + 1;
1179         return *p>>4;
1180     } else return bcf_dec_typed_int1(p + 1, q);
1181 }+/