1 /// @file htslib/vcf.h
2 /// High-level VCF/BCF variant calling file operations.
3 /*
4     Copyright (C) 2012, 2013 Broad Institute.
5     Copyright (C) 2012-2020 Genome Research Ltd.
6 
7     Author: Heng Li <lh3@sanger.ac.uk>
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.  */
26 
27 /*
28     todo:
29         - make the function names consistent
30         - provide calls to abstract away structs as much as possible
31  */
32 /// Section numbers refer to VCF Specification v4.2: https://samtools.github.io/hts-specs/VCFv4.2.pdf
33 module htslib.vcf;
34 
35 import std.bitmanip;
36 import std.string: toStringz;
37 import core.stdc.errno : errno, EINVAL;
38 import core.stdc.config;
39 
40 import htslib.hts;
41 import htslib.hts_log;
42 import htslib.hts_endian;
43 import htslib.kstring : kstring_t;
44 import htslib.bgzf : BGZF;
45 
46 @system:
47 extern (C):
48 @nogc nothrow {
49 
50 /* Included only for backwards compatibility with e.g. bcftools 1.10 */
51 
52 /*****************
53  * Header struct *
54  *****************/
55 
/// Header line categories, stored in bcf_hrec_t.type.
enum
{
    BCF_HL_FLT  = 0, /// header line: FILTER
    BCF_HL_INFO = 1, /// header line: INFO
    BCF_HL_FMT  = 2, /// header line: FORMAT
    BCF_HL_CTG  = 3, /// header line: contig
    BCF_HL_STR  = 4, /// header line: structured header line TAG=<A=..,B=..>
    BCF_HL_GEN  = 5, /// header line: generic header line
}
62 
/// Header value types (the Type= attribute of a header line).
enum
{
    BCF_HT_FLAG = 0, /// header type: FLAG
    BCF_HT_INT  = 1, /// header type: INTEGER
    BCF_HT_REAL = 2, /// header type: REAL
    BCF_HT_STR  = 3, /// header type: STRING
    BCF_HT_LONG = BCF_HT_INT | 0x100, /// BCF_HT_INT, but for int64_t values; VCF only!
}
68 
/// Value-count classes (the Number= attribute of a header line).
/// NOTE(review): the A/G/R meanings below follow the VCF specification's
/// Number=A/G/R convention; the mapping is inferred from the names -- confirm.
enum
{
    BCF_VL_FIXED = 0, /// variable length: fixed count
    BCF_VL_VAR   = 1, /// variable length: variable
    BCF_VL_A     = 2, /// variable length: presumably one value per ALT allele (Number=A)
    BCF_VL_G     = 3, /// variable length: presumably one value per genotype (Number=G)
    BCF_VL_R     = 4, /// variable length: presumably one value per allele incl. REF (Number=R)
}
74 
75 /* === Dictionary ===
76 
77    The header keeps three dictionaries. The first keeps IDs in the
78    "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths
79    in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[]
80    is the actual hash table, which is opaque to the end users. In the hash
81    table, the key is the ID or sample name as a C string and the value is a
82    bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash
83    table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the
84    size of the hash table or, equivalently, the length of the id[] arrays.
85 */
86 
/// Indices of the three header dictionaries (see bcf_hdr_t.n / id / dict).
enum
{
    BCF_DT_ID     = 0, /// dictionary type: ID
    BCF_DT_CTG    = 1, /// dictionary type: CONTIG
    BCF_DT_SAMPLE = 2, /// dictionary type: SAMPLE
}
90 
/// One header line in structured form (§1.2)
struct bcf_hrec_t // @suppress(dscanner.style.phobos_naming_convention)
{
    /// One of the BCF_HL_* types
    int type;
    /// The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc.
    char* key;
    /// Set only for generic lines, NULL for FILTER/INFO, etc.
    char* value;
    /// Number of structured fields
    int nkeys;
    /// The key=value pairs: keys[i] pairs with vals[i]
    char** keys, vals;
}
101 
/// Value stored in the ID dictionary for one key
struct bcf_idinfo_t
{
    /** For BCF_HL_FLT, INFO and FMT: packed Number:20, var:4, Type:4, ColType:4,
        one element per line type. For BCF_HL_CTG: the contig length in info[0]. */
    ulong[3] info;
    /// pointers to header lines for [FILTER, INFO, FORMAT] in order
    bcf_hrec_t*[3] hrec;
    /// primary key
    int id;
}
110 
/// Key/value pair of the ID dictionary
struct bcf_idpair_t // @suppress(dscanner.style.phobos_naming_convention)
{
    /// header dictionary FILTER/INFO/FORMAT ID key
    const(char)* key;
    /// header dictionary FILTER/INFO/FORMAT ID entry
    const(bcf_idinfo_t)* val;
}
117 
/// Structured representation of a VCF header (§1.2).
/// bcf_hdr_t structs must always be created via bcf_hdr_init().
struct bcf_hdr_t // @suppress(dscanner.style.phobos_naming_convention)
{
    /// n: size of each dictionary block in use (the allocated size, m, sits at the end to preserve ABI)
    int[3] n;
    /// ID dictionary {FILTER/INFO/FORMAT, contig, sample} ID key/entry
    bcf_idpair_t*[3] id;
    /// hash tables
    void*[3] dict;
    /// list of samples (NOTE(review): marked "?" in the original binding -- confirm)
    char** samples;
    /// structured representation of the header lines
    bcf_hrec_t** hrec;
    /// number of header records
    int nhrec;
    /// NOTE(review): undocumented in the original binding
    int dirty;
    /// for bcf_translate()
    int ntransl;
    /// for bcf_translate()
    int*[2] transl;
    /// for bcf_hdr_set_samples()
    int nsamples_ori;
    /// NOTE(review): undocumented in the original binding
    ubyte* keep_samples;
    /// NOTE(review): undocumented in the original binding
    kstring_t mem;
    /// m: allocated size of each dictionary block (see n above)
    int[3] m;
}
136 
/// Lookup table used in bcf_record_check.
/// MAINTAINER: the C header declares this with an unspecified size (`[]`).
/// A D slice (`ubyte[]`) is a {length, pointer} pair and does not match the
/// layout of a C array object, so the binding uses a fixed-size static array.
/// NOTE(review): 16 is the element count of the definition in htslib's vcf.c
/// -- re-check when updating the bound htslib version.
extern __gshared ubyte[16] bcf_type_shift;
140 
141 /**************
142  * VCF record *
143  **************/
144 
/// BCF value type codes (note that 6 is not assigned).
enum
{
    BCF_BT_NULL  = 0, /// null
    BCF_BT_INT8  = 1, /// int8
    BCF_BT_INT16 = 2, /// int16
    BCF_BT_INT32 = 3, /// int32
    BCF_BT_INT64 = 4, /// Unofficial, for internal use only per htslib headers
    BCF_BT_FLOAT = 5, /// float
    BCF_BT_CHAR  = 7, /// char (8 bit)
}
152 
/// Variant type values; all non-zero values are distinct single bits.
enum
{
    VCF_REF     = 0,  /// ref (e.g. in a gVCF)
    VCF_SNP     = 1,  /// SNP
    VCF_MNP     = 2,  /// MNP
    VCF_INDEL   = 4,  /// INDEL
    VCF_OTHER   = 8,  /// other (e.g. SV)
    VCF_BND     = 16, /// breakend
    VCF_OVERLAP = 32, /// overlapping deletion, ALT=*
}
160 
/// Variant type record embedded in bcf_dec_t:
/// the variant type together with the number of bases affected.
struct bcf_variant_t // @suppress(dscanner.style.phobos_naming_convention)
{
    /// variant type
    int type;
    /// number of bases affected, negative for deletions
    int n;
}
168 
/// Data of one FORMAT field (§1.4.2 Genotype fields)
struct bcf_fmt_t // @suppress(dscanner.style.phobos_naming_convention)
{
    import std.bitmanip : bitfields;

    /// id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key
    int id;
    /// n: number of values per-sample
    int n;
    /// size: number of bytes per-sample
    int size;
    /// type: one of BCF_BT_* types
    int type;
    /// same as vptr and vptr_* in bcf_info_t below
    ubyte* p;
    /// NOTE(review): presumably the counterpart of bcf_info_t.vptr_len -- confirm
    uint p_len;

    // p_off (31 bits) and p_free (1 bit) share a single 32-bit word.
    mixin(bitfields!(
        uint, "p_off", 31,
        bool, "p_free", 1));
}
185 
/// Data of one INFO field (§1.4.1 Fixed fields, (8) INFO)
struct bcf_info_t // @suppress(dscanner.style.phobos_naming_convention)
{
    import std.bitmanip : bitfields;

    /// key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key
    int key;
    /// type: one of BCF_BT_* types
    int type;

    /// Scalar payload: integer or float view of the same storage
    union V1
    {
        long i;  /// integer value
        float f; /// float value
    }

    /// only set if $len==1; for easier access
    V1 v1;
    /// pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes
    ubyte* vptr;
    /// length of the vptr block or, when set, of the vptr_mod block, excluding offset
    uint vptr_len;

    // vptr_off:  vptr offset, i.e., the size of the INFO key plus size+type bytes
    // vptr_free: indicates that vptr-vptr_off must be freed; set only when modified
    //            and the new data block is bigger than the original
    mixin(bitfields!(
        uint, "vptr_off", 31,
        uint, "vptr_free", 1));

    /// vector length, 1 for scalars
    int len;
}
214 
/// Bit flags recording which shared fields of a record were edited.
enum
{
    BCF1_DIRTY_ID  = 1, /// ID was edited
    BCF1_DIRTY_ALS = 2, /// Allele(s) was edited
    BCF1_DIRTY_FLT = 4, /// FILTER was edited
    BCF1_DIRTY_INF = 8, /// INFO was edited
}
219 
/// Decoded variable-length part of a VCF record
struct bcf_dec_t // @suppress(dscanner.style.phobos_naming_convention)
{
    /// allocated sizes (high-water marks); do not change
    int m_fmt, m_info, m_id, m_als, m_allele, m_flt;
    /// Number of FILTER fields
    int n_flt;
    /// FILTER keys in the dictionary
    int* flt;
    /// ID
    char* id;
    /// REF+ALT block (\0-separated)
    char* als;
    /// allele[0] is the REF (allele[] pointers to the als block); all null terminated
    char** allele;
    /// INFO
    bcf_info_t* info;
    /// FORMAT and individual sample
    bcf_fmt_t* fmt;
    /// $var and $var_type set only when set_variant_types called
    bcf_variant_t* var;
    /// NOTE(review): presumably the number of entries in var -- confirm
    int n_var;
    /// variant type (TODO: make enum)
    int var_type;
    /// if set, shared.s must be recreated on BCF output (TODO: make enum)
    int shared_dirty;
    /// if set, indiv.s must be recreated on BCF output (TODO: make enum)
    int indiv_dirty;
}
243 
/// Bit flags stored in bcf1_t.errcode.
/// NOTE(review): only the first two meanings are documented in the original
/// binding; the remaining descriptions are inferred from the names -- confirm.
enum
{
    BCF_ERR_CTG_UNDEF   = 1,  /// BCF error: undefined contig
    BCF_ERR_TAG_UNDEF   = 2,  /// BCF error: undefined tag
    BCF_ERR_NCOLS       = 4,  /// BCF error: presumably a wrong number of columns
    BCF_ERR_LIMITS      = 8,  /// BCF error: presumably a limit was exceeded
    BCF_ERR_CHAR        = 16, /// BCF error: presumably an invalid character
    BCF_ERR_CTG_INVALID = 32, /// BCF error: presumably an invalid contig
    BCF_ERR_TAG_INVALID = 64, /// BCF error: presumably an invalid tag
}
251 
/**
    One VCF/BCF line.

    Reading from a VCF file is slower because the string must first be parsed
    and packed into a BCF line (done in vcf_parse), then unpacked into the
    internal bcf1_t structure. If it is known in advance that some fields will
    not be required (notably the sample columns), parsing of these can be
    skipped by setting max_unpack appropriately.

    Similarly, it is fast to output a BCF line because the columns (kept in
    shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF
    line must be formatted in vcf_format.
 */
struct bcf1_t
{
    import std.bitmanip : bitfields;

    /// POS
    hts_pos_t pos;
    /// length of REF
    hts_pos_t rlen;
    /// CHROM
    int rid;
    /// QUAL
    float qual;

    // Four counters packed into 64 bits:
    // n_info:16, n_allele:16, n_fmt:8, n_sample:24
    mixin(bitfields!(
        uint, "n_info", 16,
        uint, "n_allele", 16,
        uint, "n_fmt", 8,
        uint, "n_sample", 24));

    /// shared block (trailing underscore because `shared` is a D keyword)
    kstring_t shared_;
    /// per-sample block
    kstring_t indiv;
    /// lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack()
    bcf_dec_t d;
    /// Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed
    int max_unpack;
    /// remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work
    int unpacked;
    /// the original block size of ID, REF+ALT and FILTER
    int[3] unpack_size;
    /// one of BCF_ERR_* codes
    int errcode;
}
286 
287 /*******
288  * API *
289  *******/
290 
291 /***********************************************************************
292  *  BCF and VCF I/O
293  *
294  *  A note about naming conventions: htslib internally represents VCF
295  *  records as bcf1_t data structures, therefore most functions are
296  *  prefixed with bcf_. There are a few exceptions where the functions must
297  *  be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In
298  *  these cases, functions prefixed with bcf_ are more general and work
299  *  with both BCF and VCF.
300  *
301  ***********************************************************************/
302 
/**
 * Names with a trailing "1" (macros in the C header), defined only for
 * consistency with other parts of htslib.
 */
alias bcf_init1 = bcf_init;
alias bcf_destroy1 = bcf_destroy;
alias bcf_empty1 = bcf_empty;
alias bcf_clear1 = bcf_clear;
alias bcf_read1 = bcf_read;
alias bcf_write1 = bcf_write;
alias vcf_read1 = vcf_read;
alias vcf_write1 = vcf_write;
alias vcf_parse1 = vcf_parse;
alias vcf_format1 = vcf_format;
314 
/**
 *  bcf_hdr_init() - create an empty BCF header.
 *  @param mode    "r" or "w"
 *
 *  In write mode the mandatory fileFormat and FILTER=PASS lines are
 *  added automatically.
 *
 *  A header returned by a successful call should be released with
 *  bcf_hdr_destroy() once it is no longer needed.
 */
bcf_hdr_t* bcf_hdr_init(const(char)* mode);

/**
 *  bcf_hdr_destroy() - destroy a BCF header struct.
 *  @param h    the header to destroy
 */
void bcf_hdr_destroy(bcf_hdr_t* h);
329 
/**
 *  bcf_init() - allocate and initialize a bcf1_t object.
 *
 *  A record returned by a successful call should be released with
 *  bcf_destroy() once it is no longer needed.
 */
bcf1_t* bcf_init();

/**
 *  bcf_destroy() - deallocate a bcf1_t object.
 */
void bcf_destroy(bcf1_t* v);

/**
 *  bcf_empty() - like bcf_destroy(), but frees only the memory allocated
 *  by the bcf1_t, not the bcf1_t object itself.
 */
void bcf_empty(bcf1_t* v);

/**
 *  bcf_clear() - make the bcf1_t object ready for the next read.
 *
 *  Intended mostly for internal use; users should rarely need to call
 *  this function directly.
 */
void bcf_clear(bcf1_t* v);
352 
/**
 * VCF/BCF files are plain htsFile handles.
 * For the bcf_open and vcf_open mode string, please see hts_open() in hts.h.
 */
alias vcfFile = htsFile;
alias vcf_open = hts_open;
alias bcf_open = hts_open;
alias vcf_close = hts_close;
alias bcf_close = hts_close;
359 
/// Read a VCF or BCF header
/**
    @param  fp  The file to read the header from
    @return Pointer to a populated header structure on success;
            NULL on failure

    A header returned by a successful call should be released with
    bcf_hdr_destroy() once it is no longer needed.
*/
bcf_hdr_t* bcf_hdr_read(htsFile* fp);
369 
/**
 *  bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed
 *  @param hdr      the header to restrict
 *  @param samples  samples to include or exclude from file or as a comma-separated string.
 *              LIST|FILE   .. select samples in list/file
 *              ^LIST|FILE  .. exclude samples from list/file
 *              -           .. include all samples
 *              NULL        .. exclude all samples
 *  @param is_file  @p samples is a file (1) or a comma-separated list (0)
 *  @return 0 on success, -1 on error, or a positive integer if the list
 *          contains samples not present in the VCF header; in that case the
 *          return value is the index of the offending sample.
 *
 *  The bottleneck of VCF reading is the parsing of genotype fields. If the
 *  reader knows in advance that only a subset of samples is needed (possibly
 *  no samples at all), the performance of bcf_read() can be significantly
 *  improved by calling bcf_hdr_set_samples after bcf_hdr_read().
 *
 *  bcf_read() subsets the VCF/BCF records automatically, with the notable
 *  exception of records read via bcf_itr_next(). In that case
 *  bcf_subset_format() must be called explicitly, because bcf_readrec()
 *  does not see the header.
 */
int bcf_hdr_set_samples(
    bcf_hdr_t* hdr,
    const(char)* samples,
    int is_file);

/**
 *  bcf_subset_format() - must be called explicitly on records read via
 *  bcf_itr_next() after bcf_hdr_set_samples() (see bcf_hdr_set_samples()).
 */
int bcf_subset_format(const(bcf_hdr_t)* hdr, bcf1_t* rec);
395 
/// Write a VCF or BCF header
/**
    @param  fp  Output file
    @param  h   The header to write
    @return 0 on success; -1 on failure
 */
int bcf_hdr_write(htsFile* fp, bcf_hdr_t* h);
402 
/**
 * vcf_parse() - parse the VCF line held in a kstring and populate the bcf1_t struct.
 * The line must not end with \n or \r characters.
 */
int vcf_parse(kstring_t* s, const(bcf_hdr_t)* h, bcf1_t* v);

/**
 * vcf_open_mode() - complete the file opening mode, according to its extension.
 * @param mode      Preallocated mode string to be completed.
 * @param fn        File name to be opened.
 * @param format    Format string (vcf|bcf|vcf.gz)
 * @return          0 on success; -1 on failure
 */
int vcf_open_mode(
    char* mode,
    const(char)* fn,
    const(char)* format);

/**
 * vcf_format() - the opposite of vcf_parse.
 * It should rarely be called directly, see vcf_write.
 */
int vcf_format(const(bcf_hdr_t)* h, const(bcf1_t)* v, kstring_t* s);
420 
/// Read next VCF or BCF record
/**
    @param fp  The file to read the record from
    @param h   The header for the vcf/bcf file
    @param v   The bcf1_t structure to populate
    @return 0 on success; -1 on end of file; < -1 on critical error

    On errors which are not critical for reading, such as missing header
    definitions in vcf files, zero is returned but v->errcode will have been
    set to one of the BCF_ERR* codes; it must be checked before calling
    bcf_write().
 */
int bcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);
432 
// Flags for the `which` argument of bcf_unpack(); also the values accepted by
// bcf1_t.max_unpack. Declared ahead of the function so that the bcf_unpack()
// documentation comment attaches to the function and not to the first flag.
enum BCF_UN_STR = 1; /// up to ALT inclusive
enum BCF_UN_FLT = 2; /// up to FILTER
enum BCF_UN_INFO = 4; /// up to INFO
enum BCF_UN_SHR = BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO; /// all shared information
enum BCF_UN_FMT = 8; /// unpack format and each sample
enum BCF_UN_IND = BCF_UN_FMT; /// a synonym of BCF_UN_FMT
enum BCF_UN_ALL = BCF_UN_SHR | BCF_UN_FMT; /// everything

/**
 *  bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field)
 *  @param b      the record to unpack
 *  @param which  one of the BCF_UN_* values above, selecting how much to unpack
 *
 *  Note that bcf_unpack() must be called even when reading VCF. It is safe
 *  to call the function repeatedly, it will not unpack the same field
 *  twice.
 */
int bcf_unpack(bcf1_t* b, int which);
448 
/**
 *  bcf_dup() - create a copy of a BCF record.
 *
 *  bcf_unpack() must be called on the returned copy, just as if it had been
 *  obtained from bcf_read(). bcf_dup() calls bcf_sync1(src) internally to
 *  reflect any changes made by bcf_update_* functions.
 *
 *  A record returned by a successful call should be released with
 *  bcf_destroy() once it is no longer needed.
 */
bcf1_t* bcf_dup(bcf1_t* src);

/**
 *  bcf_copy() - NOTE(review): undocumented in this binding; presumably copies
 *  src into the existing record dst -- confirm against htslib.
 */
bcf1_t* bcf_copy(bcf1_t* dst, bcf1_t* src);
462 
/// Write one VCF or BCF record. The type is determined at the open() call.
/**
    @param  fp  The file to write to
    @param  h   The header for the vcf/bcf file
    @param  v   The bcf1_t structure to write
    @return 0 on success; -1 on error
 */
int bcf_write(
    htsFile* fp,
    bcf_hdr_t* h,
    bcf1_t* v);
470 
/*
 *  The following four functions work only with VCFs and should rarely be
 *  called directly. Usually one wants their bcf_* alternatives, which work
 *  transparently with both VCFs and BCFs.
 */

/// Read a VCF format header
/**
    @param  fp  The file to read the header from
    @return Pointer to a populated header structure on success;
            NULL on failure

    Use bcf_hdr_read() instead.

    A header returned by a successful call should be released with
    bcf_hdr_destroy() once it is no longer needed.
*/
bcf_hdr_t* vcf_hdr_read(htsFile* fp);

/// Write a VCF format header
/**
    @param  fp  Output file
    @param  h   The header to write
    @return 0 on success; -1 on failure

    Use bcf_hdr_write() instead
*/
int vcf_hdr_write(htsFile* fp, const(bcf_hdr_t)* h);

/// Read a record from a VCF file
/**
    @param  fp  The file to read the record from
    @param  h   The header for the vcf file
    @param  v   The bcf1_t structure to populate
    @return 0 on success; -1 on end of file; < -1 on error

    Use bcf_read() instead
*/
int vcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);

/// Write a record to a VCF file
/**
    @param  fp  The file to write to
    @param  h   The header for the vcf file
    @param  v   The bcf1_t structure to write
    @return 0 on success; -1 on error

    Use bcf_write() instead
*/
int vcf_write(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);
516 
/**
 *  bcf_readrec() - helper function for the bcf_itr_next() macro;
 *  internal use, ignore it.
 */
int bcf_readrec(BGZF* fp, void* null_, void* v, int* tid, hts_pos_t* beg, hts_pos_t* end);

/// Write a line to a VCF file
/**
    @param fp     File to write it to
    @param line   Line to write
    @return 0 on success; -1 on failure

    @note The line is not checked, apart from ensuring that it ends with
          a newline, so this function should be used with care.
*/
int vcf_write_line(htsFile* fp, kstring_t* line);
536 
537 /**************************************************************************
538  *  Header querying and manipulation routines
539  **************************************************************************/
540 
/**
 *  bcf_hdr_dup() - create a new header using the supplied template.
 *  @param hdr  the template header
 *  @return NULL on failure, header otherwise
 *
 *  A header returned by a successful call should be released with
 *  bcf_hdr_destroy() once it is no longer needed.
 */
bcf_hdr_t* bcf_hdr_dup(const(bcf_hdr_t)* hdr);

/**
 *  bcf_hdr_combine() - copy header lines from src to dst if not already
 *  present in dst. See also bcf_translate().
 *  @return 0 on success, or a value with bits set on error:
 *      1 .. conflicting definitions of tag length
 *      // todo
 */
deprecated("Please use bcf_hdr_merge instead")
int bcf_hdr_combine(bcf_hdr_t* dst, const(bcf_hdr_t)* src);

/**
 *  bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate()
 *  @param dst: the destination header to be merged into, NULL on the first pass
 *  @param src: the source header
 *  @return NULL on failure, header otherwise
 *
 *  Notes:
 *      - typical use:
 *          bcf_hdr_t *dst = NULL;
 *          for (i=0; i<nsrc; i++) dst = bcf_hdr_merge(dst,src[i]);
 *
 *      - bcf_hdr_merge() replaces bcf_hdr_combine(), which had a problem when
 *      combining multiple BCF headers. The current bcf_hdr_combine() no
 *      longer has that problem, but became slow when used for many files.
 */
bcf_hdr_t* bcf_hdr_merge(bcf_hdr_t* dst, const(bcf_hdr_t)* src);
574 
/**
 *  bcf_hdr_add_sample() - add a new sample.
 *  @param hdr:     the header to modify
 *  @param sample:  sample name to be added
 *
 *  Note:
 *      Once all samples have been added, the internal header structure must
 *      be updated by bcf_hdr_sync(). The first bcf_hdr_write() or bcf_write()
 *      call normally does this automatically; otherwise the caller must force
 *      the update by calling bcf_hdr_sync() explicitly.
 */
int bcf_hdr_add_sample(bcf_hdr_t* hdr, const(char)* sample);

/**
 *  bcf_hdr_set() - read VCF header from a file and update the header.
 *  @param hdr:    the header to update
 *  @param fname:  NOTE(review): presumably the name of the file to read -- confirm
 */
int bcf_hdr_set(bcf_hdr_t* hdr, const(char)* fname);
589 
/// Appends formatted header text to _str_.
/**
 *  When _is_bcf_ is zero, `IDX` fields are discarded.
 *  @return 0 if successful, or negative if an error occurred
 *  @since 1.4
 */
int bcf_hdr_format(
    const(bcf_hdr_t)* hdr,
    int is_bcf,
    kstring_t* str);

/**
 *  bcf_hdr_fmt_text() - returns the formatted header (newly allocated string)
 *  and, through len, its length excluding the terminating \0. If the is_bcf
 *  parameter is unset, IDX fields are discarded.
 *  @deprecated Use bcf_hdr_format() instead as it can handle huge headers.
 */
deprecated("use bcf_hdr_format() instead")
char* bcf_hdr_fmt_text(
    const(bcf_hdr_t)* hdr,
    int is_bcf,
    int* len);

/** bcf_hdr_append() - append a new VCF header line; returns 0 on success */
int bcf_hdr_append(bcf_hdr_t* h, const(char)* line);

/**
 *  bcf_hdr_printf() - variadic function taking a printf-style `format`.
 *  NOTE(review): undocumented in this binding; presumably appends the
 *  formatted text as a header line, like bcf_hdr_append() -- confirm.
 */
int bcf_hdr_printf(bcf_hdr_t* h, const(char)* format, ...);

/** bcf_hdr_get_version() - VCF version, e.g. VCFv4.2 */
const(char)* bcf_hdr_get_version(const(bcf_hdr_t)* hdr);
612 
/// Set version in bcf header
/**
   @param hdr      BCF header struct
   @param version_ Version to set, e.g. "VCFv4.3"
                   (trailing underscore because `version` is a D keyword)
   @return 0 on success; < 0 on error
 */
int bcf_hdr_set_version(bcf_hdr_t* hdr, const(char)* version_);

/**
 *  bcf_hdr_remove() - remove VCF header tag
 *  @param h:         the header to modify
 *  @param type:      one of BCF_HL_*
 *  @param key:       tag name or NULL to remove all tags of the given type
 */
void bcf_hdr_remove(
    bcf_hdr_t* h,
    int type,
    const(char)* key);
627 
/**
 *  bcf_hdr_subset() - creates a new copy of the header removing unwanted samples
 *  @param h0:       the original header
 *  @param n:        number of samples to keep
 *  @param samples:  names of the samples to keep
 *  @param imap:     mapping from index in @samples to the sample index in the original file
 *  @return NULL on failure, header otherwise
 *
 *  Sample names that are not present in h0 are ignored; the number of
 *  unmatched samples can be checked by comparing n with
 *  bcf_hdr_nsamples(out_hdr).
 *  The function can be used to reorder samples.
 *  See also bcf_subset(), which subsets individual records.
 *  A header returned by a successful call should be released with
 *  bcf_hdr_destroy() once it is no longer needed.
 *
 *  MAINTAINER: the C parameter is `char *const *samples` (the pointers in the
 *  array are const). D const is transitive, so const(char*)* additionally
 *  makes the characters read-only.
 */
bcf_hdr_t* bcf_hdr_subset(const(bcf_hdr_t)* h0, int n, const(char*)* samples, int* imap);

/**
 *  bcf_hdr_seqnames() - creates a list of sequence names.
 *  @param h:      the header to query
 *  @param nseqs:  NOTE(review): presumably receives the number of names -- confirm
 *
 *  It is up to the caller to free the list (but not the sequence names).
 */
const(char*)* bcf_hdr_seqnames(const(bcf_hdr_t)* h, int* nseqs);
653 
/**
 *  bcf_hdr_nsamples() - get the number of samples.
 *  @param hdr  the header to query; only read, so const headers are accepted
 *  @return hdr.n[BCF_DT_SAMPLE], the in-use size of the sample dictionary
 */
pragma(inline, true) int bcf_hdr_nsamples (const(bcf_hdr_t)* hdr)
{
    return hdr.n[BCF_DT_SAMPLE];
}
659 
/*
 *  The following functions are for internal use and should rarely be
 *  called directly.
 */

/**
 *  bcf_hdr_parse() - internal use.
 *  NOTE(review): undocumented in this binding; presumably parses the header
 *  text htxt into hdr -- confirm.
 */
int bcf_hdr_parse(bcf_hdr_t* hdr, char* htxt);

/// Synchronize internal header structures
/**
    @param h  Header
    @return 0 on success, -1 on failure

    Updates the id, sample and contig arrays in the bcf_hdr_t structure
    so that they point to the same locations as the id, sample and contig
    dictionaries.
*/
int bcf_hdr_sync(bcf_hdr_t* h);
672 
/**
 * bcf_hdr_parse_line() - parse a single line of VCF textual header
 * @param h     BCF header struct
 * @param line  One or more lines of header text
 * @param len   Filled out with length data parsed from 'line'.
 * @return bcf_hrec_t* on success;
 *         NULL on error or on end of header text.
 *         NB: to distinguish error from end-of-header, check *len:
 *           *len == 0 indicates @p line did not start with "##"
 *           *len == -1 indicates failure, likely due to out of memory
 *           *len > 0 indicates a malformed header line
 *
 * Whenever *len > 0 on exit, it holds the full length of the line including
 * any trailing newline; this is also the case when NULL was returned because
 * the line was malformed. Callers can use it to skip to the next header line.
 */
bcf_hrec_t* bcf_hdr_parse_line(const(bcf_hdr_t)* h, const(char)* line, int* len);

/// Convert a bcf header record to string form
/**
 * @param hrec    Header record
 * @param str     Destination kstring
 * @return 0 on success; < 0 on error
 */
int bcf_hrec_format(const(bcf_hrec_t)* hrec, kstring_t* str);

/**
 * bcf_hdr_add_hrec() - NOTE(review): undocumented in this binding;
 * presumably adds the record hrec to the header hdr -- confirm.
 */
int bcf_hdr_add_hrec(bcf_hdr_t* hdr, bcf_hrec_t* hrec);
703 
/**
 *  bcf_hdr_get_hrec() - get header line info
 *  @param hdr:   the header to search
 *  @param type:  one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN
 *  @param key:   the header key for generic lines (e.g. "fileformat"), any field
 *                  for structured lines, typically "ID".
 *  @param value: the value which pairs with key. Can be NULL for BCF_HL_GEN
 *  @param str_class: the class of BCF_HL_STR line (e.g. "ALT" or "SAMPLE"), otherwise NULL
 */
bcf_hrec_t* bcf_hdr_get_hrec(const(bcf_hdr_t)* hdr, int type, const(char)* key,
    const(char)* value, const(char)* str_class);

/// Duplicate a header record
/**
    @param hrec   Header record to copy
    @return A new header record on success; NULL on failure

    A record returned by a successful call should be released with
    bcf_hrec_destroy() once it is no longer needed.
*/
bcf_hrec_t* bcf_hrec_dup(bcf_hrec_t* hrec);
727 
/// Add a new header record key
/**
    @param hrec  Header record
    @param str   Key name
    @param len   Length of @p str
    @return 0 on success; -1 on failure
*/
int bcf_hrec_add_key(
    bcf_hrec_t* hrec,
    const(char)* str,
    size_t len);

/// Set a header record value
/**
    @param hrec      Header record
    @param i         Index of value
    @param str       Value to set
    @param len       Length of @p str
    @param is_quoted Value should be quoted
    @return 0 on success; -1 on failure
*/
int bcf_hrec_set_val(bcf_hrec_t* hrec, int i, const(char)* str, size_t len, int is_quoted);

/// Lookup header record by key
/**
    @param hrec  Header record
    @param key   Key to look for
    NOTE(review): the meaning of the return value is not documented in this
    binding -- confirm against htslib.
*/
int bcf_hrec_find_key(bcf_hrec_t* hrec, const(char)* key);

/// Add an IDX header record
/**
    @param hrec   Header record
    @param idx    IDX value to add
    @return 0 on success; -1 on failure
*/
int hrec_add_idx(bcf_hrec_t* hrec, int idx);

/// Free up a header record and associated structures
/**
    @param hrec  Header record
 */
void bcf_hrec_destroy(bcf_hrec_t* hrec);
765 
766 /**************************************************************************
767  *  Individual record querying and manipulation routines
768  **************************************************************************/
769 
770 /** See the description of bcf_hdr_subset() */
771 int bcf_subset(const(bcf_hdr_t)* h, bcf1_t* v, int n, int* imap);
772 
/**
 *  bcf_translate() - translate tags ids to be consistent with different header. This function
 *                    is useful when lines from multiple VCF need to be combined.
 *  @dst_hdr:   the destination header, to be used in bcf_write(), see also bcf_hdr_combine()
 *  @src_hdr:   the source header, used in bcf_read()
 *  @src_line:  line obtained by bcf_read()
 *
 *  Only @dst_hdr is const; @src_hdr and @src_line are passed as mutable pointers.
 *  NOTE(review): the return convention is not documented here; presumably 0 on
 *  success and negative on error - confirm against the implementation.
 */
int bcf_translate(
    const(bcf_hdr_t)* dst_hdr,
    bcf_hdr_t* src_hdr,
    bcf1_t* src_line);
784 
/**
 *  bcf_get_variant_type[s]()  - returns one of VCF_REF, VCF_SNP, etc
 *
 *  bcf_get_variant_types() takes only the record, whereas
 *  bcf_get_variant_type() additionally takes @ith_allele to select one allele.
 *  NOTE(review): the record-wide variant presumably combines the per-allele
 *  types - confirm against the implementation.
 */
int bcf_get_variant_types(bcf1_t* rec);

/// ditto
int bcf_get_variant_type(bcf1_t* rec, int ith_allele);

/// NOTE(review): undocumented; presumably returns non-zero when record v is a SNP - confirm.
int bcf_is_snp(bcf1_t* v);
793 
/**
 *  bcf_update_filter() - sets the FILTER column
 *  @hdr:      BCF header
 *  @line:     record whose FILTER column is replaced
 *  @flt_ids:  The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
 *  @n:        Number of filters. If n==0, all filters are removed
 *
 *  NOTE(review): the return convention is not stated here; presumably 0 on
 *  success and negative on error - confirm.
 */
int bcf_update_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int* flt_ids, int n);
/**
 *  bcf_add_filter() - adds to the FILTER column
 *  @hdr:      BCF header
 *  @line:     record to modify
 *  @flt_id:   filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
 *
 *  If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed.
 *
 *  NOTE(review): the return convention is not stated here - confirm.
 */
int bcf_add_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int flt_id);
/**
 *  bcf_remove_filter() - removes from the FILTER column
 *  @hdr:      BCF header
 *  @line:     record to modify
 *  @flt_id:   filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
 *  @pass:     when set to 1 and no filters are present, set to PASS
 *
 *  NOTE(review): the return convention is not stated here - confirm.
 */
int bcf_remove_filter(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    int flt_id,
    int pass);
/**
 *  bcf_has_filter() - tests whether a named filter is set on a record
 *  @hdr:      BCF header
 *  @line:     record to inspect
 *  @filter:   filter name (passed as a mutable char pointer)
 *
 *  Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably.
 */
int bcf_has_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, char* filter);
/**
 *  bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALT column
 *  @hdr:               BCF header
 *  @line:              record to modify
 *  @alleles:           Array of alleles
 *  @nals:              Number of alleles
 *  @alleles_string:    Comma-separated alleles, starting with the REF allele
 *
 *  bcf_update_alleles() takes the alleles as an array of C strings plus a
 *  count; bcf_update_alleles_str() takes them as one comma-separated string.
 *  NOTE(review): the return convention is not stated here - confirm.
 */
int bcf_update_alleles(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char*)* alleles,
    int nals);

/// ditto
int bcf_update_alleles_str(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* alleles_string);
837 
/**
  *  bcf_update_id() - sets new ID string
  *  bcf_add_id() - adds to the ID string checking for duplicates
  *
  *  Both take the header, the record to modify and the ID as a C string.
  *  NOTE(review): the return convention is not stated here - confirm.
  */
int bcf_update_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id);

/// ditto
int bcf_add_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id);
845 
/**
 *  bcf_update_info_*() - typed front ends for bcf_update_info()
 *  @param hdr:       the BCF header
 *  @param line:      VCF line to be edited
 *  @param key:       the INFO tag to be updated
 *  @param values:    pointer to the array of values. Pass NULL to remove the tag.
 *  @param n:         number of values in the array. When set to 0, the INFO tag is removed
 *  @return 0 on success or negative value on error.
 *
 *  Every wrapper forwards its arguments unchanged and only supplies the
 *  matching BCF_HT_* type code. bcf_update_info_string() has no @p n
 *  parameter and always passes a count of 1.
 *
 *  The @p values argument of bcf_update_info_flag() is optional,
 *  @p n indicates whether the flag is set or removed.
 *
 *  Note that updating an END info tag will cause line->rlen to be
 *  updated as a side-effect (removing the tag will set it to the
 *  string length of the REF allele). If line->pos is being changed as
 *  well, it is important that this is done before calling
 *  bcf_update_info_int32() to update the END tag, otherwise rlen will be
 *  set incorrectly.  If the new END value is less than or equal to
 *  line->pos, a warning will be printed and line->rlen will be set to
 *  the length of the REF allele.
 */
pragma(inline, true)
{
    // TODO: rewrite as template

    /// Update an INFO tag holding integers (BCF_HT_INT)
    auto bcf_update_info_int32(
        const(bcf_hdr_t)* hdr,
        bcf1_t* line,
        const(char)* key,
        const(void)* values,
        int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_INT);
    }

    /// Update an INFO tag holding floats (BCF_HT_REAL)
    auto bcf_update_info_float(
        const(bcf_hdr_t)* hdr,
        bcf1_t* line,
        const(char)* key,
        const(void)* values,
        int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL);
    }

    /// Set or remove an INFO flag (BCF_HT_FLAG)
    auto bcf_update_info_flag(
        const(bcf_hdr_t)* hdr,
        bcf1_t* line,
        const(char)* key,
        const(void)* values,
        int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_FLAG);
    }

    /// Update an INFO tag holding a string (BCF_HT_STR); the count is fixed at 1
    auto bcf_update_info_string(
        const(bcf_hdr_t)* hdr,
        bcf1_t* line,
        const(char)* key,
        const(void)* values)
    {
        return bcf_update_info(hdr, line, key, values, 1, BCF_HT_STR);
    }
}
877 
/// Generic INFO updater that the typed bcf_update_info_*() wrappers forward to.
/**
 *  @param hdr     the BCF header
 *  @param line    VCF line to be edited
 *  @param key     the INFO tag to be updated
 *  @param values  pointer to the array of values
 *  @param n       number of values in the array
 *  @param type    BCF_HT_* code describing @p values; the wrappers in this
 *                 module pass BCF_HT_INT, BCF_HT_REAL, BCF_HT_FLAG,
 *                 BCF_HT_STR or BCF_HT_LONG
 */
int bcf_update_info (
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* key,
    const(void)* values,
    int n,
    int type);
885 
/// Set or update 64-bit integer INFO values
/**
 *  @param hdr:       the BCF header
 *  @param line:      VCF line to be edited
 *  @param key:       the INFO tag to be updated
 *  @param values:    pointer to the array of values. Pass NULL to remove the tag.
 *  @param n:         number of values in the array. When set to 0, the INFO tag is removed
 *  @return 0 on success or negative value on error.
 *
 *  The input is an array of 64-bit integers (int64_t in C).  What is
 *  actually stored is shrunk to the smallest size able to hold every
 *  value.
 *
 *  INFO values outside of the range BCF_MIN_BT_INT32 to BCF_MAX_BT_INT32
 *  can only be written to VCF files.
 */
pragma(inline, true)
auto bcf_update_info_int64(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* key,
    const(long)* values,
    int n)
{
    // The generic updater takes an untyped pointer; BCF_HT_LONG identifies
    // the element type.
    const(void)* untyped = values;
    return bcf_update_info(hdr, line, key, untyped, n, BCF_HT_LONG);
}
909 
910 /**
911  *  bcf_update_format_*() - functions for updating FORMAT fields
912  *  @values:    pointer to the array of values, the same number of elements
913  *              is expected for each sample. Missing values must be padded
914  *              with bcf_*_missing or bcf_*_vector_end values.
915  *  @n:         number of values in the array. If n==0, existing tag is removed.
916  *
917  *  The function bcf_update_format_string() is a higher-level (slower) variant of
918  *  bcf_update_format_char(). The former accepts array of \0-terminated strings
919  *  whereas the latter requires that the strings are collapsed into a single array
920  *  of fixed-length strings. In case of strings with variable length, shorter strings
921  *  can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char()
922  *  are not \0-terminated.
923  *
924  *  Returns 0 on success or negative value on error.
925  */
926 
927 }/// closing @nogc nothrow
928 
pragma(inline, true) {
    /// Update a FORMAT tag from an array of 32-bit integers (BCF_HT_INT).
    /// n is the number of values in the array; n==0 removes an existing tag.
    auto bcf_update_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(int) *values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_INT); }

    /// Update a FORMAT tag from an array of floats (BCF_HT_REAL).
    /// n is the number of values in the array; n==0 removes an existing tag.
    auto bcf_update_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(float) *values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_REAL); }

    /// Update a FORMAT tag from character data (BCF_HT_STR).
    /// values is expected to be ONE collapsed array of fixed-length strings
    /// (shorter strings \0-padded, not \0-terminated), i.e. a `const(char)*`.
    /// The parameter is declared `const(void)*` so that such a buffer can be
    /// passed directly while callers written against the earlier
    /// `const(char)**` signature keep compiling.
    auto bcf_update_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_STR); }

    /// Update the GT FORMAT field.
    /// gts is expected to point at int values built with bcf_gt_phased(),
    /// bcf_gt_unphased() or bcf_gt_missing (they are stored as BCF_HT_INT);
    /// n is the number of values in the array.
    /// The parameter is declared `const(void)*` so that an int array can be
    /// passed directly while callers written against the earlier
    /// `const(char)**` signature keep compiling.
    auto bcf_update_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, const(void) *gts, int n)
    {
        // A D string literal is zero-terminated and converts implicitly to
        // const(char)*, so no runtime toStringz() conversion is needed.
        return bcf_update_format(hdr, line, "GT", gts, n, BCF_HT_INT);
    }
}
940 
941 @nogc nothrow {
942 
/// Update a FORMAT tag from an array of \0-terminated strings.
/**
 *  Higher-level (slower) variant of bcf_update_format_char(), which instead
 *  requires the strings collapsed into a single fixed-length array.
 *
 *  @param hdr     the BCF header
 *  @param line    VCF line to be edited
 *  @param key     the FORMAT tag to be updated
 *  @param values  array of \0-terminated strings
 *  @param n       number of values in the array. If n==0, existing tag is removed.
 *  @return 0 on success or negative value on error.
 */
int bcf_update_format_string (
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* key,
    const(char*)* values,
    int n);
949 
/// Generic FORMAT updater that the typed bcf_update_format_*() wrappers forward to.
/**
 *  @param hdr     the BCF header
 *  @param line    VCF line to be edited
 *  @param key     the FORMAT tag to be updated
 *  @param values  pointer to the array of values
 *  @param n       number of values in the array
 *  @param type    BCF_HT_* code describing @p values; the wrappers in this
 *                 module pass BCF_HT_INT, BCF_HT_REAL or BCF_HT_STR
 */
int bcf_update_format(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* key,
    const(void)* values,
    int n,
    int type);
957 
/// Genotype encoding helpers, intended for use with bcf_update_genotypes only.
/// `idx` is VCF's GT allele number (1-based index to ALT or 0 for the reference allele);
/// `val` is the encoded form, as obtained from bcf_get_genotypes() below.
/// An encoded value stores (allele + 1) in the bits above bit 0 and the phasing flag in bit 0.
// TODO: is int appropriate?
pragma(inline, true)
{
    auto bcf_gt_phased(int idx)
    {
        return ((idx + 1) << 1) | 1;
    }

    /// ditto
    auto bcf_gt_unphased(int idx)
    {
        return (idx + 1) << 1;
    }

    /// ditto
    auto bcf_gt_is_missing(int val)
    {
        // A stored allele of zero (above the phase bit) means "missing".
        return (val >> 1) == 0 ? 1 : 0;
    }

    /// ditto
    auto bcf_gt_is_phased(int idx)
    {
        return idx & 1;
    }

    /// ditto
    auto bcf_gt_allele(int val)
    {
        return (val >> 1) - 1;
    }
}

/// ditto
enum int bcf_gt_missing = 0;
975 
/** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */
pragma(inline, true)
{
    auto bcf_alleles2gt(int a, int b)
    {
        // The larger allele selects the triangular-number offset, the
        // smaller one the position within that row.
        if (a > b)
            return a * (a + 1) / 2 + b;
        return b * (b + 1) / 2 + a;
    }

    /// Inverse of bcf_alleles2gt(): splits genotype index igt into its two
    /// allele indexes, storing the smaller in *a and the larger in *b.
    void bcf_gt2alleles(int igt, int* a, int* b)
    {
        int rowEnd = 0;   // genotype index of the homozygous pair closing the current row
        int step = 1;     // distance to the next such index
        for (; rowEnd < igt; rowEnd += step)
            ++step;

        const int larger = step - 1;
        const int smaller = igt - rowEnd + larger;
        *b = larger;
        *a = smaller;
    }
}
987 
/**
 * bcf_get_fmt() - returns pointer to FORMAT's field data
 * @hdr:    for access to BCF_DT_ID dictionary
 * @line:   VCF line obtained from vcf_parse1
 * @key:    one of GT,PL,...
 *
 * Returns bcf_fmt_t* if the call succeeded, or returns NULL when the field
 * is not available.
 */
bcf_fmt_t* bcf_get_fmt(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key);

/// INFO counterpart of bcf_get_fmt(): same parameters, returns a bcf_info_t*.
/// NOTE(review): presumably also returns NULL when the tag is not available - confirm.
bcf_info_t* bcf_get_info(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key);
1000 
/**
 * bcf_get_*_id() - returns pointer to FORMAT/INFO field data given the header index instead of the string ID
 * @line: VCF line obtained from vcf_parse1
 * @id:  The header index for the tag, obtained from bcf_hdr_id2int()
 *
 * Returns bcf_fmt_t* / bcf_info_t*. These functions do not check if the index is valid
 * as their goal is to avoid the header lookup; passing a valid @id is
 * therefore the caller's responsibility.
 */
bcf_fmt_t* bcf_get_fmt_id(bcf1_t* line, const int id);

/// ditto
bcf_info_t* bcf_get_info_id(bcf1_t* line, const int id);
1012 
/**
 *  bcf_get_info_*() - get INFO values, integers or floats
 *  @param hdr:    BCF header
 *  @param line:   BCF record
 *  @param tag:    INFO tag to retrieve
 *  @param dst:    *dst is pointer to a memory location, can point to NULL
 *  @param ndst:   pointer to the size of allocated memory
 *  @return  >=0 on success
 *          -1 .. no such INFO tag defined in the header
 *          -2 .. clash between types defined in the header and encountered in the VCF record
 *          -3 .. tag is not present in the VCF record
 *          -4 .. the operation could not be completed (e.g. out of memory)
 *
 *  Returns negative value on error or the number of values (including
 *  missing values) put in *dst on success. bcf_get_info_string() returns
 *  on success the number of characters stored excluding the nul-
 *  terminating byte. bcf_get_info_flag() does not store anything in *dst
 *  but returns 1 if the flag is set or 0 if not.
 *
 *  *dst will be reallocated if it is not big enough (i.e. *ndst is too
 *  small) or NULL on entry.  The new size will be stored in *ndst.
 *
 *  Each wrapper forwards to bcf_get_info_values() and only supplies the
 *  matching BCF_HT_* type code.
 */
pragma(inline, true)
{
    /// Retrieve integer INFO values (BCF_HT_INT)
    auto bcf_get_info_int32(
        const(bcf_hdr_t)* hdr,
        bcf1_t* line,
        const(char)* tag,
        void** dst,
        int* ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT);
    }

    /// Retrieve float INFO values (BCF_HT_REAL)
    auto bcf_get_info_float(
        const(bcf_hdr_t)* hdr,
        bcf1_t* line,
        const(char)* tag,
        void** dst,
        int* ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL);
    }

    /// Retrieve a string INFO value (BCF_HT_STR)
    auto bcf_get_info_string(
        const(bcf_hdr_t)* hdr,
        bcf1_t* line,
        const(char)* tag,
        void** dst,
        int* ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_STR);
    }

    /// Query an INFO flag (BCF_HT_FLAG)
    auto bcf_get_info_flag(
        const(bcf_hdr_t)* hdr,
        bcf1_t* line,
        const(char)* tag,
        void** dst,
        int* ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_FLAG);
    }
}
1045 
/// Generic INFO getter that the typed bcf_get_info_*() wrappers forward to.
/**
 *  @param hdr   BCF header
 *  @param line  BCF record
 *  @param tag   INFO tag to retrieve
 *  @param dst   *dst is pointer to a memory location, can point to NULL
 *  @param ndst  pointer to the size of allocated memory
 *  @param type  BCF_HT_* code of the requested values; the wrappers in this
 *               module pass BCF_HT_INT, BCF_HT_REAL, BCF_HT_STR,
 *               BCF_HT_FLAG or BCF_HT_LONG
 */
int bcf_get_info_values (
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* tag,
    void** dst,
    int* ndst,
    int type);
1053 
/// Put integer INFO values into an int64_t array
/**
 *  @param hdr:    BCF header
 *  @param line:   BCF record
 *  @param tag:    INFO tag to retrieve
 *  @param dst:    *dst is pointer to a memory location, can point to NULL
 *  @param ndst:   pointer to the size of allocated memory
 *  @return  >=0 on success
 *          -1 .. no such INFO tag defined in the header
 *          -2 .. clash between types defined in the header and encountered in the VCF record
 *          -3 .. tag is not present in the VCF record
 *          -4 .. the operation could not be completed (e.g. out of memory)
 *
 *  On success the return value is the number of values (including
 *  missing values) put in *dst; on error it is negative.
 *
 *  *dst will be reallocated if it is not big enough (i.e. *ndst is too
 *  small) or NULL on entry.  The new size will be stored in *ndst.
 */
pragma(inline, true)
auto bcf_get_info_int64(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* tag,
    long** dst,
    int* ndst)
{
    // The generic getter takes an untyped destination; BCF_HT_LONG selects
    // the 64-bit integer representation.
    void** untypedDst = cast(void**) dst;
    return bcf_get_info_values(hdr, line, tag, untypedDst, ndst, BCF_HT_LONG);
}
1081 
1082 /**
1083  *  bcf_get_format_*() - same as bcf_get_info*() above
1084  *
1085  *  The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char().
1086  *  see the description of bcf_update_format_string() and bcf_update_format_char() above.
 *  Unlike other bcf_get_format_*() functions, bcf_get_format_string() allocates two arrays:
1088  *  a single block of \0-terminated strings collapsed into a single array and an array of pointers
1089  *  to these strings. Both arrays must be cleaned by the user.
1090  *
1091  *  Returns negative value on error or the number of written values on success.
1092  *
1093  *  Use the returned number of written values for accessing valid entries of dst, as ndst is only a
1094  *  watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over
1095  *  values from previous calls to bcf_get_format_*() on lines with more values per sample.
1096  *
1097  *  Example:
1098  *      int ndst = 0; char **dst = NULL;
1099  *      if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 )
1100  *          for (i=0; i<bcf_hdr_nsamples(hdr); i++) printf("%s\n", dst[i]);
1101  *      free(dst[0]); free(dst);
1102  *
1103  *  Example:
1104  *      int i, j, ngt, nsmpl = bcf_hdr_nsamples(hdr);
1105  *      int32_t *gt_arr = NULL, ngt_arr = 0;
1106  *
1107  *      ngt = bcf_get_genotypes(hdr, line, &gt_arr, &ngt_arr);
1108  *      if ( ngt<=0 ) return; // GT not present
1109  *
1110  *      int max_ploidy = ngt/nsmpl;
1111  *      for (i=0; i<nsmpl; i++)
1112  *      {
1113  *        int32_t *ptr = gt_arr + i*max_ploidy;
1114  *        for (j=0; j<max_ploidy; j++)
1115  *        {
1116  *           // if true, the sample has smaller ploidy
1117  *           if ( ptr[j]==bcf_int32_vector_end ) break;
1118  *
1119  *           // missing allele
1120  *           if ( bcf_gt_is_missing(ptr[j]) ) continue;
1121  *
1122  *           // the VCF 0-based allele index
1123  *           int allele_index = bcf_gt_allele(ptr[j]);
1124  *
1125  *           // is phased?
1126  *           int is_phased = bcf_gt_is_phased(ptr[j]);
1127  *
1128  *           // .. do something ..
1129  *         }
1130  *      }
1131  *      free(gt_arr);
1132  *
1133  */
1134 
1135 }/// closing @nogc nothrow from line 928
1136 
pragma(inline, true) {
    /// Retrieve integer FORMAT values (BCF_HT_INT).
    /// Returns a negative value on error or the number of written values on success.
    auto bcf_get_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT); }

    /// Retrieve float FORMAT values (BCF_HT_REAL).
    /// Returns a negative value on error or the number of written values on success.
    auto bcf_get_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_REAL); }

    /// Retrieve character FORMAT values (BCF_HT_STR).
    /// Returns a negative value on error or the number of written values on success.
    auto bcf_get_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_STR); }

    /// Retrieve the GT FORMAT field as integers (BCF_HT_INT); decode the
    /// entries with bcf_gt_allele(), bcf_gt_is_missing() and bcf_gt_is_phased().
    auto bcf_get_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, void **dst, int *ndst)
    {
        // A D string literal is zero-terminated and converts implicitly to
        // const(char)*, so no runtime toStringz() conversion is needed.
        return bcf_get_format_values(hdr, line, "GT", dst, ndst, BCF_HT_INT);
    }
}
1147 
1148 @nogc nothrow {
1149 
/// Retrieve a string FORMAT tag as an array of \0-terminated strings.
/**
 *  Higher-level (slower) variant of bcf_get_format_char().  It allocates two
 *  arrays: a single block holding the \0-terminated strings and an array of
 *  pointers into that block.  Both must be released by the caller, e.g.
 *  free(dst[0]); free(dst);
 *
 *  @param hdr   BCF header
 *  @param line  BCF record
 *  @param tag   FORMAT tag to retrieve
 *  @param dst   address of the pointer array
 *  @param ndst  pointer to the size of allocated memory
 *  @return negative value on error or the number of written values on success
 */
int bcf_get_format_string(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* tag,
    char*** dst,
    int* ndst);
1156 
/// Generic FORMAT getter that the typed bcf_get_format_*() wrappers forward to.
/**
 *  @param hdr   BCF header
 *  @param line  BCF record
 *  @param tag   FORMAT tag to retrieve
 *  @param dst   *dst is pointer to a memory location
 *  @param ndst  pointer to the size of allocated memory; only a watermark,
 *               so use the return value to find the valid entries of *dst
 *  @param type  BCF_HT_* code of the requested values; the wrappers in this
 *               module pass BCF_HT_INT, BCF_HT_REAL or BCF_HT_STR
 *  @return negative value on error or the number of written values on success
 */
int bcf_get_format_values(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* tag,
    void** dst,
    int* ndst,
    int type);
1164 
1165 /**************************************************************************
1166  *  Helper functions
1167  **************************************************************************/
1168 
/**
 *  bcf_hdr_id2int() - Translates string into numeric ID
 *  bcf_hdr_int2id() - Translates numeric ID into string
 *  @hdr:      BCF header holding the dictionaries
 *  @type:     one of BCF_DT_ID, BCF_DT_CTG, BCF_DT_SAMPLE
 *  @id:       tag name, such as: PL, DP, GT, etc.
 *
 *  Returns -1 if string is not in dictionary, otherwise numeric ID which identifies
 *  fields in BCF records.
 */
int bcf_hdr_id2int(const(bcf_hdr_t)* hdr, int type, const(char)* id);
1179 
/// Translates numeric ID into string: returns the key stored at position
/// int_id of the header dictionary selected by type.
/// No range check is performed on either index.
pragma(inline, true)
auto bcf_hdr_int2id(const(bcf_hdr_t)* hdr, int type, int int_id)
{
    const entry = &hdr.id[type][int_id];
    return entry.key;
}
1183 
/**
 *  bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID
 *  bcf_hdr_id2name() - Translates numeric ID to sequence name
 *  bcf_seqname()     - Returns the sequence name of a record
 *
 *  bcf_hdr_name2id() is a lookup in the BCF_DT_CTG dictionary through
 *  bcf_hdr_id2int().
 *
 *  bcf_hdr_id2name() and bcf_seqname() return NULL when no name can be
 *  found: the header is missing, the record is missing, or the contig ID
 *  lies outside the header's contig dictionary.  Use bcf_seqname_safe() when
 *  a printable string is always required.
 */
pragma(inline, true)
int bcf_hdr_name2id(const(bcf_hdr_t)* hdr, const(char)* id)
{
    return bcf_hdr_id2int(hdr, BCF_DT_CTG, id);
}

/// ditto
pragma(inline, true)
const(char)* bcf_hdr_id2name(const(bcf_hdr_t)* hdr, int rid)
{
    // Guard the dictionary access: without it a bad rid indexes past the
    // end of the contig dictionary.
    if (hdr is null || rid < 0 || rid >= hdr.n[BCF_DT_CTG])
        return null;
    return hdr.id[BCF_DT_CTG][rid].key;
}

/// ditto
pragma(inline, true)
const(char)* bcf_seqname(const(bcf_hdr_t)* hdr, bcf1_t* rec)
{
    // A missing record is mapped to an invalid ID so that the range check
    // in bcf_hdr_id2name() turns it into a NULL result.
    return bcf_hdr_id2name(hdr, rec is null ? -1 : rec.rid);
}
1193 
/** Return CONTIG name, or "(unknown)"

    Like bcf_seqname(), but this function will never return NULL.  If
    the contig name cannot be found (either because @p hdr was not
    supplied or rec->rid was out of range) it returns the string
    "(unknown)".

    @param hdr  BCF header; may be absent, see above
    @param rec  record whose rid selects the contig
    @return the contig name, or "(unknown)"; never NULL
*/
const(char)* bcf_seqname_safe(const(bcf_hdr_t)* hdr, const(bcf1_t)* rec);
1202 
/**
 *  bcf_hdr_id2*() - Accessors for bcf_idinfo_t (macros in the C header)
 *  @type:      one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT
 *  @int_id:    return value of bcf_hdr_id2int, must be >=0
 *
 *  The returned values are:
 *     bcf_hdr_id2length   ..  whether the number of values is fixed or variable, one of BCF_VL_*
 *     bcf_hdr_id2number   ..  the number of values, 0xfffff for variable length fields
 *     bcf_hdr_id2type     ..  the field type, one of BCF_HT_*
 *     bcf_hdr_id2coltype  ..  the column type, one of BCF_HL_*
 *
 *  All four are unpacked from the same info[type] word of the BCF_DT_ID
 *  dictionary entry: column type in bits 0-3, field type in bits 4-7,
 *  length class in bits 8-11 and the number in the bits from 12 upwards.
 *
 *  Notes: Prior to using these accessors, the presence of the info should be
 *  tested with bcf_hdr_idinfo_exists().
 */
// TODO: for dict_type and col_type use ENUMs
pragma(inline, true)
{
    auto bcf_hdr_id2length(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        const packed = hdr.id[BCF_DT_ID][int_id].val.info[type];
        return (packed >> 8) & 0xf;
    }

    /// ditto
    auto bcf_hdr_id2number(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        const packed = hdr.id[BCF_DT_ID][int_id].val.info[type];
        return packed >> 12;
    }

    /// ditto
    uint bcf_hdr_id2type(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        const packed = hdr.id[BCF_DT_ID][int_id].val.info[type];
        return cast(uint)((packed >> 4) & 0xf);
    }

    /// ditto
    uint bcf_hdr_id2coltype(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        const packed = hdr.id[BCF_DT_ID][int_id].val.info[type];
        return cast(uint)(packed & 0xf);
    }

    /// ditto
    auto bcf_hdr_idinfo_exists(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        // A negative ID is rejected before the dictionary is touched; a
        // column type of 0xf marks an entry without info for this type.
        if (int_id < 0)
            return 0;
        if (bcf_hdr_id2coltype(hdr, type, int_id) == 0xf)
            return 0;
        return 1;
    }

    /// ditto
    auto bcf_hdr_id2hrc(const(bcf_hdr_t)* hdr, int dict_type, int col_type, int int_id)
    {
        // Contigs live in their own dictionary and always use hrec slot 0;
        // every other dict_type is served from BCF_DT_ID using col_type.
        const isContig = dict_type == BCF_DT_CTG;
        return hdr.id[isContig ? BCF_DT_CTG : BCF_DT_ID][int_id].val.hrec[isContig ? 0 : col_type];
    }
}
/// Convert BCF FORMAT data to string form
/**
 * @param s    kstring to write into
 * @param n    number of items in @p data
 * @param type type of items in @p data
 *             (NOTE(review): presumably one of the BCF_BT_* codes - confirm)
 * @param data BCF format data
 * @return  0 on success
 *         -1 if out of memory
 */
int bcf_fmt_array(kstring_t* s, int n, int type, void* data);
1243 
/// NOTE(review): undocumented here. Judging by the name and signature it
/// presumably formats a size-prefixed BCF array starting at ptr into s and
/// returns the position just past the consumed data - confirm against the
/// implementation before relying on this.
ubyte* bcf_fmt_sized_array(kstring_t* s, ubyte* ptr);
1245 
/// Encode a variable-length char array in BCF format
/**
 * @param s    kstring to write into
 * @param l    length of input, i.e. the number of chars in @p a
 * @param a    input data to encode
 * @return 0 on success; < 0 on error
 */
int bcf_enc_vchar(kstring_t* s, int l, const(char)* a);
1254 
/// Encode a variable-length integer array in BCF format
/**
 * @param s      kstring to write into
 * @param n      total number of items in @p a (<= 0 to encode BCF_BT_NULL)
 * @param a      input data to encode (passed as a mutable pointer)
 * @param wsize  vector length (<= 0 is equivalent to @p n)
 * @return 0 on success; < 0 on error
 * @note @p n should be an exact multiple of @p wsize
 */
int bcf_enc_vint(kstring_t* s, int n, int* a, int wsize);
1265 
/// Encode a variable-length float array in BCF format
/**
 * @param s      kstring to write into
 * @param n      total number of items in @p a (<= 0 to encode BCF_BT_NULL)
 * @param a      input data to encode (passed as a mutable pointer)
 * @return 0 on success; < 0 on error
 */
int bcf_enc_vfloat(kstring_t* s, int n, float* a);
1274 
1275 /**************************************************************************
1276  *  BCF index
1277  *
1278  *  Note that these functions work with BCFs only. See synced_bcf_reader.h
1279  *  which provides (amongst other things) an API to work transparently with
1280  *  both indexed BCFs and VCFs.
1281  **************************************************************************/
1282 
/// Destroy a BCF iterator; this is simply another name for hts_itr_destroy.
alias bcf_itr_destroy = hts_itr_destroy;
1284 
1285 } /// closing @nogc nothrow from line 1136
1286 
1287 pragma(inline, true) {
1288     /// Generate an iterator for an integer-based range query
1289     auto bcf_itr_queryi(const(hts_idx_t) *idx, int tid, int beg, int end)
1290         { return hts_itr_query(idx, tid, beg, end, &bcf_readrec); }
1291     
1292     /// Generate an iterator for a string-based range query
1293     auto bcf_itr_querys(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, const(char) *s)
1294         { return hts_itr_querys(idx, s, cast(hts_name2id_f) &bcf_hdr_name2id, cast(void *) hdr,
1295                                 &hts_itr_query, &bcf_readrec); }
1296 
1297     /// Iterate through the range
1298     /// r should (probably) point to your VCF (BCF) row structure
1299     /// TODO: attempt to define parameter r as bcf1_t *, which is what I think it should be
1300     int bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) {
1301         if (htsfp.is_bgzf)
1302             return hts_itr_next(htsfp.fp.bgzf, itr, r, null);
1303 
1304         hts_log_error(__FUNCTION__,"Only bgzf compressed files can be used with iterators");
1305         errno = EINVAL;
1306         return -2;
1307     }
1308 
1309 @nogc nothrow:
1310 
/// Load a BCF index
/** @param fn   BCF file name
    @return The index, or NULL if an error occurred.
    @note This only works for BCF files.  Consider synced_bcf_reader instead,
          which works for both BCF and VCF.

    Forwards to hts_idx_load() requesting the HTS_FMT_CSI index format.
*/
auto bcf_index_load(const(char)* fn)
{
    return hts_idx_load(fn, HTS_FMT_CSI);
}
1318 
/// Get a list (char **) of sequence names from the index -- free only the array, not the values
auto bcf_index_seqnames(const(hts_idx_t)* idx, const(bcf_hdr_t)* hdr, int* nptr)
{
    // hts_idx_seqnames() receives bcf_hdr_id2name cast to hts_id2name_f,
    // together with the header cast to an untyped pointer.
    auto idToName = cast(hts_id2name_f) &bcf_hdr_id2name;
    auto untypedHdr = cast(void*) hdr;
    return hts_idx_seqnames(idx, nptr, idToName, untypedHdr);
}
1322 }
1323 
/// Load a BCF index from a given index file name
/**  @param fn     Input BAM/BCF/etc filename
     @param fnidx  The input index filename
     @return  The index, or NULL if an error occurred.
     @note This only works for BCF files.  Consider synced_bcf_reader instead,
           which works for both BCF and VCF.
*/
hts_idx_t* bcf_index_load2(const(char)* fn, const(char)* fnidx);
1332 
/// Load a BCF index from a given index file name, with behaviour flags
/**  @param fn     Input BAM/BCF/etc filename
     @param fnidx  The input index filename
     @param flags  Flags to alter behaviour (see description)
     @return  The index, or NULL if an error occurred.
     @note This only works for BCF files.  Consider synced_bcf_reader instead,
           which works for both BCF and VCF.

     The @p flags parameter can be set to a combination of the following
     values:

        HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
        HTS_IDX_SILENT_FAIL   Fail silently if the index is not present

     Equivalent to hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags);
*/
hts_idx_t* bcf_index_load3(const(char)* fn, const(char)* fnidx, int flags);
1350 
/**
 *  bcf_index_build() - Generate and save an index file
 *  @fn:         Input VCF(compressed)/BCF filename
 *  @min_shift:  log2(width of the smallest bin), e.g. a value of 14
 *  imposes a 16k base lower limit on the width of index bins.
 *  Positive to generate CSI, or 0 to generate TBI. However, a small
 *  value of min_shift would create a large index, which would lead to
 *  reduced performance when using the index. A recommended value is 14.
 *  For BCF files, only the CSI index can be generated.
 *
 *  This variant takes no output filename; use bcf_index_build2() or
 *  bcf_index_build3() to choose where the index is written.
 *
 *  Returns 0 if successful, or negative if an error occurred.
 *
 *  List of error codes:
 *      -1 .. indexing failed
 *      -2 .. opening @fn failed
 *      -3 .. format not indexable
 *      -4 .. failed to create and/or save the index
 */
int bcf_index_build(const(char)* fn, int min_shift);
1370 
/**
 *  bcf_index_build2() - Generate and save an index to a specific file
 *  @fn:         Input VCF/BCF filename
 *  @fnidx:      Output filename, or NULL to add .csi/.tbi to @fn
 *  @min_shift:  Positive to generate CSI, or 0 to generate TBI
 *               (see bcf_index_build() for how to choose a value)
 *
 *  Returns 0 if successful, or negative if an error occurred.
 *
 *  List of error codes:
 *      -1 .. indexing failed
 *      -2 .. opening @fn failed
 *      -3 .. format not indexable
 *      -4 .. failed to create and/or save the index
 */
int bcf_index_build2(const(char)* fn, const(char)* fnidx, int min_shift);
1386 
/**
 *  bcf_index_build3() - Generate and save an index to a specific file
 *  @fn:         Input VCF/BCF filename
 *  @fnidx:      Output filename, or NULL to add .csi/.tbi to @fn
 *  @min_shift:  Positive to generate CSI, or 0 to generate TBI
 *               (see bcf_index_build() for how to choose a value)
 *  @n_threads:  Number of VCF/BCF decoder threads
 *
 *  Returns 0 if successful, or negative if an error occurred.
 *
 *  List of error codes:
 *      -1 .. indexing failed
 *      -2 .. opening @fn failed
 *      -3 .. format not indexable
 *      -4 .. failed to create and/or save the index
 */
int bcf_index_build3(
    const(char)* fn,
    const(char)* fnidx,
    int min_shift,
    int n_threads);
1407 
/// Initialise fp->idx for the current format type, for VCF and BCF files.
/** @param fp        File handle for the data file being written.
    @param h         BCF header structure (needed for BAI and CSI).
    @param min_shift CSI bin size (CSI default is 14).
    @param fnidx     Filename to write index to.  This pointer must remain valid
                     until after bcf_idx_save is called.
    @return          0 on success, <0 on failure.
    @note This must be called after the header has been written, but before
          any other data.
*/
int bcf_idx_init(htsFile* fp, bcf_hdr_t* h, int min_shift, const(char)* fnidx);
1419 
/// Writes the index initialised with bcf_idx_init to disk.
/** @param fp        File handle for the data file being written; the same
                     handle that was passed to bcf_idx_init.
    @return          0 on success, <0 on failure.
*/
int bcf_idx_save(htsFile* fp);
1425 
1426 /*******************
1427  * Typed value I/O *
1428  *******************/
1429 
/**
    Note that in contrast with BCFv2.1 specification, HTSlib implementation
    allows missing values in vectors. For integer types, the values 0x80,
    0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001,
    0x80000001 as end-of-vector indicators.  Similarly for floats, the value of
    0x7F800001 is interpreted as a missing value and 0x7F800002 as an
    end-of-vector indicator.
    Note that the end-of-vector byte is not part of the vector.

    This trial BCF version (v2.2) is compatible with the VCF specification and
    makes it possible to correctly handle vectors with different ploidy in the
    presence of missing values.
 */
// End-of-vector sentinels, one per storage width (type minimum + 1).
enum bcf_int8_vector_end = -127; /* INT8_MIN  + 1 */
enum bcf_int16_vector_end = -32_767; /* INT16_MIN + 1 */
enum bcf_int32_vector_end = -2_147_483_647; /* INT32_MIN + 1 */
enum bcf_int64_vector_end = -9_223_372_036_854_775_807L; /* INT64_MIN + 1 */
enum bcf_str_vector_end = 0;
// Missing-value sentinels, one per storage width (the type minimum itself).
enum bcf_int8_missing = -128; /* INT8_MIN  */
enum bcf_int16_missing = -32_767 - 1; /* INT16_MIN */
enum bcf_int32_missing = -2_147_483_647 - 1; /* INT32_MIN */
enum bcf_int64_missing = -9_223_372_036_854_775_807L - 1L; /* INT64_MIN */
enum bcf_str_missing = 0x07;

// Limits on BCF values stored in given types.  Max values are the same
// as for the underlying type.  Min values are slightly different as
// the eight lowest values of each type were reserved by BCFv2.2
// (hence minimum + 8 below).
enum BCF_MAX_BT_INT8 = 0x7f; /* INT8_MAX  */
enum BCF_MAX_BT_INT16 = 0x7fff; /* INT16_MAX */
enum BCF_MAX_BT_INT32 = 0x7fffffff; /* INT32_MAX */
enum BCF_MIN_BT_INT8 = -120; /* INT8_MIN  + 8 */
enum BCF_MIN_BT_INT16 = -32_760; /* INT16_MIN + 8 */
enum BCF_MIN_BT_INT32 = -2_147_483_640; /* INT32_MIN + 8 */
1463 
/// Raw 32-bit pattern marking the end of a float vector; declared here,
/// defined outside this module.
extern __gshared uint bcf_float_vector_end;
/// Raw 32-bit pattern marking a missing float value; declared here,
/// defined outside this module.
extern __gshared uint bcf_float_missing;
// Under LDC or GDC, request inlining for the declarations that follow in
// this scope (the trailing ':' applies the pragma to everything after it).
version(LDC) pragma(inline, true):
version(GNU) pragma(inline, true):
/// Store a raw 32-bit pattern into a float without any numeric conversion.
/** @param ptr   Destination float; receives the bits of @p value verbatim.
    @param value Bit pattern to place into *ptr.
*/
void bcf_float_set(float *ptr, uint32_t value)
{
    // Overlay an integer and a float on the same storage so the bits are
    // carried across unchanged.
    union Bits { uint32_t asInt; float asFloat; }
    Bits raw;
    raw.asInt = value;
    *ptr = raw.asFloat;
}
1476 
/// Overwrite x with the end-of-vector float marker (bcf_float_vector_end).
/** @param x Float to overwrite.  Taken by reference so that the caller's
             variable is modified; with a by-value parameter only a local
             copy would be written and the call would have no effect.
*/
void bcf_float_set_vector_end(ref float x) { bcf_float_set(&x, bcf_float_vector_end); }
/// Overwrite x with the missing-value float marker (bcf_float_missing).
/** @param x Float to overwrite.  Taken by reference so that the caller's
             variable is modified; with a by-value parameter only a local
             copy would be written and the call would have no effect.
*/
void bcf_float_set_missing(ref float x) { bcf_float_set(&x, bcf_float_missing); }
1481 
/// Check whether f carries exactly the bit pattern of bcf_float_missing.
/** The comparison is made on the raw 32-bit representation, not by
    floating-point value.
    @param f Value to inspect.
    @return  1 if the bits equal bcf_float_missing, otherwise 0.
*/
pragma(inline, true)
int bcf_float_is_missing(float f)
{
    union Bits { uint32_t asInt; float asFloat; }
    Bits raw;
    raw.asFloat = f;
    if (raw.asInt == bcf_float_missing)
        return 1;
    return 0;
}
/// Check whether f carries exactly the bit pattern of bcf_float_vector_end.
/** The comparison is made on the raw 32-bit representation, not by
    floating-point value.
    @param f Value to inspect.
    @return  1 if the bits equal bcf_float_vector_end, otherwise 0.
*/
pragma(inline, true)
int bcf_float_is_vector_end(float f)
{
    union Bits { uint32_t asInt; float asFloat; }
    Bits raw;
    raw.asFloat = f;
    if (raw.asInt == bcf_float_vector_end)
        return 1;
    return 0;
}
1500 
/// Append the textual genotype (GT) of one sample to a kstring.
/** @param fmt      FORMAT field holding the encoded genotype values.
    @param isample  Sample index; its values start at fmt.p + isample*fmt.size.
    @param str      String that receives the formatted genotype.
    @return 0 on success, -1 if any append to @p str failed, -2 if fmt.type is
            not BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or BCF_BT_NULL.

Each stored value v is written as (v>>1)-1, or as '.' when v>>1 is zero.
Every value after the first is preceded by '|' when bit 0 of v is set and by
'/' otherwise.  Output stops at the end-of-vector sentinel for the field's
integer width or after fmt.n values; a lone '.' is written when no value was
emitted at all.
*/
pragma(inline, true)
int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
{
    uint32_t e = 0;     // accumulates append failures; non-zero means error
    void branch(T)()    // gets a closure over e (was #define macro)
    if (is(T == int8_t) || is(T == int16_t) || is(T == int32_t))
    {
        static if (is(T == int8_t))
            auto vector_end = bcf_int8_vector_end;
        else static if (is(T == int16_t))
            auto vector_end = bcf_int16_vector_end;
        else
            auto vector_end = bcf_int32_vector_end;

        T *ptr = cast(T*) (fmt.p + (isample * fmt.size));
        // The counter must outlive the loop: it is inspected afterwards to
        // detect the "no values written" case.
        int i = 0;
        for (; i<fmt.n && ptr[i] != vector_end; i++)
        {
            if ( i ) e |= kputc("/|"[ptr[i]&1], str) < 0;
            if ( !(ptr[i]>>1) ) e |= kputc('.', str) < 0;
            else e |= kputw((ptr[i]>>1) - 1, str) < 0;
        }
        if (i == 0) e |= kputc('.', str) < 0;
    }
    switch (fmt.type) {
        case BCF_BT_INT8:  branch!int8_t; break;
        case BCF_BT_INT16: branch!int16_t; break;
        case BCF_BT_INT32: branch!int32_t; break;
        case BCF_BT_NULL:  e |= kputc('.', str) < 0; break;
        default: hts_log_error("Unexpected type %d", fmt.type); return -2;
    }

    return e == 0 ? 0 : -1;
}
1535 
1536 
/// Append a typed size descriptor to a kstring.
/** @param s     String to append to.
    @param size  Element count to encode.
    @param type  Type code stored in the low four bits of the descriptor byte.
    @return 0 on success, -1 if any append failed.

Sizes below 15 are packed into a single byte as size<<4|type.  Larger sizes
write 15<<4|type followed by the size as a single typed integer: INT8 when
size < 128, INT16 when size < 32768, INT32 otherwise.  The multi-byte values
are copied from memory as-is, i.e. in host byte order.
*/
pragma(inline, true)
int bcf_enc_size(kstring_t *s, int size, int type)
{
    uint32_t e = 0;     // accumulates append failures; non-zero means error
    if (size >= 15) {
        e |= kputc(15<<4|type, s) < 0;
        if (size >= 128) {
            if (size >= 32_768) {
                int32_t x = size;
                e |= kputc(1<<4|BCF_BT_INT32, s) < 0;
                e |= kputsn(cast(char*)&x, 4, s) < 0;
            } else {
                // Range is 128..32767 here, so the narrowing is lossless;
                // D requires the conversion to be spelled out.
                int16_t x = cast(int16_t) size;
                e |= kputc(1<<4|BCF_BT_INT16, s) < 0;
                e |= kputsn(cast(char*)&x, 2, s) < 0;
            }
        } else {
            e |= kputc(1<<4|BCF_BT_INT8, s) < 0;
            e |= kputc(size, s) < 0;
        }
    } else e |= kputc(size<<4|type, s) < 0;
    return e == 0 ? 0 : -1;
}
1560 
1561 
/// Pick the narrowest BCF integer type code able to hold x.
/** @param x Value to classify.
    @return BCF_BT_INT8 if x lies within [BCF_MIN_BT_INT8, BCF_MAX_BT_INT8],
            BCF_BT_INT16 if it lies within [BCF_MIN_BT_INT16, BCF_MAX_BT_INT16],
            and BCF_BT_INT32 in every other case.
*/
pragma(inline, true)
int bcf_enc_inttype(long x)
{
    const bool fitsInt8  = x >= BCF_MIN_BT_INT8  && x <= BCF_MAX_BT_INT8;
    const bool fitsInt16 = x >= BCF_MIN_BT_INT16 && x <= BCF_MAX_BT_INT16;

    if (fitsInt8)
        return BCF_BT_INT8;
    else if (fitsInt16)
        return BCF_BT_INT16;
    else
        return BCF_BT_INT32;
}
1570 
/// Append a single typed integer (descriptor byte plus value) to a kstring.
/** @param s String to append to.
    @param x Value to encode.
    @return 0 on success, -1 if any append failed.

The 32-bit end-of-vector and missing sentinels are re-encoded as their 8-bit
counterparts.  Any other value is stored in the narrowest of INT8, INT16 or
INT32 whose BCF_MIN_BT_* / BCF_MAX_BT_* range contains it.  Multi-byte values
are copied from memory as-is, i.e. in host byte order.
*/
pragma(inline, true)
int bcf_enc_int1(kstring_t *s, int32_t x)
{
    uint32_t e = 0;     // accumulates failures; non-zero means error
    if (x == bcf_int32_vector_end) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(bcf_int8_vector_end, s) < 0;
    } else if (x == bcf_int32_missing) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(bcf_int8_missing, s) < 0;
    } else if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(x, s) < 0;
    } else if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) {
        // The range check above makes the narrowing lossless; D requires
        // the conversion to be spelled out.
        int16_t z = cast(int16_t) x;
        e |= bcf_enc_size(s, 1, BCF_BT_INT16);
        e |= kputsn(cast(char*)&z, 2, s) < 0;
    } else {
        int32_t z = x;
        e |= bcf_enc_size(s, 1, BCF_BT_INT32);
        e |= kputsn(cast(char*)&z, 4, s) < 0;
    }
    return e == 0 ? 0 : -1;
}
/// Return the value of a single typed integer.
/** @param      p    Pointer to input data block.
    @param      type One of the BCF_BT_INT* type codes
    @param[out] q    Location to store an updated value for p
    @return The integer value, or zero if @p type is not valid.

If @p type is not one of BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or
BCF_BT_INT64, zero will be returned and @p *q will not be updated.
Otherwise, the integer value will be returned and @p *q will be set
to the memory location immediately following the integer value
(1, 2, 4 or 8 bytes past @p p respectively).

Cautious callers can detect invalid type codes by checking that *q has
actually been updated.
*/
pragma(inline, true)
int64_t bcf_dec_int1(const(ubyte) *p, int type, ubyte **q)
{
    if (type == BCF_BT_INT8) {
        *q = cast(ubyte*)p + 1;
        return le_to_i8(p);
    } else if (type == BCF_BT_INT16) {
        *q = cast(ubyte*)p + 2;
        return le_to_i16(p);
    } else if (type == BCF_BT_INT32) {
        *q = cast(ubyte*)p + 4;
        return le_to_i32(p);
    } else if (type == BCF_BT_INT64) {
        // A 64-bit value occupies 8 bytes, so skip all of them.
        *q = cast(ubyte*)p + 8;
        return le_to_i64(p);
    } else { // Invalid type.
        return 0;
    }
}
1629 
/// Decode one typed integer (type byte followed by value) from a byte stream.
/** @param      p    Pointer to input data block.
    @param[out] q    Location to store an updated value for p
    @return The integer value, or zero if the type code was not valid.

The low four bits of the byte at @p p are taken as the type code, and the
value is then decoded from the bytes that follow via bcf_dec_int1.

If the type code is not one that bcf_dec_int1 accepts, zero will be returned
and @p *q will be unchanged.  Otherwise, the integer value will be returned
and @p *q will be set to the memory location immediately following the
integer value.

Cautious callers can detect invalid type codes by checking that *q has
actually been updated.
*/
pragma(inline, true)
long bcf_dec_typed_int1 (const(ubyte)* p, ubyte** q)
{
    const int typeCode = *p & 0xf;      // low nibble of the descriptor byte
    const(ubyte)* payload = p + 1;      // value starts right after it
    return bcf_dec_int1(payload, typeCode, q);
}
1651 
/// Decode a typed size descriptor.
/** @param      p    Pointer to the descriptor byte.
    @param[out] q    Location to store an updated value for p
    @param[out] type Receives the type code (low four bits of the descriptor).
    @return The decoded size.

When the high four bits of the descriptor are not 15 they are the size
itself and @p *q is set to the byte after the descriptor.  Otherwise the
size follows as a typed integer, which is decoded with bcf_dec_typed_int1;
in that case @p *q is only updated if that integer's type code is valid.
*/
pragma(inline, true)
int bcf_dec_size (const(ubyte)* p, ubyte** q, int* type)
{
    *type = *p & 0xf;
    if (*p>>4 != 15) {
        *q = cast(ubyte*)p + 1;
        return *p>>4;
    } else {
        // bcf_dec_typed_int1 yields a 64-bit value; D does not narrow it to
        // int implicitly, so truncate explicitly.
        return cast(int) bcf_dec_typed_int1(p + 1, q);
    }
}