1 /// @file htslib/vcf.h
2 /// High-level VCF/BCF variant calling file operations.
3 /*
4     Copyright (C) 2012, 2013 Broad Institute.
5     Copyright (C) 2012-2020 Genome Research Ltd.
6 
7     Author: Heng Li <lh3@sanger.ac.uk>
8 
9 Permission is hereby granted, free of charge, to any person obtaining a copy
10 of this software and associated documentation files (the "Software"), to deal
11 in the Software without restriction, including without limitation the rights
12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
13 copies of the Software, and to permit persons to whom the Software is
14 furnished to do so, subject to the following conditions:
15 
16 The above copyright notice and this permission notice shall be included in
17 all copies or substantial portions of the Software.
18 
19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
25 DEALINGS IN THE SOFTWARE.  */
26 
27 /*
28     todo:
29         - make the function names consistent
30         - provide calls to abstract away structs as much as possible
31  */
32 /// Section numbers refer to VCF Specification v4.2: https://samtools.github.io/hts-specs/VCFv4.2.pdf
33 module htslib.vcf;
34 
35 import std.bitmanip;
36 import std.string: toStringz;
37 import core.stdc.errno : errno, EINVAL;
38 import core.stdc.config;
39 
40 import htslib.hts;
41 import htslib.hts_log;
42 import htslib.hts_endian;
43 import htslib.kstring : kstring_t;
44 import htslib.bgzf : BGZF;
45 
46 @system:
47 extern (C):
48 @nogc nothrow {
49 
50 /* Included only for backwards compatibility with e.g. bcftools 1.10 */
51 
52 /*****************
53  * Header struct *
54  *****************/
55 
56 enum BCF_HL_FLT = 0; /// header line: FILTER
57 enum BCF_HL_INFO = 1;/// header line: INFO
58 enum BCF_HL_FMT = 2; /// header line: FORMAT
59 enum BCF_HL_CTG = 3; /// header line: contig
60 enum BCF_HL_STR = 4; /// header line: structured header line TAG=<A=..,B=..>
61 enum BCF_HL_GEN = 5; /// header line: generic header line
62 
63 enum BCF_HT_FLAG = 0; /// header type: FLAG// header type
64 enum BCF_HT_INT = 1;  /// header type: INTEGER
65 enum BCF_HT_REAL = 2; /// header type: REAL
66 enum BCF_HT_STR = 3;  /// header type: STRING
67 enum BCF_HT_LONG = BCF_HT_INT | 0x100; // BCF_HT_INT, but for int64_t values; VCF only!
68 
69 enum BCF_VL_FIXED = 0; /// variable length: fixed (?)// variable length
70 enum BCF_VL_VAR = 1; /// variable length: variable
71 enum BCF_VL_A = 2; /// variable length: ?
72 enum BCF_VL_G = 3; /// variable length: ?
73 enum BCF_VL_R = 4; /// variable length: ?
74 
75 /* === Dictionary ===
76 
77    The header keeps three dictionaries. The first keeps IDs in the
78    "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths
79    in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[]
80    is the actual hash table, which is opaque to the end users. In the hash
81    table, the key is the ID or sample name as a C string and the value is a
82    bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash
83    table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the
84    size of the hash table or, equivalently, the length of the id[] arrays.
85 */
86 
87 enum BCF_DT_ID = 0;    /// dictionary type: ID
88 enum BCF_DT_CTG = 1;   /// dictionary type: CONTIG
89 enum BCF_DT_SAMPLE = 2;/// dictionary type: SAMPLE
90 
91 /// Structured representation of a header line (§1.2)
92 struct bcf_hrec_t // @suppress(dscanner.style.phobos_naming_convention)
93 {
94     int type; /// One of the BCF_HL_* type
95     char* key; /// The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc.
96     char* value; /// Set only for generic lines, NULL for FILTER/INFO, etc.
97     int nkeys; /// Number of structured fields
98     char** keys; /// The key=value pairs
99     char** vals; /// The key=value pairs
100 }
101 
102 ///  ID Dictionary entry
103 struct bcf_idinfo_t
104 {
105     ulong[3] info; /** stores Number:20, var:4, Type:4, ColType:4 in info[0..2]
106                      for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG */
107     bcf_hrec_t*[3] hrec; /// pointers to header lines for [FILTER, INFO, FORMAT] in order
108     int id; /// primary key
109 }
110 
111 /// ID Dictionary k/v
112 struct bcf_idpair_t // @suppress(dscanner.style.phobos_naming_convention)
113 {
114     const(char)* key; /// header dictionary FILTER/INFO/FORMAT ID key
115     const(bcf_idinfo_t)* val; /// header dictionary FILTER/INFO/FORMAT ID entry
116 }
117 
118 /// Structured repreentation of VCF header (§1.2)
119 /// Note that bcf_hdr_t structs must always be created via bcf_hdr_init()
120 struct bcf_hdr_t // @suppress(dscanner.style.phobos_naming_convention)
121 {
122     int[3] n;           /// n:the size of the dictionary block in use, (allocated size, m, is below to preserve ABI)
123     bcf_idpair_t*[3] id;/// ID dictionary {FILTER/INFO/FORMAT, contig, sample} ID key/entry
124     void*[3] dict; /// hash table
125     char** samples; /// ?list of samples
126     bcf_hrec_t** hrec; /// Structured representation of this header line
127     int nhrec; /// # of header records
128     int dirty; /// ?
129     int ntransl; /// for bcf_translate()
130     int*[2] transl; /// for bcf_translate()
131     int nsamples_ori; /// for bcf_hdr_set_samples()
132     ubyte* keep_samples; /// ?
133     kstring_t mem; /// ?
134     int[3] m; /// m: allocated size of the dictionary block in use (see n above)
135 }
136 
137 /// Lookup table used in bcf_record_check
138 /// MAINTAINER: in C header is []
139 extern __gshared ubyte[] bcf_type_shift;
140 
141 /**************
142  * VCF record *
143  **************/
144 
145 enum BCF_BT_NULL = 0; /// null
146 enum BCF_BT_INT8 = 1;/// int8
147 enum BCF_BT_INT16 = 2;/// int16
148 enum BCF_BT_INT32 = 3;/// int32
149 enum BCF_BT_INT64 = 4;/// Unofficial, for internal use only per htslib headers 
150 enum BCF_BT_FLOAT = 5; /// float (32?)
151 enum BCF_BT_CHAR = 7;/// char (8 bit)
152 
153 enum VCF_REF = 0;     /// ref (e.g. in a gVCF)
154 enum VCF_SNP = 1;/// SNP 
155 enum VCF_MNP = 2;/// MNP
156 enum VCF_INDEL = 4;/// INDEL
157 enum VCF_OTHER = 8;/// other (e.g. SV)
158 enum VCF_BND = 16; // /// breakend
159 enum VCF_OVERLAP = 32;/// overlapping deletion, ALT=* 
160 enum VCF_INS = 64; 
161 enum VCF_DEL = 128; 
162 enum VCF_ANY = VCF_SNP | VCF_MNP | VCF_INDEL | VCF_OTHER | VCF_BND | VCF_OVERLAP | VCF_INS | VCF_DEL; 
163 
164 /// variant type record embedded in bcf_dec_t
165 /// variant type and the number of bases affected, negative for deletions
166 struct bcf_variant_t // @suppress(dscanner.style.phobos_naming_convention)
167 {
168     int type;   /// variant type and the number of bases affected, negative for deletions
169     int n;      /// variant type and the number of bases affected, negative for deletions
170 }
171 
172 /// FORMAT field data (§1.4.2 Genotype fields)
173 struct bcf_fmt_t // @suppress(dscanner.style.phobos_naming_convention)
174 {
175     import std.bitmanip : bitfields;
176 
177     int id;   /// id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key
178     int n;/// n: number of values per-sample; size: number of bytes per-sample; type: one of BCF_BT_* types
179     int size;/// size: number of bytes per-sample; type: one of BCF_BT_* types
180     int type; /// type: one of BCF_BT_* types
181     ubyte* p; /// same as vptr and vptr_* in bcf_info_t below
182     uint p_len;
183 
184     mixin(bitfields!(
185         uint, "p_off", 31,
186         bool, "p_free", 1));
187 }
188 
189 /// INFO field data (§1.4.1 Fixed fields, (8) INFO)
190 struct bcf_info_t // @suppress(dscanner.style.phobos_naming_convention)
191 {
192     import std.bitmanip : bitfields;
193 
194     int key; /// key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key
195     int type; /// type: one of BCF_BT_* types
196 
197     /// integer value
198     /// float value
199     union V1
200     {
201         long i;
202         float f;
203     }
204 
205     V1 v1; /// only set if $len==1; for easier access
206     ubyte* vptr; /// pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes
207     uint vptr_len;
208 
209     mixin(bitfields!(
210         uint, "vptr_off", 31,
211         uint, "vptr_free", 1)); /// length of the vptr block or, when set, of the vptr_mod block, excluding offset
212     /// vptr offset, i.e., the size of the INFO key plus size+type bytes
213     /// indicates that vptr-vptr_off must be freed; set only when modified and the new
214     ///    data block is bigger than the original
215     int len; /// vector length, 1 for scalars
216 }
217 
218 enum BCF1_DIRTY_ID = 1;  /// ID was edited
219 enum BCF1_DIRTY_ALS = 2; /// Allele(s) was edited
220 enum BCF1_DIRTY_FLT = 4; /// FILTER was edited
221 enum BCF1_DIRTY_INF = 8; /// INFO was edited
222 
223 /// Variable-length data from a VCF record
224 struct bcf_dec_t // @suppress(dscanner.style.phobos_naming_convention)
225 {
226     /// allocated size (high-water mark); do not change 
227     int m_fmt;          
228     int m_info;
229     int m_id;
230     int m_als;
231     int m_allele;
232     int m_flt; 
233     int n_flt;          /// Number of FILTER fields
234     int* flt; /// FILTER keys in the dictionary
235     char* id;/// ID
236     char* als; /// REF+ALT block (\0-seperated)
237     char** allele; /// allele[0] is the REF (allele[] pointers to the als block); all null terminated
238     bcf_info_t* info; /// INFO
239     bcf_fmt_t* fmt; /// FORMAT and individual sample
240     bcf_variant_t* var; /// $var and $var_type set only when set_variant_types called
241     int n_var;/// variant number(???)
242     int var_type;/// variant type (TODO: make enum)
243     int shared_dirty; /// if set, shared.s must be recreated on BCF output (TODO: make enum)
244     int indiv_dirty;   /// if set, indiv.s must be recreated on BCF output (TODO: make enum)
245 }
246 
247 enum BCF_ERR_CTG_UNDEF = 1;   /// BCF error: undefined contig
248 enum BCF_ERR_TAG_UNDEF = 2;/// BCF error: undefined tag
249 enum BCF_ERR_NCOLS = 4;/// BCF error: 
250 enum BCF_ERR_LIMITS = 8;/// BCF error: 
251 enum BCF_ERR_CHAR = 16;/// BCF error: 
252 enum BCF_ERR_CTG_INVALID = 32;/// BCF error: 
253 enum BCF_ERR_TAG_INVALID = 64;/// BCF error: 
254 
255 /**
256     The bcf1_t structure corresponds to one VCF/BCF line. Reading from VCF file
257     is slower because the string is first to be parsed, packed into BCF line
258     (done in vcf_parse), then unpacked into internal bcf1_t structure. If it
259     is known in advance that some of the fields will not be required (notably
260     the sample columns), parsing of these can be skipped by setting max_unpack
261     appropriately.
262     Similarly, it is fast to output a BCF line because the columns (kept in
263     shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF
264     line must be formatted in vcf_format.
265  */
266 struct bcf1_t
267 {
268     import std.bitmanip : bitfields;
269 
270     hts_pos_t pos; /// POS
271     hts_pos_t rlen; /// length of REF
272     int rid; /// CHROM
273     float qual;
274 
275     mixin(bitfields!(
276         uint, "n_info", 16,
277         uint, "n_allele", 16,
278         uint, "n_fmt", 8,
279         uint, "n_sample", 24)); /// QUAL
280 
281     kstring_t shared_;
282     kstring_t indiv;
283     bcf_dec_t d; /// lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack()
284     int max_unpack; /// Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed
285     int unpacked; /// remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work
286     int[3] unpack_size; /// the original block size of ID, REF+ALT and FILTER
287     int errcode; /// one of BCF_ERR_* codes
288 }
289 
290 /*******
291  * API *
292  *******/
293 
294 /***********************************************************************
295  *  BCF and VCF I/O
296  *
297  *  A note about naming conventions: htslib internally represents VCF
298  *  records as bcf1_t data structures, therefore most functions are
299  *  prefixed with bcf_. There are a few exceptions where the functions must
300  *  be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In
301  *  these cases, functions prefixed with bcf_ are more general and work
302  *  with both BCF and VCF.
303  *
304  ***********************************************************************/
305 
306 /** These macros are defined only for consistency with other parts of htslib */
307 alias bcf_init1 = bcf_init;
308 alias bcf_read1 = bcf_read;
309 alias vcf_read1 = vcf_read;
310 alias bcf_write1 = bcf_write;
311 alias vcf_write1 = vcf_write;
312 alias bcf_destroy1 = bcf_destroy;
313 alias bcf_empty1 = bcf_empty;
314 alias vcf_parse1 = vcf_parse;
315 alias bcf_clear1 = bcf_clear;
316 alias vcf_format1 = vcf_format;
317 
318 /**
319  *  bcf_hdr_init() - create an empty BCF header.
320  *  @param mode    "r" or "w"
321  *
322  *  When opened for writing, the mandatory fileFormat and
323  *  FILTER=PASS lines are added automatically.
324  *
325  * The bcf_hdr_t struct returned by a successful call should be freed
326  * via bcf_hdr_destroy() when it is no longer needed.
327  */
328 bcf_hdr_t* bcf_hdr_init(const(char)* mode);
329 
330 /** Destroy a BCF header struct */
331 void bcf_hdr_destroy(bcf_hdr_t* h);
332 
333 /** Allocate and initialize a bcf1_t object.
334  *
335  * The bcf1_t struct returned by a successful call should be freed
336  * via bcf_destroy() when it is no longer needed.
337  */
338 bcf1_t* bcf_init();
339 
340 /** Deallocate a bcf1_t object */
341 void bcf_destroy(bcf1_t* v);
342 
343 /**
344  *  Same as bcf_destroy() but frees only the memory allocated by bcf1_t,
345  *  not the bcf1_t object itself.
346  */
347 void bcf_empty(bcf1_t* v);
348 
349 /**
350  *  Make the bcf1_t object ready for next read. Intended mostly for
351  *  internal use, the user should rarely need to call this function
352  *  directly.
353  */
354 void bcf_clear(bcf1_t* v);
355 
356 /** bcf_open and vcf_open mode: please see hts_open() in hts.h */
357 alias vcfFile = htsFile;
358 alias bcf_open = hts_open;
359 alias vcf_open = hts_open;
360 alias bcf_flush = hts_flush;
361 alias bcf_close = hts_close;
362 alias vcf_close = hts_close;
363 
364 /// Read a VCF or BCF header
365 /** @param  fp  The file to read the header from
366     @return Pointer to a populated header structure on success;
367             NULL on failure
368 
369     The bcf_hdr_t struct returned by a successful call should be freed
370     via bcf_hdr_destroy() when it is no longer needed.
371 */
372 bcf_hdr_t* bcf_hdr_read(htsFile* fp);
373 
374 /**
375  *  bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed
376  *  @param samples  samples to include or exclude from file or as a comma-separated string.
377  *              LIST|FILE   .. select samples in list/file
378  *              ^LIST|FILE  .. exclude samples from list/file
379  *              -           .. include all samples
380  *              NULL        .. exclude all samples
381  *  @param is_file  @p samples is a file (1) or a comma-separated list (0)
382  *
383  *  The bottleneck of VCF reading is parsing of genotype fields. If the
384  *  reader knows in advance that only subset of samples is needed (possibly
385  *  no samples at all), the performance of bcf_read() can be significantly
386  *  improved by calling bcf_hdr_set_samples after bcf_hdr_read().
387  *  The function bcf_read() will subset the VCF/BCF records automatically
388  *  with the notable exception when reading records via bcf_itr_next().
389  *  In this case, bcf_subset_format() must be called explicitly, because
390  *  bcf_readrec() does not see the header.
391  *
392  *  Returns 0 on success, -1 on error or a positive integer if the list
393  *  contains samples not present in the VCF header. In such a case, the
394  *  return value is the index of the offending sample.
395  */
396 int bcf_hdr_set_samples(bcf_hdr_t* hdr, const(char)* samples, int is_file);
397 
398 int bcf_subset_format(const(bcf_hdr_t)* hdr, bcf1_t* rec);
399 
400 /// Write a VCF or BCF header
401 /** @param  fp  Output file
402     @param  h   The header to write
403     @return 0 on success; -1 on failure
404  */
405 int bcf_hdr_write(htsFile* fp, bcf_hdr_t* h);
406 
407 /**
408  * Parse VCF line contained in kstring and populate the bcf1_t struct
409  * The line must not end with \n or \r characters.
410  */
411 int vcf_parse(kstring_t* s, const(bcf_hdr_t)* h, bcf1_t* v);
412 
413 /**
414  * Complete the file opening mode, according to its extension.
415  * @param mode      Preallocated mode string to be completed.
416  * @param fn        File name to be opened.
417  * @param format    Format string (vcf|bcf|vcf.gz)
418  * @return          0 on success; -1 on failure
419  */
420 int vcf_open_mode(char* mode, const(char)* fn, const(char)* format);
421 
422 /** The opposite of vcf_parse. It should rarely be called directly, see vcf_write */
423 int vcf_format(const(bcf_hdr_t)* h, const(bcf1_t)* v, kstring_t* s);
424 
425 /// Read next VCF or BCF record
426 /** @param fp  The file to read the record from
427         @param h   The header for the vcf/bcf file
428         @param v   The bcf1_t structure to populate
429         @return 0 on success; -1 on end of file; < -1 on critical error
430 
431 On errors which are not critical for reading, such as missing header
432 definitions in vcf files, zero will be returned but v->errcode will have been
433 set to one of BCF_ERR* codes and must be checked before calling bcf_write().
434      */
435 int bcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);
436 
437 /**
438  *  bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field)
439  *
440  *  Note that bcf_unpack() must be called even when reading VCF. It is safe
441  *  to call the function repeatedly, it will not unpack the same field
442  *  twice.
443  */
444 enum BCF_UN_STR = 1; // up to ALT inclusive
445 enum BCF_UN_FLT = 2; // up to FILTER
446 enum BCF_UN_INFO = 4; // up to INFO
447 enum BCF_UN_SHR = BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO; // all shared information
448 enum BCF_UN_FMT = 8; // unpack format and each sample
449 enum BCF_UN_IND = BCF_UN_FMT; // a synonym of BCF_UN_FMT
450 enum BCF_UN_ALL = BCF_UN_SHR | BCF_UN_FMT; // everything
451 int bcf_unpack(bcf1_t* b, int which);
452 
453 /*
454  *  bcf_dup() - create a copy of BCF record.
455  *
456  *  Note that bcf_unpack() must be called on the returned copy as if it was
457  *  obtained from bcf_read(). Also note that bcf_dup() calls bcf_sync1(src)
458  *  internally to reflect any changes made by bcf_update_* functions.
459  *
460  *  The bcf1_t struct returned by a successful call should be freed
461  *  via bcf_destroy() when it is no longer needed.
462  */
463 bcf1_t* bcf_dup(bcf1_t* src);
464 
465 bcf1_t* bcf_copy(bcf1_t* dst, bcf1_t* src);
466 
467 /// Write one VCF or BCF record. The type is determined at the open() call.
468 /** @param  fp  The file to write to
469     @param  h   The header for the vcf/bcf file
470     @param  v   The bcf1_t structure to write
471     @return 0 on success; -1 on error
472  */
473 int bcf_write(htsFile* fp, bcf_hdr_t* h, bcf1_t* v);
474 
475 /**
476  *  The following functions work only with VCFs and should rarely be called
477  *  directly. Usually one wants to use their bcf_* alternatives, which work
478  *  transparently with both VCFs and BCFs.
479  */
480 /// Read a VCF format header
481 /** @param  fp  The file to read the header from
482     @return Pointer to a populated header structure on success;
483             NULL on failure
484 
485     Use bcf_hdr_read() instead.
486 
487     The bcf_hdr_t struct returned by a successful call should be freed
488     via bcf_hdr_destroy() when it is no longer needed.
489 */
490 bcf_hdr_t* vcf_hdr_read(htsFile* fp);
491 
492 /// Write a VCF format header
493 /** @param  fp  Output file
494     @param  h   The header to write
495     @return 0 on success; -1 on failure
496 
497     Use bcf_hdr_write() instead
498 */
499 int vcf_hdr_write(htsFile* fp, const(bcf_hdr_t)* h);
500 
501 /// Read a record from a VCF file
502 /** @param fp  The file to read the record from
503     @param h   The header for the vcf file
504     @param v   The bcf1_t structure to populate
505     @return 0 on success; -1 on end of file; < -1 on error
506 
507     Use bcf_read() instead
508 */
509 int vcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);
510 
511 /// Write a record to a VCF file
512 /** @param  fp  The file to write to
513     @param h   The header for the vcf file
514     @param v   The bcf1_t structure to write
515     @return 0 on success; -1 on error
516 
517     Use bcf_write() instead
518 */
519 int vcf_write(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);
520 
521 /** Helper function for the bcf_itr_next() macro; internal use, ignore it */
522 int bcf_readrec(
523     BGZF* fp,
524     void* null_,
525     void* v,
526     int* tid,
527     hts_pos_t* beg,
528     hts_pos_t* end);
529 
530 /// Write a line to a VCF file
531 /** @param line   Line to write
532     @param fp     File to write it to
533     @return 0 on success; -1 on failure
534 
535     @note No checks are done on the line being added, apart from
536           ensuring that it ends with a newline.  This function
537           should therefore be used with care.
538 */
539 int vcf_write_line(htsFile* fp, kstring_t* line);
540 
541 /**************************************************************************
542  *  Header querying and manipulation routines
543  **************************************************************************/
544 
545 /** Create a new header using the supplied template
546  *
547  *  The bcf_hdr_t struct returned by a successful call should be freed
548  *  via bcf_hdr_destroy() when it is no longer needed.
549  *  @return NULL on failure, header otherwise
550  */
551 bcf_hdr_t* bcf_hdr_dup(const(bcf_hdr_t)* hdr);
552 
553 /**
554  *  Copy header lines from src to dst if not already present in dst. See also bcf_translate().
555  *  Returns 0 on success or sets a bit on error:
556  *      1 .. conflicting definitions of tag length
557  *      // todo
558  */
559 deprecated("Please use bcf_hdr_merge instead")
560 int bcf_hdr_combine(bcf_hdr_t* dst, const(bcf_hdr_t)* src);
561 
562 /**
563  *  bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate()
564  *  @param dst: the destination header to be merged into, NULL on the first pass
565  *  @param src: the source header
566  *  @return NULL on failure, header otherwise
567  *
568  *  Notes:
569  *      - use as:
570  *          bcf_hdr_t *dst = NULL;
571  *          for (i=0; i<nsrc; i++) dst = bcf_hdr_merge(dst,src[i]);
572  *
573  *      - bcf_hdr_merge() replaces bcf_hdr_combine() which had a problem when
574  *      combining multiple BCF headers. The current bcf_hdr_combine()
575  *      does not have this problem, but became slow when used for many files.
576  */
577 bcf_hdr_t* bcf_hdr_merge(bcf_hdr_t* dst, const(bcf_hdr_t)* src);
578 
579 /**
580  *  bcf_hdr_add_sample() - add a new sample.
581  *  @param sample:  sample name to be added
582  *
583  *  Note:
584  *      After all samples have been added, the internal header structure must be updated
585  *      by calling bcf_hdr_sync(). This is normally done automatically by the first bcf_hdr_write()
586  *      or bcf_write() call. Otherwise, the caller must force the update by calling bcf_hdr_sync()
587  *      explicitly.
588  */
589 int bcf_hdr_add_sample(bcf_hdr_t* hdr, const(char)* sample);
590 
591 /** Read VCF header from a file and update the header */
592 int bcf_hdr_set(bcf_hdr_t* hdr, const(char)* fname);
593 
594 /// Appends formatted header text to _str_.
595 /** If _is_bcf_ is zero, `IDX` fields are discarded.
596  *  @return 0 if successful, or negative if an error occurred
597  *  @since 1.4
598  */
599 int bcf_hdr_format(const(bcf_hdr_t)* hdr, int is_bcf, kstring_t* str);
600 
601 /** Returns formatted header (newly allocated string) and its length,
602  *  excluding the terminating \0. If is_bcf parameter is unset, IDX
603  *  fields are discarded.
604  *  @deprecated Use bcf_hdr_format() instead as it can handle huge headers.
605  */
606 deprecated("use bcf_hdr_format() instead")
607 char* bcf_hdr_fmt_text(const(bcf_hdr_t)* hdr, int is_bcf, int* len);
608 
609 /** Append new VCF header line, returns 0 on success */
610 int bcf_hdr_append(bcf_hdr_t* h, const(char)* line);
611 
612 int bcf_hdr_printf(bcf_hdr_t* h, const(char)* format, ...);
613 
614 /** VCF version, e.g. VCFv4.2 */
615 const(char)* bcf_hdr_get_version(const(bcf_hdr_t)* hdr);
616 
617 /// Set version in bcf header
618 /**
619    @param hdr     BCF header struct
620    @param version Version to set, e.g. "VCFv4.3"
621    @return 0 on success; < 0 on error
622  */
623 int bcf_hdr_set_version(bcf_hdr_t* hdr, const(char)* version_);
624 
625 /**
626  *  bcf_hdr_remove() - remove VCF header tag
627  *  @param type:      one of BCF_HL_*
628  *  @param key:       tag name or NULL to remove all tags of the given type
629  */
630 void bcf_hdr_remove(bcf_hdr_t* h, int type, const(char)* key);
631 
632 /**
633  *  bcf_hdr_subset() - creates a new copy of the header removing unwanted samples
634  *  @param n:        number of samples to keep
635  *  @param samples:  names of the samples to keep
636  *  @param imap:     mapping from index in @samples to the sample index in the original file
637  *  @return NULL on failure, header otherwise
638  *
639  *  Sample names not present in h0 are ignored. The number of unmatched samples can be checked
640  *  by comparing n and bcf_hdr_nsamples(out_hdr).
641  *  This function can be used to reorder samples.
642  *  See also bcf_subset() which subsets individual records.
643  *  The bcf_hdr_t struct returned by a successful call should be freed
644  *  via bcf_hdr_destroy() when it is no longer needed.
645  */
646  /// NOTE: char *const* samples really exmplifies what I hate about C pointers
647 /// My interpretation of this is it is equivalent to char **samples, but that the outer pointer is const
648 /// which in D would be const(char *)*samples. I don't know what it implies about constancy of *samples or samples.
649 bcf_hdr_t* bcf_hdr_subset(
650     const(bcf_hdr_t)* h0,
651     int n,
652     const(char*)* samples,
653     int* imap);
654 
655 /** Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names) */
656 const(char*)* bcf_hdr_seqnames(const(bcf_hdr_t)* h, int* nseqs);
657 
658 /** Get number of samples */
659 pragma(inline, true) auto bcf_hdr_nsamples (bcf_hdr_t *hdr)
660 {
661     return hdr.n[BCF_DT_SAMPLE];
662 }
663 
664 /** The following functions are for internal use and should rarely be called directly */
665 int bcf_hdr_parse(bcf_hdr_t* hdr, char* htxt);
666 
667 /// Synchronize internal header structures
668 /** @param h  Header
669     @return 0 on success, -1 on failure
670 
671     This function updates the id, sample and contig arrays in the
672     bcf_hdr_t structure so that they point to the same locations as
673     the id, sample and contig dictionaries.
674 */
675 int bcf_hdr_sync(bcf_hdr_t* h);
676 
677 /**
678  * bcf_hdr_parse_line() - parse a single line of VCF textual header
679  * @param h     BCF header struct
680  * @param line  One or more lines of header text
681  * @param len   Filled out with length data parsed from 'line'.
682  * @return bcf_hrec_t* on success;
683  *         NULL on error or on end of header text.
684  *         NB: to distinguish error from end-of-header, check *len:
685  *           *len == 0 indicates @p line did not start with "##"
686  *           *len == -1 indicates failure, likely due to out of memory
687  *           *len > 0 indicates a malformed header line
688  *
689  * If *len > 0 on exit, it will contain the full length of the line
690  * including any trailing newline (this includes cases where NULL was
691  * returned due to a malformed line).  Callers can use this to skip to
692  * the next header line.
693  */
694 bcf_hrec_t* bcf_hdr_parse_line(
695     const(bcf_hdr_t)* h,
696     const(char)* line,
697     int* len);
698 /// Convert a bcf header record to string form
699 /**
700  * @param hrec    Header record
701  * @param str     Destination kstring
702  * @return 0 on success; < 0 on error
703  */
704 int bcf_hrec_format(const(bcf_hrec_t)* hrec, kstring_t* str);
705 
706 int bcf_hdr_add_hrec(bcf_hdr_t* hdr, bcf_hrec_t* hrec);
707 
708 /**
709  *  bcf_hdr_get_hrec() - get header line info
710  *  @param type:  one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN
711  *  @param key:   the header key for generic lines (e.g. "fileformat"), any field
712  *                  for structured lines, typically "ID".
713  *  @param value: the value which pairs with key. Can be be NULL for BCF_HL_GEN
714  *  @param str_class: the class of BCF_HL_STR line (e.g. "ALT" or "SAMPLE"), otherwise NULL
715  */
716 bcf_hrec_t* bcf_hdr_get_hrec(
717     const(bcf_hdr_t)* hdr,
718     int type,
719     const(char)* key,
720     const(char)* value,
721     const(char)* str_class);
722 
723 /// Duplicate a header record
724 /** @param hrec   Header record to copy
725     @return A new header record on success; NULL on failure
726 
727     The bcf_hrec_t struct returned by a successful call should be freed
728     via bcf_hrec_destroy() when it is no longer needed.
729 */
730 bcf_hrec_t* bcf_hrec_dup(bcf_hrec_t* hrec);
731 
732 /// Add a new header record key
733 /** @param hrec  Header record
734     @param str   Key name
735     @param len   Length of @p str
736     @return 0 on success; -1 on failure
737 */
738 int bcf_hrec_add_key(bcf_hrec_t* hrec, const(char)* str, size_t len);
739 
740 /// Set a header record value
741 /** @param hrec      Header record
742     @param i         Index of value
743     @param str       Value to set
744     @param len       Length of @p str
745     @param is_quoted Value should be quoted
746     @return 0 on success; -1 on failure
747 */
748 int bcf_hrec_set_val(
749     bcf_hrec_t* hrec,
750     int i,
751     const(char)* str,
752     size_t len,
753     int is_quoted);
754 
755 /// Lookup header record by key
756 int bcf_hrec_find_key(bcf_hrec_t* hrec, const(char)* key);
757 
758 /// Add an IDX header record
759 /** @param hrec   Header record
760     @param idx    IDX value to add
761     @return 0 on success; -1 on failure
762 */
763 int hrec_add_idx(bcf_hrec_t* hrec, int idx);
764 
765 /// Free up a header record and associated structures
766 /** @param hrec  Header record
767  */
768 void bcf_hrec_destroy(bcf_hrec_t* hrec);
769 
770 /**************************************************************************
771  *  Individual record querying and manipulation routines
772  **************************************************************************/
773 
774 /** See the description of bcf_hdr_subset() */
775 int bcf_subset(const(bcf_hdr_t)* h, bcf1_t* v, int n, int* imap);
776 
777 /**
778  *  bcf_translate() - translate tags ids to be consistent with different header. This function
779  *                    is useful when lines from multiple VCF need to be combined.
780  *  @dst_hdr:   the destination header, to be used in bcf_write(), see also bcf_hdr_combine()
781  *  @src_hdr:   the source header, used in bcf_read()
782  *  @src_line:  line obtained by bcf_read()
783  */
784 int bcf_translate(
785     const(bcf_hdr_t)* dst_hdr,
786     bcf_hdr_t* src_hdr,
787     bcf1_t* src_line);
788 
789 
790 /**
791  *  @param rec   BCF/VCF record
792  *  @return Types of variant present
793  *
794  *  The return value will be a bitwise-or of VCF_SNP, VCF_MNP,
795  *  VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP.  If will return
796  *  VCF_REF (i.e. 0) if none of the other types is present.
797  *  @deprecated Please use bcf_has_variant_types() instead
798  */
799 int bcf_get_variant_types(bcf1_t* rec);
800 
801 
802 /*
803  *  @param  rec        BCF/VCF record
804  *  @param  ith_allele Allele to check
805  *  @return Type of variant present
806  *
807  *  The return value will be one of VCF_REF, VCF_SNP, VCF_MNP,
808  *  VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP.
809  *  @deprecated Please use bcf_has_variant_type() instead
810  */
811 int bcf_get_variant_type(bcf1_t* rec, int ith_allele);
812 
813 
814 enum bcf_variant_match
815 {
816     bcf_match_exact = 0, 
817     bcf_match_overlap = 1, 
818     bcf_match_subset = 2 
819 }
820 
821 
822 /*
823  *  @param rec      BCF/VCF record
824  *  @param bitmask  Set of variant types to test for
825  *  @param mode     Match mode
826  *  @return >0 if the variant types are present,
827  *           0 if not present,
828  *          -1 on error
829  *
830  *  @p bitmask should be the bitwise-or of the variant types (VCF_SNP,
831  *     VCF_MNP, etc.) to test for.
832  *
833  *  The return value is the bitwise-and of the set of types present
834  *  and @p bitmask.  Callers that want to check for the presence of more
835  *  than one type can avoid function call overhead by passing all the
836  *  types to be checked for in a single call to this function, in
837  *  bcf_match_overlap mode, and then check for them individually in the
838  *  returned value.
839  *
840  *  As VCF_REF is represented by 0 (i.e. the absence of other variants)
841  *  it should be tested for using
842  *    bcf_has_variant_types(rec, VCF_REF, bcf_match_exact)
843  *  which will return 1 if no other variant type is present, otherwise 0.
844  */
845 int bcf_has_variant_types(bcf1_t* rec, uint bitmask, bcf_variant_match mode);
846 
847 
848 /*
849  *  @param rec         BCF/VCF record
850  *  @param ith_allele  Allele to check
851  *  @param bitmask     Set of variant types to test for
852  *  @return >0 if one of the variant types is present,
853  *           0 if not present,
854  *          -1 on error
855  *
856  *  @p bitmask should be the bitwise-or of the variant types (VCF_SNP,
857  *     VCF_MNP, etc.) to test for, or VCF_REF on its own.
858  *
859  *  The return value is the bitwise-and of the set of types present
860  *  and @p bitmask.  Callers that want to check for the presence of more
861  *  than one type can avoid function call overhead by passing all the
862  *  types to be checked for in a single call to this function, and then
863  *  check for them individually in the returned value.
864  *
865  *  As a special case, if @p bitmask is VCF_REF (i.e. 0), the function
866  *  tests for an exact match.  The return value will be 1 if the
867  *  variant type calculated for the allele is VCF_REF, otherwise if
868  *  any other type is present it will be 0.
869  */
870 int bcf_has_variant_type(bcf1_t* rec, int ith_allele, uint bitmask);
871 
872 
873 /*
874  *  @param rec         BCF/VCF record
875  *  @param ith_allele  Allele index
876  *  @return The number of bases affected (negative for deletions),
877  *          or bcf_int32_missing on error.
878  */
879 int bcf_variant_length(bcf1_t* rec, int ith_allele);
880 
881 int bcf_is_snp(bcf1_t* v);
882 
883 /**
884  *  bcf_update_filter() - sets the FILTER column
885  *  @flt_ids:  The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
886  *  @n:        Number of filters. If n==0, all filters are removed
887  */
888 int bcf_update_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int* flt_ids, int n);
889 /**
890  *  bcf_add_filter() - adds to the FILTER column
891  *  @flt_id:   filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
892  *
893  *  If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed.
894  */
895 int bcf_add_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int flt_id);
896 /**
897  *  bcf_remove_filter() - removes from the FILTER column
898  *  @flt_id:   filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
899  *  @pass:     when set to 1 and no filters are present, set to PASS
900  */
901 int bcf_remove_filter(
902     const(bcf_hdr_t)* hdr,
903     bcf1_t* line,
904     int flt_id,
905     int pass);
906 /**
907  *  Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably.
908  */
909 int bcf_has_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, char* filter);
910 /**
911  *  bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALT column
912  *  @alleles:           Array of alleles
913  *  @nals:              Number of alleles
914  *  @alleles_string:    Comma-separated alleles, starting with the REF allele
915  */
916 int bcf_update_alleles(
917     const(bcf_hdr_t)* hdr,
918     bcf1_t* line,
919     const(char*)* alleles,
920     int nals);
921 
922 int bcf_update_alleles_str(
923     const(bcf_hdr_t)* hdr,
924     bcf1_t* line,
925     const(char)* alleles_string);
926 
927 /**
928   *  bcf_update_id() - sets new ID string
929   *  bcf_add_id() - adds to the ID string checking for duplicates
930   */
931 int bcf_update_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id);
932 
933 int bcf_add_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id);
934 
935 /**
936  *  bcf_update_info_*() - functions for updating INFO fields
937  *  @param hdr:       the BCF header
938  *  @param line:      VCF line to be edited
939  *  @param key:       the INFO tag to be updated
940  *  @param values:    pointer to the array of values. Pass NULL to remove the tag.
941  *  @param n:         number of values in the array. When set to 0, the INFO tag is removed
942  *  @return 0 on success or negative value on error.
943  *
944  *  The @p string in bcf_update_info_flag() is optional,
945  *  @p n indicates whether the flag is set or removed.
946  *
947  *  Note that updating an END info tag will cause line->rlen to be
948  *  updated as a side-effect (removing the tag will set it to the
949  *  string length of the REF allele). If line->pos is being changed as
950  *  well, it is important that this is done before calling
951  *  bcf_update_info_int32() to update the END tag, otherwise rlen will be
952  *  set incorrectly.  If the new END value is less than or equal to
953  *  line->pos, a warning will be printed and line->rlen will be set to
954  *  the length of the REF allele.
955  */
956 pragma(inline, true) {    // TODO: rewrite as template
957     auto bcf_update_info_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
958         { return bcf_update_info(hdr, line, key, values, n, BCF_HT_INT); }
959     auto bcf_update_info_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
960         { return bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL); }
961     auto bcf_update_info_flag(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
962         { return bcf_update_info(hdr, line, key, values, n, BCF_HT_FLAG); }
963     auto bcf_update_info_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values) // @suppress(dscanner.style.undocumented_declaration)
964         { return bcf_update_info(hdr, line, key, values, 1, BCF_HT_STR); }
965     }
966 
967 int bcf_update_info (
968     const(bcf_hdr_t)* hdr,
969     bcf1_t* line,
970     const(char)* key,
971     const(void)* values,
972     int n,
973     int type);
974 
975 /// Set or update 64-bit integer INFO values
976 /**
977  *  @param hdr:       the BCF header
978  *  @param line:      VCF line to be edited
979  *  @param key:       the INFO tag to be updated
980  *  @param values:    pointer to the array of values. Pass NULL to remove the tag.
981  *  @param n:         number of values in the array. When set to 0, the INFO tag is removed
982  *  @return 0 on success or negative value on error.
983  *
984  *  This function takes an int64_t values array as input.  The data
985  *  actually stored will be shrunk to the minimum size that can
986  *  accept all of the values.
987  *
988  *  INFO values outside of the range BCF_MIN_BT_INT32 to BCF_MAX_BT_INT32
989  *  can only be written to VCF files.
990  */
991 pragma(inline, true)
992 auto bcf_update_info_int64( const(bcf_hdr_t) *hdr, bcf1_t *line,
993                             const(char) *key,
994                             const(long) *values, int n)
995 {
996     return bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG);
997 }
998 
999 /**
1000  *  bcf_update_format_*() - functions for updating FORMAT fields
1001  *  @values:    pointer to the array of values, the same number of elements
1002  *              is expected for each sample. Missing values must be padded
1003  *              with bcf_*_missing or bcf_*_vector_end values.
1004  *  @n:         number of values in the array. If n==0, existing tag is removed.
1005  *
1006  *  The function bcf_update_format_string() is a higher-level (slower) variant of
1007  *  bcf_update_format_char(). The former accepts array of \0-terminated strings
1008  *  whereas the latter requires that the strings are collapsed into a single array
1009  *  of fixed-length strings. In case of strings with variable length, shorter strings
1010  *  can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char()
1011  *  are not \0-terminated.
1012  *
1013  *  Returns 0 on success or negative value on error.
1014  */
1015 
1016 }/// closing @nogc nothrow
1017 
1018 pragma(inline, true) {
1019     auto bcf_update_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(int) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
1020         { return bcf_update_format(hdr, line, key, values, n, BCF_HT_INT); }
1021     auto bcf_update_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(float) *values, int n) // @suppress(dscanner.style.undocumented_declaration)
1022         { return bcf_update_format(hdr, line, key, values, n, BCF_HT_REAL); }
1023     auto bcf_update_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(char) **values, int n) // @suppress(dscanner.style.undocumented_declaration)
1024         { return bcf_update_format(hdr, line, key, values, n, BCF_HT_STR); }
1025     auto bcf_update_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) **gts, int n) // @suppress(dscanner.style.undocumented_declaration)
1026         { return bcf_update_format(hdr, line, toStringz("GT"c), gts, n, BCF_HT_INT); 
1027     }
1028 }
1029 
1030 @nogc nothrow {
1031 
1032 int bcf_update_format_string (
1033     const(bcf_hdr_t)* hdr,
1034     bcf1_t* line,
1035     const(char)* key,
1036     const(char*)* values,
1037     int n);
1038 
1039 int bcf_update_format(
1040     const(bcf_hdr_t)* hdr,
1041     bcf1_t* line,
1042     const(char)* key,
1043     const(void)* values,
1044     int n,
1045     int type);
1046 
1047 /// Macros for setting genotypes correctly, for use with bcf_update_genotypes only; idx corresponds
1048 /// to VCF's GT (1-based index to ALT or 0 for the reference allele) and val is the opposite, obtained
1049 /// from bcf_get_genotypes() below.
1050 // TODO: is int appropriate?
1051 pragma(inline, true) {
1052     auto bcf_gt_phased(int idx)     { return (((idx)+1)<<1|1);  }
1053     /// ditto
1054     auto bcf_gt_unphased(int idx)   { return (((idx)+1)<<1);    }
1055     /// ditto
1056     auto bcf_gt_is_missing(int val) { return ((val)>>1 ? 0 : 1);}
1057     /// ditto
1058     auto bcf_gt_is_phased(int idx)  { return ((idx)&1);         }
1059     /// ditto
1060     auto bcf_gt_allele(int val)     { return (((val)>>1)-1);    }
1061 }
1062 /// ditto
1063     enum int bcf_gt_missing = 0;
1064 
1065 /** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */
1066 pragma(inline, true) {
1067         auto bcf_alleles2gt(int a, int b) { return ((a)>(b)?((a)*((a)+1)/2+(b)):((b)*((b)+1)/2+(a))); }
1068         /// ditto
1069         void bcf_gt2alleles(int igt, int *a, int *b)
1070         {
1071             int k = 0, dk = 1; // @suppress(dscanner.useless-initializer)
1072             while ( k<igt ) { dk++; k += dk; }
1073             *b = dk - 1; *a = igt - k + *b;
1074         }
1075     }
1076 
1077 /**
1078  * bcf_get_fmt() - returns pointer to FORMAT's field data
1079  * @header: for access to BCF_DT_ID dictionary
1080  * @line:   VCF line obtained from vcf_parse1
1081  * @fmt:    one of GT,PL,...
1082  *
1083  * Returns bcf_fmt_t* if the call succeeded, or returns NULL when the field
1084  * is not available.
1085  */
1086 bcf_fmt_t* bcf_get_fmt(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key);
1087 
1088 bcf_info_t* bcf_get_info(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key);
1089 
1090 /**
1091  * bcf_get_*_id() - returns pointer to FORMAT/INFO field data given the header index instead of the string ID
1092  * @line: VCF line obtained from vcf_parse1
1093  * @id:  The header index for the tag, obtained from bcf_hdr_id2int()
1094  *
1095  * Returns bcf_fmt_t* / bcf_info_t*. These functions do not check if the index is valid
1096  * as their goal is to avoid the header lookup.
1097  */
1098 bcf_fmt_t* bcf_get_fmt_id(bcf1_t* line, const int id);
1099 
1100 bcf_info_t* bcf_get_info_id(bcf1_t* line, const int id);
1101 
1102 /**
1103  *  bcf_get_info_*() - get INFO values, integers or floats
1104  *  @param hdr:    BCF header
1105  *  @param line:   BCF record
1106  *  @param tag:    INFO tag to retrieve
1107  *  @param dst:    *dst is pointer to a memory location, can point to NULL
1108  *  @param ndst:   pointer to the size of allocated memory
1109  *  @return  >=0 on success
1110  *          -1 .. no such INFO tag defined in the header
1111  *          -2 .. clash between types defined in the header and encountered in the VCF record
1112  *          -3 .. tag is not present in the VCF record
1113  *          -4 .. the operation could not be completed (e.g. out of memory)
1114  *
1115  *  Returns negative value on error or the number of values (including
1116  *  missing values) put in *dst on success. bcf_get_info_string() returns
1117  *  on success the number of characters stored excluding the nul-
1118  *  terminating byte. bcf_get_info_flag() does not store anything in *dst
1119  *  but returns 1 if the flag is set or 0 if not.
1120  *
1121  *  *dst will be reallocated if it is not big enough (i.e. *ndst is too
1122  *  small) or NULL on entry.  The new size will be stored in *ndst.
1123  */
1124 pragma(inline, true) {
1125     auto bcf_get_info_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration)
1126         { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_INT); }
1127     auto bcf_get_info_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration)
1128         { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_REAL); }
1129     auto bcf_get_info_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration)
1130         { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_STR); }
1131     auto bcf_get_info_flag(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration)
1132         { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_FLAG); }
1133     }
1134 
1135 int bcf_get_info_values (
1136     const(bcf_hdr_t)* hdr,
1137     bcf1_t* line,
1138     const(char)* tag,
1139     void** dst,
1140     int* ndst,
1141     int type);
1142 
1143 /// Put integer INFO values into an int64_t array
1144 /**
1145  *  @param hdr:    BCF header
1146  *  @param line:   BCF record
1147  *  @param tag:    INFO tag to retrieve
1148  *  @param dst:    *dst is pointer to a memory location, can point to NULL
1149  *  @param ndst:   pointer to the size of allocated memory
1150  *  @return  >=0 on success
1151  *          -1 .. no such INFO tag defined in the header
1152  *          -2 .. clash between types defined in the header and encountered in the VCF record
1153  *          -3 .. tag is not present in the VCF record
1154  *          -4 .. the operation could not be completed (e.g. out of memory)
1155  *
1156  *  Returns negative value on error or the number of values (including
1157  *  missing values) put in *dst on success.
1158  *
1159  *  *dst will be reallocated if it is not big enough (i.e. *ndst is too
1160  *  small) or NULL on entry.  The new size will be stored in *ndst.
1161  */
1162 pragma(inline, true)
1163 auto bcf_get_info_int64(const(bcf_hdr_t) *hdr, bcf1_t *line,
1164                                         const(char) *tag, long **dst,
1165                                         int *ndst)
1166 {
1167     return bcf_get_info_values(hdr, line, tag,
1168                                 cast(void **) dst, ndst, BCF_HT_LONG);
1169 }
1170 
1171 /**
1172  *  bcf_get_format_*() - same as bcf_get_info*() above
1173  *
1174  *  The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char().
1175  *  see the description of bcf_update_format_string() and bcf_update_format_char() above.
1176  *  Unlike other bcf_get_format__*() functions, bcf_get_format_string() allocates two arrays:
1177  *  a single block of \0-terminated strings collapsed into a single array and an array of pointers
1178  *  to these strings. Both arrays must be cleaned by the user.
1179  *
1180  *  Returns negative value on error or the number of written values on success.
1181  *
1182  *  Use the returned number of written values for accessing valid entries of dst, as ndst is only a
1183  *  watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over
1184  *  values from previous calls to bcf_get_format_*() on lines with more values per sample.
1185  *
1186  *  Example:
1187  *      int ndst = 0; char **dst = NULL;
1188  *      if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 )
1189  *          for (i=0; i<bcf_hdr_nsamples(hdr); i++) printf("%s\n", dst[i]);
1190  *      free(dst[0]); free(dst);
1191  *
1192  *  Example:
1193  *      int i, j, ngt, nsmpl = bcf_hdr_nsamples(hdr);
1194  *      int32_t *gt_arr = NULL, ngt_arr = 0;
1195  *
1196  *      ngt = bcf_get_genotypes(hdr, line, &gt_arr, &ngt_arr);
1197  *      if ( ngt<=0 ) return; // GT not present
1198  *
1199  *      int max_ploidy = ngt/nsmpl;
1200  *      for (i=0; i<nsmpl; i++)
1201  *      {
1202  *        int32_t *ptr = gt_arr + i*max_ploidy;
1203  *        for (j=0; j<max_ploidy; j++)
1204  *        {
1205  *           // if true, the sample has smaller ploidy
1206  *           if ( ptr[j]==bcf_int32_vector_end ) break;
1207  *
1208  *           // missing allele
1209  *           if ( bcf_gt_is_missing(ptr[j]) ) continue;
1210  *
1211  *           // the VCF 0-based allele index
1212  *           int allele_index = bcf_gt_allele(ptr[j]);
1213  *
1214  *           // is phased?
1215  *           int is_phased = bcf_gt_is_phased(ptr[j]);
1216  *
1217  *           // .. do something ..
1218  *         }
1219  *      }
1220  *      free(gt_arr);
1221  *
1222  */
1223 
1224 }/// closing @nogc nothrow from line 928
1225 
1226 pragma(inline, true) {
1227     auto bcf_get_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line)
1228         { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_INT); }
1229     auto bcf_get_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line)
1230         { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_REAL); }
1231     auto bcf_get_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line)
1232         { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_STR); }
1233     auto bcf_get_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line)
1234         { return bcf_get_format_values(hdr, line, toStringz("GT"c), cast(void**) dst, ndst, BCF_HT_INT); }
1235 }
1236 
1237 @nogc nothrow {
1238 
1239 int bcf_get_format_string(
1240     const(bcf_hdr_t)* hdr,
1241     bcf1_t* line,
1242     const(char)* tag,
1243     char*** dst,
1244     int* ndst);
1245 
1246 int bcf_get_format_values(
1247     const(bcf_hdr_t)* hdr,
1248     bcf1_t* line,
1249     const(char)* tag,
1250     void** dst,
1251     int* ndst,
1252     int type);
1253 
1254 /**************************************************************************
1255  *  Helper functions
1256  **************************************************************************/
1257 
1258 /**
1259  *  bcf_hdr_id2int() - Translates string into numeric ID
1260  *  bcf_hdr_int2id() - Translates numeric ID into string
1261  *  @type:     one of BCF_DT_ID, BCF_DT_CTG, BCF_DT_SAMPLE
1262  *  @id:       tag name, such as: PL, DP, GT, etc.
1263  *
1264  *  Returns -1 if string is not in dictionary, otherwise numeric ID which identifies
1265  *  fields in BCF records.
1266  */
1267 int bcf_hdr_id2int(const(bcf_hdr_t)* hdr, int type, const(char)* id);
1268 
1269 pragma(inline, true)
1270 auto bcf_hdr_int2id(const(bcf_hdr_t) *hdr, int type, int int_id)
1271     { return hdr.id[type][int_id].key; }
1272 
1273 /**
1274  *  bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID
1275  *  bcf_hdr_id2name() - Translates numeric ID to sequence name
1276  */
1277 pragma(inline, true) int bcf_hdr_name2id(const(bcf_hdr_t) *hdr, const(char) *id) { return bcf_hdr_id2int(hdr, BCF_DT_CTG, id); } // @suppress(dscanner.style.long_line)
1278     /// ditto
1279     pragma(inline, true) const(char) *bcf_hdr_id2name(const(bcf_hdr_t) *hdr, int rid) { return hdr.id[BCF_DT_CTG][rid].key; } // @suppress(dscanner.style.long_line)
1280     /// ditto
1281     pragma(inline, true) const(char) *bcf_seqname(const(bcf_hdr_t) *hdr, bcf1_t *rec) { return hdr.id[BCF_DT_CTG][rec.rid].key; } // @suppress(dscanner.style.long_line)
1282 
1283 /** Return CONTIG name, or "(unknown)"
1284 
1285     Like bcf_seqname(), but this function will never return NULL.  If
1286     the contig name cannot be found (either because @p hdr was not
1287     supplied or rec->rid was out of range) it returns the string
1288     "(unknown)".
1289 */
1290 const(char)* bcf_seqname_safe(const(bcf_hdr_t)* hdr, const(bcf1_t)* rec);
1291 
1292 /**
1293  *  bcf_hdr_id2*() - Macros for accessing bcf_idinfo_t
1294  *  @type:      one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT
1295  *  @int_id:    return value of bcf_hdr_id2int, must be >=0
1296  *
1297  *  The returned values are:
1298  *     bcf_hdr_id2length   ..  whether the number of values is fixed or variable, one of BCF_VL_*
1299  *     bcf_hdr_id2number   ..  the number of values, 0xfffff for variable length fields
1300  *     bcf_hdr_id2type     ..  the field type, one of BCF_HT_*
1301  *     bcf_hdr_id2coltype  ..  the column type, one of BCF_HL_*
1302  *
1303  *  Notes: Prior to using the macros, the presence of the info should be
1304  *  tested with bcf_hdr_idinfo_exists().
1305  */
1306 // TODO: for dict_type and col_type use ENUMs
1307 pragma(inline, true) {
1308     auto bcf_hdr_id2length (const(bcf_hdr_t) *hdr, int type, int int_id) { return ((hdr).id[BCF_DT_ID][int_id].val.info[type]>>8 & 0xf); } // @suppress(dscanner.style.long_line)
1309     /// ditto
1310     auto bcf_hdr_id2number (const(bcf_hdr_t) *hdr, int type, int int_id) { return ((hdr).id[BCF_DT_ID][int_id].val.info[type]>>12);    } // @suppress(dscanner.style.long_line)
1311     /// ditto
1312     uint bcf_hdr_id2type (const(bcf_hdr_t) *hdr, int type, int int_id)   { return cast(uint)((hdr).id[BCF_DT_ID][int_id].val.info[type]>>4 & 0xf); } // @suppress(dscanner.style.long_line)
1313     /// ditto
1314     uint bcf_hdr_id2coltype (const(bcf_hdr_t) *hdr, int type, int int_id){ return cast(uint)((hdr).id[BCF_DT_ID][int_id].val.info[type] & 0xf); } // @suppress(dscanner.style.long_line)
1315     /// ditto
1316     auto bcf_hdr_idinfo_exists (const(bcf_hdr_t) *hdr, int type, int int_id) { return ((int_id<0 || bcf_hdr_id2coltype(hdr,type,int_id)==0xf) ? 0 : 1); } // @suppress(dscanner.style.long_line)
1317     /// ditto
1318     auto bcf_hdr_id2hrc (const(bcf_hdr_t) *hdr, int dict_type, int col_type, int int_id)
1319         { return ((hdr).id[(dict_type)==BCF_DT_CTG?BCF_DT_CTG:BCF_DT_ID][int_id].val.hrec[(dict_type)==BCF_DT_CTG?0:(col_type)]); // @suppress(dscanner.style.long_line)
1320     }
1321 } 
1322 /// Convert BCF FORMAT data to string form
1323 /**
1324  * @param s    kstring to write into
1325  * @param n    number of items in @p data
1326  * @param type type of items in @p data
1327  * @param data BCF format data
1328  * @return  0 on success
1329  *         -1 if out of memory
1330  */
1331 int bcf_fmt_array(kstring_t* s, int n, int type, void* data);
1332 
1333 ubyte* bcf_fmt_sized_array(kstring_t* s, ubyte* ptr);
1334 
1335 /// Encode a variable-length char array in BCF format
1336 /**
1337  * @param s    kstring to write into
1338  * @param l    length of input
1339  * @param a    input data to encode
1340  * @return 0 on success; < 0 on error
1341  */
1342 int bcf_enc_vchar(kstring_t* s, int l, const(char)* a);
1343 
1344 /// Encode a variable-length integer array in BCF format
1345 /**
1346  * @param s      kstring to write into
1347  * @param n      total number of items in @p a (<= 0 to encode BCF_BT_NULL)
1348  * @param a      input data to encode
1349  * @param wsize  vector length (<= 0 is equivalent to @p n)
1350  * @return 0 on success; < 0 on error
1351  * @note @p n should be an exact multiple of @p wsize
1352  */
1353 int bcf_enc_vint(kstring_t* s, int n, int* a, int wsize);
1354 
1355 /// Encode a variable-length float array in BCF format
1356 /**
1357  * @param s      kstring to write into
1358  * @param n      total number of items in @p a (<= 0 to encode BCF_BT_NULL)
1359  * @param a      input data to encode
1360  * @return 0 on success; < 0 on error
1361  */
1362 int bcf_enc_vfloat(kstring_t* s, int n, float* a);
1363 
1364 /**************************************************************************
1365  *  BCF index
1366  *
1367  *  Note that these functions work with BCFs only. See synced_bcf_reader.h
1368  *  which provides (amongst other things) an API to work transparently with
1369  *  both indexed BCFs and VCFs.
1370  **************************************************************************/
1371 
1372 alias bcf_itr_destroy = hts_itr_destroy;
1373 
1374 } /// closing @nogc nothrow from line 1136
1375 
1376 pragma(inline, true) {
1377     /// Generate an iterator for an integer-based range query
1378     auto bcf_itr_queryi(const(hts_idx_t) *idx, int tid, int beg, int end)
1379         { return hts_itr_query(idx, tid, beg, end, &bcf_readrec); }
1380     
1381     /// Generate an iterator for a string-based range query
1382     auto bcf_itr_querys(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, const(char) *s)
1383         { return hts_itr_querys(idx, s, cast(hts_name2id_f) &bcf_hdr_name2id, cast(void *) hdr,
1384                                 &hts_itr_query, &bcf_readrec); }
1385 
1386     /// Iterate through the range
1387     /// r should (probably) point to your VCF (BCF) row structure
1388     /// TODO: attempt to define parameter r as bcf1_t *, which is what I think it should be
1389     int bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) {
1390         if (htsfp.is_bgzf)
1391             return hts_itr_next(htsfp.fp.bgzf, itr, r, null);
1392 
1393         hts_log_error(__FUNCTION__,"Only bgzf compressed files can be used with iterators");
1394         errno = EINVAL;
1395         return -2;
1396     }
1397 
1398 @nogc nothrow:
1399 
1400 /// Load a BCF index
1401 /** @param fn   BCF file name
1402     @return The index, or NULL if an error occurred.
1403      @note This only works for BCF files.  Consider synced_bcf_reader instead
1404 which works for both BCF and VCF.
1405 */
1406 auto bcf_index_load(const(char) *fn) { return hts_idx_load(fn, HTS_FMT_CSI); }
1407 
1408 /// Get a list (char **) of sequence names from the index -- free only the array, not the values
1409 auto bcf_index_seqnames(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, int *nptr)
1410     { return hts_idx_seqnames(idx, nptr, cast(hts_id2name_f) &bcf_hdr_id2name, cast(void *) hdr); }
1411 }
1412 
1413 /// Load a BCF index from a given index file name
1414 /**  @param fn     Input BAM/BCF/etc filename
1415      @param fnidx  The input index filename
1416      @return  The index, or NULL if an error occurred.
1417      @note This only works for BCF files.  Consider synced_bcf_reader instead
1418 which works for both BCF and VCF.
1419 */
1420 hts_idx_t* bcf_index_load2(const(char)* fn, const(char)* fnidx);
1421 
1422 /// Load a BCF index from a given index file name
1423 /**  @param fn     Input BAM/BCF/etc filename
1424      @param fnidx  The input index filename
1425      @param flags  Flags to alter behaviour (see description)
1426      @return  The index, or NULL if an error occurred.
1427      @note This only works for BCF files.  Consider synced_bcf_reader instead
1428 which works for both BCF and VCF.
1429 
1430      The @p flags parameter can be set to a combination of the following
1431      values:
1432 
1433         HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
1434         HTS_IDX_SILENT_FAIL   Fail silently if the index is not present
1435 
1436      Equivalent to hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags);
1437 */
1438 hts_idx_t* bcf_index_load3(const(char)* fn, const(char)* fnidx, int flags);
1439 
1440 /**
1441  *  bcf_index_build() - Generate and save an index file
1442  *  @fn:         Input VCF(compressed)/BCF filename
1443  *  @min_shift:  log2(width of the smallest bin), e.g. a value of 14
1444  *  imposes a 16k base lower limit on the width of index bins.
1445  *  Positive to generate CSI, or 0 to generate TBI. However, a small
1446  *  value of min_shift would create a large index, which would lead to
1447  *  reduced performance when using the index. A recommended value is 14.
1448  *  For BCF files, only the CSI index can be generated.
1449  *
1450  *  Returns 0 if successful, or negative if an error occurred.
1451  *
1452  *  List of error codes:
1453  *      -1 .. indexing failed
1454  *      -2 .. opening @fn failed
1455  *      -3 .. format not indexable
1456  *      -4 .. failed to create and/or save the index
1457  */
1458 int bcf_index_build(const(char)* fn, int min_shift);
1459 
1460 /**
1461  *  bcf_index_build2() - Generate and save an index to a specific file
1462  *  @fn:         Input VCF/BCF filename
1463  *  @fnidx:      Output filename, or NULL to add .csi/.tbi to @fn
1464  *  @min_shift:  Positive to generate CSI, or 0 to generate TBI
1465  *
1466  *  Returns 0 if successful, or negative if an error occurred.
1467  *
1468  *  List of error codes:
1469  *      -1 .. indexing failed
1470  *      -2 .. opening @fn failed
1471  *      -3 .. format not indexable
1472  *      -4 .. failed to create and/or save the index
1473  */
1474 int bcf_index_build2(const(char)* fn, const(char)* fnidx, int min_shift);
1475 
1476 /**
1477  *  bcf_index_build3() - Generate and save an index to a specific file
1478  *  @fn:         Input VCF/BCF filename
1479  *  @fnidx:      Output filename, or NULL to add .csi/.tbi to @fn
1480  *  @min_shift:  Positive to generate CSI, or 0 to generate TBI
1481  *  @n_threads:  Number of VCF/BCF decoder threads
1482  *
1483  *  Returns 0 if successful, or negative if an error occurred.
1484  *
1485  *  List of error codes:
1486  *      -1 .. indexing failed
1487  *      -2 .. opening @fn failed
1488  *      -3 .. format not indexable
1489  *      -4 .. failed to create and/or save the index
1490  */
1491 int bcf_index_build3(
1492     const(char)* fn,
1493     const(char)* fnidx,
1494     int min_shift,
1495     int n_threads);
1496 
1497 /// Initialise fp->idx for the current format type, for VCF and BCF files.
1498 /** @param fp        File handle for the data file being written.
1499     @param h         BCF header structured (needed for BAI and CSI).
1500     @param min_shift CSI bin size (CSI default is 14).
1501     @param fnidx     Filename to write index to.  This pointer must remain valid
1502                      until after bcf_idx_save is called.
1503     @return          0 on success, <0 on failure.
1504     @note This must be called after the header has been written, but before
1505           any other data.
1506 */
1507 int bcf_idx_init(htsFile* fp, bcf_hdr_t* h, int min_shift, const(char)* fnidx);
1508 
1509 /// Writes the index initialised with bcf_idx_init to disk.
1510 /** @param fp        File handle for the data file being written.
1511     @return          0 on success, <0 on failure.
1512 */
1513 int bcf_idx_save(htsFile* fp);
1514 
1515 /*******************
1516  * Typed value I/O *
1517  *******************/
1518 
1519 /**
1520     Note that in contrast with BCFv2.1 specification, HTSlib implementation
1521     allows missing values in vectors. For integer types, the values 0x80,
1522     0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001,
1523     0x80000001 as end-of-vector indicators.  Similarly for floats, the value of
1524     0x7F800001 is interpreted as a missing value and 0x7F800002 as an
1525     end-of-vector indicator.
1526     Note that the end-of-vector byte is not part of the vector.
1527 
1528     This trial BCF version (v2.2) is compatible with the VCF specification and
1529     enables to handle correctly vectors with different ploidy in presence of
1530     missing values.
1531  */
1532 enum bcf_int8_vector_end = -127; /* INT8_MIN  + 1 */
1533 enum bcf_int16_vector_end = -32_767; /* INT16_MIN + 1 */
1534 enum bcf_int32_vector_end = -2_147_483_647; /* INT32_MIN + 1 */
1535 enum bcf_int64_vector_end = -9_223_372_036_854_775_807L; /* INT64_MIN + 1 */
1536 enum bcf_str_vector_end = 0;
1537 enum bcf_int8_missing = -128; /* INT8_MIN  */
1538 enum bcf_int16_missing = -32_767 - 1; /* INT16_MIN */
1539 enum bcf_int32_missing = -2_147_483_647 - 1; /* INT32_MIN */
1540 enum bcf_int64_missing = -9_223_372_036_854_775_807L - 1L; /* INT64_MIN */
1541 enum bcf_str_missing = 0x07;
1542 
1543 // Limits on BCF values stored in given types.  Max values are the same
1544 // as for the underlying type.  Min values are slightly different as
1545 // the last 8 values for each type were reserved by BCFv2.2.
1546 enum BCF_MAX_BT_INT8 = 0x7f; /* INT8_MAX  */
1547 enum BCF_MAX_BT_INT16 = 0x7fff; /* INT16_MAX */
1548 enum BCF_MAX_BT_INT32 = 0x7fffffff; /* INT32_MAX */
1549 enum BCF_MIN_BT_INT8 = -120; /* INT8_MIN  + 8 */
1550 enum BCF_MIN_BT_INT16 = -32_760; /* INT16_MIN + 8 */
1551 enum BCF_MIN_BT_INT32 = -2_147_483_640; /* INT32_MIN + 8 */
1552 
1553 extern __gshared uint bcf_float_vector_end;
1554 extern __gshared uint bcf_float_missing;
1555 version(LDC) pragma(inline, true):
1556 version(GNU) pragma(inline, true):
1557 /** u wot */
1558 void bcf_float_set(float *ptr, uint32_t value)
1559 {
1560     union U { uint32_t i; float f; }
1561     U u;
1562     u.i = value;
1563     *ptr = u.f;
1564 }
1565 
1566 /// float vector macros
1567 void bcf_float_set_vector_end(float x) { bcf_float_set(&x, bcf_float_vector_end); }
1568 /// ditto
1569 void bcf_float_set_missing(float x) { bcf_float_set(&x, bcf_float_missing); }
1570 
1571 /** u wot */
1572 pragma(inline, true)
1573 int bcf_float_is_missing(float f)
1574 {
1575     union U { uint32_t i; float f; }
1576     U u;
1577     u.f = f;
1578     return u.i==bcf_float_missing ? 1 : 0;
1579 }
1580 /// ditto
1581 pragma(inline, true)
1582 int bcf_float_is_vector_end(float f)
1583 {
1584     union U { uint32_t i; float f; }
1585     U u;
1586     u.f = f;
1587     return u.i==bcf_float_vector_end ? 1 : 0;
1588 }
1589 
1590 /// (Undocumented) Format GT field
1591 pragma(inline, true)
1592 int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
1593 {
1594     uint32_t e = 0;
1595     void branch(T)()    // gets a closure over e (was #define macro)
1596     if (is(T == int8_t) || is(T == int16_t) || is(T == int32_t))
1597     {
1598         static if (is(T == int8_t))
1599             auto vector_end = bcf_int8_vector_end;
1600         else static if (is(T == int16_t))
1601             auto vector_end = bcf_int16_vector_end;
1602         else
1603             auto vector_end = bcf_int32_vector_end;
1604 
1605         T *ptr = cast(T*) (fmt.p + (isample * fmt.size));
1606         for (int i=0; i<fmt.n && ptr[i] != vector_end; i++)
1607         {
1608             if ( i ) e |= kputc("/|"[ptr[i]&1], str) < 0;
1609             if ( !(ptr[i]>>1) ) e |= kputc('.', str) < 0;
1610             else e |= kputw((ptr[i]>>1) - 1, str) < 0;
1611         }
1612         if (i == 0) e |= kputc('.', str) < 0;
1613     }
1614     switch (fmt.type) {
1615         case BCF_BT_INT8:  branch!int8_t; break;
1616         case BCF_BT_INT16: branch!int16_t; break;
1617         case BCF_BT_INT32: branch!int32_t; break;
1618         case BCF_BT_NULL:  e |= kputc('.', str) < 0; break;
1619         default: hts_log_error("Unexpected type %d", fmt.type); return -2;
1620     }
1621 
1622     return e == 0 ? 0 : -1;
1623 }
1624 
1625 
1626 pragma(inline, true)
1627 int bcf_enc_size(kstring_t *s, int size, int type)
1628 {
1629     uint32_t e = 0;
1630     if (size >= 15) {
1631         e |= kputc(15<<4|type, s) < 0;
1632         if (size >= 128) {
1633             if (size >= 32_768) {
1634                 int32_t x = size;
1635                 e |= kputc(1<<4|BCF_BT_INT32, s) < 0;
1636                 e |= kputsn(cast(char*)&x, 4, s) < 0;
1637             } else {
1638                 int16_t x = size;
1639                 e |= kputc(1<<4|BCF_BT_INT16, s) < 0;
1640                 e |= kputsn(cast(char*)&x, 2, s) < 0;
1641             }
1642         } else {
1643             e |= kputc(1<<4|BCF_BT_INT8, s) < 0;
1644             e |= kputc(size, s) < 0;
1645         }
1646     } else e |= kputc(size<<4|type, s) < 0;
1647     return e == 0 ? 0 : -1;
1648 }
1649 
1650 
1651 /// Undocumented Encode integer type?
1652 pragma(inline, true)
1653 int bcf_enc_inttype(long x)
1654 {
1655     if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) return BCF_BT_INT8;
1656     if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) return BCF_BT_INT16;
1657     return BCF_BT_INT32;
1658 }
1659 
1660 /// Undocumented Encode integer variant 1
1661 pragma(inline, true)
1662 int bcf_enc_int1(kstring_t *s, int32_t x)
1663 {
1664     uint32_t e = 0;
1665     if (x == bcf_int32_vector_end) {
1666         e |= bcf_enc_size(s, 1, BCF_BT_INT8);
1667         e |= kputc(bcf_int8_vector_end, s) < 0;
1668     } else if (x == bcf_int32_missing) {
1669         e |= bcf_enc_size(s, 1, BCF_BT_INT8);
1670         e |= kputc(bcf_int8_missing, s) < 0;
1671     } else if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) {
1672         e |= bcf_enc_size(s, 1, BCF_BT_INT8);
1673         e |= kputc(x, s) < 0;
1674     } else if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) {
1675         int16_t z = x;
1676         e |= bcf_enc_size(s, 1, BCF_BT_INT16);
1677         e |= kputsn(cast(char*)&z, 2, s) < 0;
1678     } else {
1679         int32_t z = x;
1680         e |= bcf_enc_size(s, 1, BCF_BT_INT32);
1681         e |= kputsn(cast(char*)&z, 4, s) < 0;
1682     }
1683     return e == 0 ? 0 : -1;
1684 }
1685 /// Return the value of a single typed integer.
1686 /** @param      p    Pointer to input data block.
1687     @param      type One of the BCF_BT_INT* type codes
1688     @param[out] q    Location to store an updated value for p
1689     @return The integer value, or zero if @p type is not valid.
1690 
1691 If @p type is not one of BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or
1692 BCF_BT_INT64, zero will be returned and @p *q will not be updated.
1693 Otherwise, the integer value will be returned and @p *q will be set
1694 to the memory location immediately following the integer value.
1695 
1696 Cautious callers can detect invalid type codes by checking that *q has
1697 actually been updated.
1698 */
1699 pragma(inline, true)
1700 int64_t bcf_dec_int1(const(ubyte) *p, int type, ubyte **q)
1701 {
1702     if (type == BCF_BT_INT8) {
1703         *q = cast(ubyte*)p + 1;
1704         return le_to_i8(p);
1705     } else if (type == BCF_BT_INT16) {
1706         *q = cast(ubyte*)p + 2;
1707         return le_to_i16(p);
1708     } else if (type == BCF_BT_INT32) {
1709         *q = cast(ubyte*)p + 4;
1710         return le_to_i32(p);
1711     } else if (type == BCF_BT_INT64) {
1712         *q = cast(ubyte*)p + 4;
1713         return le_to_i64(p);
1714     } else { // Invalid type.
1715         return 0;
1716     }
1717 }
1718 
1719 /// Return the value of a single typed integer from a byte stream.
1720 /** @param      p    Pointer to input data block.
1721     @param[out] q    Location to store an updated value for p
1722     @return The integer value, or zero if the type code was not valid.
1723 
1724 Reads a one-byte type code from @p p, and uses it to decode an integer
1725 value from the following bytes in @p p.
1726 
1727 If the type is not one of BCF_BT_INT8, BCF_BT_INT16 or BCF_BT_INT32, zero
1728 will be returned and @p *q will unchanged.  Otherwise, the integer value will
1729 be returned and @p *q will be set to the memory location immediately following
1730 the integer value.
1731 
1732 Cautious callers can detect invalid type codes by checking that *q has
1733 actually been updated.
1734 */
1735 pragma(inline, true)
1736 long bcf_dec_typed_int1 (const(ubyte)* p, ubyte** q)
1737 {
1738     return bcf_dec_int1(p + 1, *p&0xf, q);
1739 }
1740 
1741 pragma(inline, true)
1742 int bcf_dec_size (const(ubyte)* p, ubyte** q, int* type)
1743 {
1744     *type = *p & 0xf;
1745     if (*p>>4 != 15) {
1746         *q = cast(ubyte*)p + 1;
1747         return *p>>4;
1748     } else return bcf_dec_typed_int1(p + 1, q);
1749 }