/// D bindings to htslib-1.9 vcf
/// Copyright 2018 James S Blachly, MD
/// Changes are MIT licensed
/// Section numbers refer to VCF Specification v4.2: https://samtools.github.io/hts-specs/VCFv4.2.pdf
module htslib.vcf;

import std.bitmanip;
import std.string: toStringz;

extern (C):

/// @file htslib/vcf.h
/// High-level VCF/BCF variant calling file operations.
/*
    Copyright (C) 2012, 2013 Broad Institute.
    Copyright (C) 2012-2019 Genome Research Ltd.

    Author: Heng Li <lh3@sanger.ac.uk>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/

/*
todo:
    - make the function names consistent
    - provide calls to abstract away structs as much as possible
 */

import core.stdc.stdint;
import core.stdc.limits;
import core.stdc.assert_;
import core.stdc.errno : errno, EINVAL;
import htslib.hts;
import htslib.kstring: kstring_t, kputc, kputsn, kputw;
import htslib.bgzf: BGZF; // normally typedefed as opaque struct in hts.h
import htslib.hts_log; // hts.h imports hts_log.h
//#include "hts_defs.h"
//#include "hts_endian.h"

/*****************
 * Header struct *
 *****************/
enum {
    // Header line kinds (BCF_HL_*)
    BCF_HL_FLT  = 0, /// header line: FILTER
    BCF_HL_INFO = 1, /// header line: INFO
    BCF_HL_FMT  = 2, /// header line: FORMAT
    BCF_HL_CTG  = 3, /// header line: contig
    BCF_HL_STR  = 4, /// header line: structured header line TAG=<A=..,B=..>
    BCF_HL_GEN  = 5, /// header line: generic header line

    // Header value types (BCF_HT_*)
    BCF_HT_FLAG = 0, /// header type: FLAG
    BCF_HT_INT  = 1, /// header type: INTEGER
    BCF_HT_REAL = 2, /// header type: REAL
    BCF_HT_STR  = 3, /// header type: STRING
    BCF_HT_LONG = (BCF_HT_INT | 0x100), /// BCF_HT_INT, but for int64_t values; VCF only!

    // Value-count kinds (BCF_VL_*)
    BCF_VL_FIXED= 0, /// variable length: fixed (?)
    BCF_VL_VAR  = 1, /// variable length: variable
    BCF_VL_A    = 2, /// variable length: presumably VCF Number=A -- confirm against the VCF spec
    BCF_VL_G    = 3, /// variable length: presumably VCF Number=G -- confirm against the VCF spec
    BCF_VL_R    = 4  /// variable length: presumably VCF Number=R -- confirm against the VCF spec
}

/* === Dictionary ===

   The header keeps three dictionaries. The first keeps IDs in the
   "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths
   in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[]
   is the actual hash table, which is opaque to the end users. In the hash
   table, the key is the ID or sample name as a C string and the value is a
   bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash
   table in the order that they appear in the VCF header.
bcf_hdr_t::n[] is the
   size of the hash table or, equivalently, the length of the id[] arrays.
*/

enum int BCF_DT_ID     = 0; /// dictionary type: ID
enum int BCF_DT_CTG    = 1; /// dictionary type: CONTIG
enum int BCF_DT_SAMPLE = 2; /// dictionary type: SAMPLE

/// Structured representation of a header line (§1.2)
struct bcf_hrec_t { // @suppress(dscanner.style.phobos_naming_convention)
    int type;       /// One of the BCF_HL_* type
    char *key;      /// The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc.
    char *value;    /// Set only for generic lines, NULL for FILTER/INFO, etc.
    int nkeys;      /// Number of structured fields
    char **keys;    /// The key=value pairs (keys)
    char **vals;    /// The key=value pairs (values)
}

/// ID Dictionary entry
struct bcf_idinfo_t { // @suppress(dscanner.style.phobos_naming_convention)
    uint64_t[3] info;       /** stores Number:20, var:4, Type:4, ColType:4 in info[0..2]
                                for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG */
    bcf_hrec_t *[3] hrec;   /// pointers to header lines for [FILTER, INFO, FORMAT] in order
    int id;                 /// primary key
}

/// ID Dictionary k/v
struct bcf_idpair_t { // @suppress(dscanner.style.phobos_naming_convention)
    const(char) *key;           /// header dictionary FILTER/INFO/FORMAT ID key
    // C declares `const bcf_idinfo_t *val` (mutable pointer to const data).
    // In D a leading `const` would make the pointer itself const and the
    // struct non-assignable, so the qualifier is applied to the pointee only.
    const(bcf_idinfo_t) *val;   /// header dictionary FILTER/INFO/FORMAT ID entry
}

/// Structured representation of VCF header (§1.2)
/// Note that bcf_hdr_t structs must always be created via bcf_hdr_init()
struct bcf_hdr_t { // @suppress(dscanner.style.phobos_naming_convention)
    int32_t[3] n;           /// n:the size of the dictionary block in use, (allocated size, m, is below to preserve ABI)
    bcf_idpair_t *[3] id;   /// ID dictionary {FILTER/INFO/FORMAT, contig, sample} ID key/entry
    void *[3] dict;         /// hash table
    char **samples;         /// ?list of samples
    bcf_hrec_t **hrec;      /// Structured representation of this header line
    int nhrec;              /// # of header records
    int dirty;              /// ?
    int ntransl;            /// for bcf_translate()
    int *[2] transl;        /// for bcf_translate()
    int nsamples_ori;       /// for bcf_hdr_set_samples()
    uint8_t *keep_samples;  /// ?
    kstring_t mem;          /// ?
    int32_t[3] m;           /// m: allocated size of the dictionary block in use (see n above)
}

/// Lookup table used in bcf_record_check
/// MAINTAINER: in C header is []
extern __gshared uint8_t[16] bcf_type_shift;

/**************
 * VCF record *
 **************/

enum int BCF_BT_NULL   = 0; /// null
enum int BCF_BT_INT8   = 1; /// int8
enum int BCF_BT_INT16  = 2; /// int16
enum int BCF_BT_INT32  = 3; /// int32
enum int BCF_BT_INT64  = 4; /// Unofficial, for internal use only per htslib headers

enum int BCF_BT_FLOAT  = 5; /// float (32?)
enum int BCF_BT_CHAR   = 7; /// char (8 bit)

enum int VCF_REF     = 0;  /// ref (e.g. in a gVCF)
enum int VCF_SNP     = 1;  /// SNP
enum int VCF_MNP     = 2;  /// MNP
enum int VCF_INDEL   = 4;  /// INDEL
enum int VCF_OTHER   = 8;  /// other (e.g. SV)
enum int VCF_BND     = 16; /// breakend
enum int VCF_OVERLAP = 32; /// overlapping deletion, ALT=*

/// variant type record embedded in bcf_dec_t
/// variant type and the number of bases affected, negative for deletions
struct variant_t { // @suppress(dscanner.style.phobos_naming_convention)
    int type;   /// variant type
    int n;      /// number of bases affected, negative for deletions
}

/// FORMAT field data (§1.4.2 Genotype fields)
struct bcf_fmt_t { // @suppress(dscanner.style.phobos_naming_convention)
    int id;         /// id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key
    int n;          /// n: number of values per-sample
    int size;       /// size: number of bytes per-sample
    int type;       /// type: one of BCF_BT_* types
    uint8_t *p;     /// same as vptr and vptr_* in bcf_info_t below
    uint32_t p_len; /// ? (presumably the counterpart of bcf_info_t.vptr_len -- confirm)

    /// p_off / p_free: presumably the counterparts of vptr_off / vptr_free
    /// in bcf_info_t below -- confirm against htslib
    mixin(bitfields!(
        uint, "p_off", 31,
        bool, "p_free", 1));
}

/// INFO field data (§1.4.1 Fixed fields, (8) INFO)
struct bcf_info_t { // @suppress(dscanner.style.phobos_naming_convention)
    int key;        /// key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key
    int type;       /// type: one of BCF_BT_* types
    /// Stores a numeric value iff this INFO field is a scalar
    union V1 {
        int64_t i;  /// integer value
        float f;    /// float value
    }
    V1 v1;              /// only set if $len==1; for easier access
    uint8_t *vptr;      /// pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes
    uint32_t vptr_len;  /// length of the vptr block or, when set, of the vptr_mod block, excluding offset

    /// vptr_off:  vptr offset, i.e., the size of the INFO key plus size+type bytes
    /// vptr_free: indicates that vptr-vptr_off must be freed; set only when modified and the new
    ///            data block is bigger than the original
    mixin(bitfields!(
        uint, "vptr_off", 31,
        bool, "vptr_free", 1));

    int len;            /// len: vector length, 1 for scalars
}


enum int BCF1_DIRTY_ID  = 1; /// ID was edited
enum int BCF1_DIRTY_ALS = 2; /// Allele(s) was edited
enum int BCF1_DIRTY_FLT = 4; /// FILTER was edited
enum int BCF1_DIRTY_INF = 8; /// INFO was edited

/// Variable-length data from a VCF record
struct bcf_dec_t { // @suppress(dscanner.style.phobos_naming_convention)
    /// allocated size (high-water mark); do not change
    int m_fmt, m_info, m_id, m_als, m_allele, m_flt;
    int n_flt;          /// Number of FILTER fields
    int *flt;           /// FILTER keys in the dictionary
    char *id;           /// ID
    char *als;          /// REF+ALT block (\0-separated)
    char **allele;      /// allele[0] is the REF (allele[] pointers to the als block); all null terminated
    bcf_info_t *info;   /// INFO
    bcf_fmt_t *fmt;     /// FORMAT and individual sample
    variant_t *var;     /// $var and $var_type set only when set_variant_types called
    int n_var;          /// variant number(???)
    int var_type;       /// variant type (TODO: make enum)
    int shared_dirty;   /// if set, shared.s must be recreated on BCF output (TODO: make enum)
    int indiv_dirty;    /// if set, indiv.s must be recreated on BCF output (TODO: make enum)
}


enum int BCF_ERR_CTG_UNDEF   = 1;  /// BCF error: undefined contig
enum int BCF_ERR_TAG_UNDEF   = 2;  /// BCF error: undefined tag
enum int BCF_ERR_NCOLS       = 4;  /// BCF error:
enum int BCF_ERR_LIMITS      = 8;  /// BCF error:
enum int BCF_ERR_CHAR        = 16; /// BCF error:
enum int BCF_ERR_CTG_INVALID = 32; /// BCF error:
enum int BCF_ERR_TAG_INVALID = 64; /// BCF error:

/**
    The bcf1_t structure corresponds to one VCF/BCF line. Reading from VCF file
    is slower because the string is first to be parsed, packed into BCF line
    (done in vcf_parse), then unpacked into internal bcf1_t structure. If it
    is known in advance that some of the fields will not be required (notably
    the sample columns), parsing of these can be skipped by setting max_unpack
    appropriately.
    Similarly, it is fast to output a BCF line because the columns (kept in
    shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF
    line must be formatted in vcf_format.
 */
struct bcf1_t { // @suppress(dscanner.style.phobos_naming_convention)
    hts_pos_t pos;  /// Chromosomal position
    hts_pos_t rlen; /// length of REF
    int32_t rid;    /// CHROM
    float qual;     /// QUAL

    mixin(bitfields!(
        uint, "n_info", 16,     // For whatever reason, windows doesn't like uint32_t
        uint, "n_allele", 16));

    mixin(bitfields!(
        uint, "n_fmt", 8,       // For whatever reason, windows doesn't like uint32_t
        uint, "n_sample", 24));

    kstring_t _shared;      /// ??? (name mangled due to D reserved keyword shared)
    kstring_t indiv;        /// ???
    bcf_dec_t d;            /// lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack()
    int max_unpack;         /// Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed
    int unpacked;           /// remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work
    int[3] unpack_size;     /// the original block size of ID, REF+ALT and FILTER
    int errcode;            /// one of BCF_ERR_* codes (TODO: make enum)
}

/*******
 * API *
 *******/

/***********************************************************************
 *  BCF and VCF I/O
 *
 *  A note about naming conventions: htslib internally represents VCF
 *  records as bcf1_t data structures, therefore most functions are
 *  prefixed with bcf_. There are a few exceptions where the functions must
 *  be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In
 *  these cases, functions prefixed with bcf_ are more general and work
 *  with both BCF and VCF.
 *
 ***********************************************************************/

/** These macros are defined only for consistency with other parts of htslib */
alias bcf_init1    = bcf_init;
alias bcf_read1    = bcf_read;
alias vcf_read1    = vcf_read;
alias bcf_write1   = bcf_write;
alias vcf_write1   = vcf_write;
alias bcf_destroy1 = bcf_destroy;
alias bcf_empty1   = bcf_empty;
alias vcf_parse1   = vcf_parse;
alias bcf_clear1   = bcf_clear;
alias vcf_format1  = vcf_format;

/**
 *  bcf_hdr_init() - create an empty BCF header.
 *  @param mode    "r" or "w"
 *
 *  When opened for writing, the mandatory fileFormat and
 *  FILTER=PASS lines are added automatically.
 *
 *  The bcf_hdr_t struct returned by a successful call should be freed
 *  via bcf_hdr_destroy() when it is no longer needed.
*/
bcf_hdr_t* bcf_hdr_init(const(char)* mode);

/// Free a BCF header struct.
void bcf_hdr_destroy(bcf_hdr_t* h);

/**
 * Allocate and initialize a bcf1_t object.
 *
 * On success, release the returned bcf1_t with bcf_destroy() once it is
 * no longer needed.
 */
bcf1_t* bcf_init();

/// Deallocate a bcf1_t object.
void bcf_destroy(bcf1_t* v);

/**
 * Like bcf_destroy(), except that only the memory allocated by the bcf1_t
 * is freed, not the bcf1_t object itself.
 */
void bcf_empty(bcf1_t* v);

/**
 * Make the bcf1_t object ready for the next read. Mostly intended for
 * internal use; users should rarely need to call it directly.
 */
void bcf_clear(bcf1_t* v);


/// bcf_open and vcf_open mode: please see hts_open() in hts.h
alias vcfFile   = htsFile;
alias bcf_open  = hts_open;
alias vcf_open  = hts_open;
alias bcf_close = hts_close;
alias vcf_close = hts_close;

/**
 * Read a VCF or BCF header.
 *
 * @param fp  The file to read the header from
 * @return Pointer to a populated header structure on success;
 *         NULL on failure
 *
 * On success, release the returned bcf_hdr_t with bcf_hdr_destroy() once
 * it is no longer needed.
 */
bcf_hdr_t* bcf_hdr_read(htsFile* fp);

/**
 *  bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed
 *  @param samples  samples to include or exclude from file or as a comma-separated string.
 *              LIST|FILE   .. select samples in list/file
 *              ^LIST|FILE  .. exclude samples from list/file
 *              -           .. include all samples
 *              NULL        .. exclude all samples
 *  @param is_file  @p samples is a file (1) or a comma-separated list (0)
 *
 *  The bottleneck of VCF reading is parsing of genotype fields.
If the
 *  reader knows in advance that only subset of samples is needed (possibly
 *  no samples at all), the performance of bcf_read() can be significantly
 *  improved by calling bcf_hdr_set_samples after bcf_hdr_read().
 *  The function bcf_read() will subset the VCF/BCF records automatically
 *  with the notable exception when reading records via bcf_itr_next().
 *  In this case, bcf_subset_format() must be called explicitly, because
 *  bcf_readrec() does not see the header.
 *
 *  Returns 0 on success, -1 on error or a positive integer if the list
 *  contains samples not present in the VCF header. In such a case, the
 *  return value is the index of the offending sample.
 */
int bcf_hdr_set_samples(bcf_hdr_t* hdr, const(char)* samples, int is_file);
/// ditto
int bcf_subset_format(const(bcf_hdr_t)* hdr, bcf1_t* rec);


/**
 * Write a VCF or BCF header.
 *
 * @param fp  Output file
 * @param h   The header to write
 * @return 0 on success; -1 on failure
 */
int bcf_hdr_write(htsFile* fp, bcf_hdr_t* h);

/**
 * Parse the VCF line held in the kstring and populate the bcf1_t struct.
 * The line must not end with \n or \r characters.
 */
int vcf_parse(kstring_t* s, const(bcf_hdr_t)* h, bcf1_t* v);

/// The opposite of vcf_parse. It should rarely be called directly, see vcf_write.
int vcf_format(const(bcf_hdr_t)* h, const(bcf1_t)* v, kstring_t* s);

/// Read next VCF or BCF record
/** @param fp  The file to read the record from
    @param h   The header for the vcf/bcf file
    @param v   The bcf1_t structure to populate
    @return 0 on success; -1 on end of file; < -1 on critical error

    On errors which are not critical for reading, such as missing header
    definitions in vcf files, zero will be returned but v->errcode will have been
    set to one of BCF_ERR* codes and must be checked before calling bcf_write().
*/
int bcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);

/**
 * bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field)
 *
 * bcf_unpack() must be called even when reading VCF. Calling it repeatedly
 * is safe: the same field is not unpacked twice.
 */
int bcf_unpack(bcf1_t* b, int which);
enum int BCF_UN_STR  = 1;   /// up to ALT inclusive
enum int BCF_UN_FLT  = 2;   /// up to FILTER
enum int BCF_UN_INFO = 4;   /// up to INFO
enum int BCF_UN_SHR  = BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO;   /// all shared information
enum int BCF_UN_FMT  = 8;   /// unpack format and each sample
alias BCF_UN_IND = BCF_UN_FMT; // a synonym of BCF_UN_FMT
enum int BCF_UN_ALL  = BCF_UN_SHR | BCF_UN_FMT;   /// everything

/**
 * bcf_dup() - create a copy of BCF record.
 *
 * bcf_unpack() must be called on the returned copy as if it had been
 * obtained from bcf_read(). Note also that bcf_dup() calls bcf_sync1(src)
 * internally to reflect any changes made by bcf_update_* functions.
 *
 * On success, release the returned bcf1_t with bcf_destroy() once it is
 * no longer needed.
 */
bcf1_t* bcf_dup(bcf1_t* src);
/// ditto
bcf1_t* bcf_copy(bcf1_t* dst, bcf1_t* src);

/**
 * Write one VCF or BCF record. The type is determined at the open() call.
 *
 * @param fp  The file to write to
 * @param h   The header for the vcf/bcf file
 * @param v   The bcf1_t structure to write
 * @return 0 on success; -1 on error
 */
int bcf_write(htsFile* fp, bcf_hdr_t* h, bcf1_t* v);

/*
 *  The following functions work only with VCFs and should rarely be called
 *  directly. Usually one wants to use their bcf_* alternatives, which work
 *  transparently with both VCFs and BCFs.
*/

/**
 * Read a VCF format header.
 *
 * @param fp  The file to read the header from
 * @return Pointer to a populated header structure on success;
 *         NULL on failure
 *
 * Use bcf_hdr_read() instead.
 *
 * On success, release the returned bcf_hdr_t with bcf_hdr_destroy() once
 * it is no longer needed.
 */
bcf_hdr_t* vcf_hdr_read(htsFile* fp);

/**
 * Write a VCF format header.
 *
 * @param fp  Output file
 * @param h   The header to write
 * @return 0 on success; -1 on failure
 *
 * Use bcf_hdr_write() instead.
 */
int vcf_hdr_write(htsFile* fp, const(bcf_hdr_t)* h);

/**
 * Read a record from a VCF file.
 *
 * @param fp  The file to read the record from
 * @param h   The header for the vcf file
 * @param v   The bcf1_t structure to populate
 * @return 0 on success; -1 on end of file; < -1 on error
 *
 * Use bcf_read() instead.
 */
int vcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);

/**
 * Write a record to a VCF file.
 *
 * @param fp  The file to write to
 * @param h   The header for the vcf file
 * @param v   The bcf1_t structure to write
 * @return 0 on success; -1 on error
 *
 * Use bcf_write() instead.
 */
int vcf_write(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v);

/**
 * Helper function for the bcf_itr_next() macro; internal use, ignore it.
 *
 * NOTE: the second parameter is called "null" in the C API and is spelled
 * _null here.
 */
int bcf_readrec(BGZF* fp, void* _null, void* v, int* tid, hts_pos_t* beg, hts_pos_t* end);

/// Write a line to a VCF file
/** @param line   Line to write
    @param fp     File to write it to
    @return 0 on success; -1 on failure

    @note No checks are done on the line being added, apart from
          ensuring that it ends with a newline.  This function
          should therefore be used with care.
*/
int vcf_write_line(htsFile* fp, kstring_t* line); // new in htslib-1.10

/**************************************************************************
 *  Header querying and manipulation routines
 **************************************************************************/

/**
 * Create a new header using the supplied template.
 *
 * On success, release the returned bcf_hdr_t with bcf_hdr_destroy() once
 * it is no longer needed.
 */
bcf_hdr_t* bcf_hdr_dup(const(bcf_hdr_t)* hdr);

/**
 *  Copy header lines from src to dst if not already present in dst. See also bcf_translate().
 *  Returns 0 on success or sets a bit on error:
 *      1 .. conflicting definitions of tag length
 *      // todo
 */
deprecated("Please use bcf_hdr_merge instead")
int bcf_hdr_combine(bcf_hdr_t* dst, const(bcf_hdr_t)* src);

/**
 *  bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate()
 *  @param dst: the destination header to be merged into, NULL on the first pass
 *  @param src: the source header
 *
 *  Notes:
 *      - use as:
 *          bcf_hdr_t *dst = NULL;
 *          for (i=0; i<nsrc; i++) dst = bcf_hdr_merge(dst,src[i]);
 *
 *      - bcf_hdr_merge() replaces bcf_hdr_combine() which had a problem when
 *      combining multiple BCF headers. The current bcf_hdr_combine()
 *      does not have this problem, but became slow when used for many files.
 */
bcf_hdr_t* bcf_hdr_merge(bcf_hdr_t* dst, const(bcf_hdr_t)* src);

/**
 *  bcf_hdr_add_sample() - add a new sample.
 *  @param sample:  sample name to be added
 *
 *  Note:
 *      After all samples have been added, the internal header structure must be updated
 *      by calling bcf_hdr_sync(). This is normally done automatically by the first bcf_hdr_write()
 *      or bcf_write() call. Otherwise, the caller must force the update by calling bcf_hdr_sync()
 *      explicitly.
*/
int bcf_hdr_add_sample(bcf_hdr_t *hdr, const(char) *sample);

/** Read VCF header from a file and update the header */
int bcf_hdr_set(bcf_hdr_t *hdr, const(char) *fname);

/// Appends formatted header text to _str_.
/** If _is_bcf_ is zero, `IDX` fields are discarded.
 *  @return 0 if successful, or negative if an error occurred
 *  @since 1.4
 */
int bcf_hdr_format(const(bcf_hdr_t) *hdr, int is_bcf, kstring_t *str);

/** Returns formatted header (newly allocated string) and its length,
 *  excluding the terminating \0. If is_bcf parameter is unset, IDX
 *  fields are discarded.
 *  @deprecated Use bcf_hdr_format() instead as it can handle huge headers.
 */
deprecated("use bcf_hdr_format() instead")
char *bcf_hdr_fmt_text(const(bcf_hdr_t) *hdr, int is_bcf, int *len);

/** Append new VCF header line, returns 0 on success */
int bcf_hdr_append(bcf_hdr_t *h, const(char) *line);

// NOTE(review): presumably the printf-style counterpart of bcf_hdr_append()
// (C variadic); confirm against the htslib documentation.
int bcf_hdr_printf(bcf_hdr_t *h, const(char) *format, ...);

/** VCF version, e.g. VCFv4.2 */
const(char) *bcf_hdr_get_version(const(bcf_hdr_t) *hdr);

/// Set version in bcf header
/**
   @param hdr       BCF header struct
   @param _version  Version to set, e.g. "VCFv4.3"
   @return 0 on success; < 0 on error
 */
/// NB: second parameter is spelled _version because `version` is a D keyword.
/// The return type is int so that callers can observe the documented status code.
/// NOTE(review): this follows the htslib >= 1.10 prototype; confirm the
/// htslib being linked is at least that version.
int bcf_hdr_set_version(bcf_hdr_t *hdr, const(char) *_version);

/**
 *  bcf_hdr_remove() - remove VCF header tag
 *  @param type:      one of BCF_HL_*
 *  @param key:       tag name or NULL to remove all tags of the given type
 */
void bcf_hdr_remove(bcf_hdr_t *h, int type, const(char) *key);

/**
 *  bcf_hdr_subset() - creates a new copy of the header removing unwanted samples
 *  @param n:        number of samples to keep
 *  @param samples:  names of the samples to keep
 *  @param imap:     mapping from index in @samples to the sample index in the original file
 *
 *  Sample names not present in h0 are ignored. The number of unmatched samples can be checked
 *  by comparing n and bcf_hdr_nsamples(out_hdr).
 *  This function can be used to reorder samples.
 *  See also bcf_subset() which subsets individual records.
 *  The bcf_hdr_t struct returned by a successful call should be freed
 *  via bcf_hdr_destroy() when it is no longer needed.
 */
/// NOTE: the C parameter is `char *const *samples`, i.e. a pointer to const
/// pointer(s) to char. D's const is transitive, so the closest spelling,
/// const(char *)*, additionally treats the characters themselves as const.
bcf_hdr_t *bcf_hdr_subset(const(bcf_hdr_t) *h0, int n, const(char *)*samples, int *imap);
//bcf_hdr_t *bcf_hdr_subset(const(bcf_hdr_t) *h0, int n, char *const* samples, int *imap);

/** Creates a list of sequence names.
It is up to the caller to free the list (but not the sequence names) */
const(char) **bcf_hdr_seqnames(const(bcf_hdr_t) *h, int *nseqs);

/** Get number of samples
 *
 *  Returns hdr.n[BCF_DT_SAMPLE]. The header is only read, so a const
 *  header pointer is accepted (mutable pointers convert implicitly).
 */
pragma(inline, true) int bcf_hdr_nsamples(const(bcf_hdr_t) *hdr) { return hdr.n[BCF_DT_SAMPLE]; }
//#define bcf_hdr_nsamples(hdr) (hdr)->n[BCF_DT_SAMPLE]


/* The following functions are for internal use and should rarely be called directly */
int bcf_hdr_parse(bcf_hdr_t *hdr, char *htxt);

/// Synchronize internal header structures
/** @param h  Header
    @return 0 on success, -1 on failure

    This function updates the id, sample and contig arrays in the
    bcf_hdr_t structure so that they point to the same locations as
    the id, sample and contig dictionaries.
*/
int bcf_hdr_sync(bcf_hdr_t *h);

bcf_hrec_t *bcf_hdr_parse_line(const(bcf_hdr_t) *h, const(char) *line, int *len);

/// Convert a bcf header record to string form
/**
 *  @param hrec    Header record
 *  @param str     Destination kstring
 *  @return 0 on success; < 0 on error
 */
int bcf_hrec_format(const(bcf_hrec_t) *hrec, kstring_t *str);

int bcf_hdr_add_hrec(bcf_hdr_t *hdr, bcf_hrec_t *hrec);

/**
 *  bcf_hdr_get_hrec() - get header line info
 *  @param type:  one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN
 *  @param key:   the header key for generic lines (e.g. "fileformat"), any field
 *                  for structured lines, typically "ID".
 *  @param value: the value which pairs with key. Can be NULL for BCF_HL_GEN
 *  @param str_class: the class of BCF_HL_STR line (e.g. "ALT" or "SAMPLE"), otherwise NULL
 */
bcf_hrec_t *bcf_hdr_get_hrec(const(bcf_hdr_t) *hdr, int type, const(char) *key, const(char) *value,
                                                                                const(char) *str_class);
/// Duplicate a header record
/** @param hrec   Header record to copy
    @return A new header record on success; NULL on failure

    The bcf_hrec_t struct returned by a successful call should be freed
    via bcf_hrec_destroy() when it is no longer needed.
*/
bcf_hrec_t *bcf_hrec_dup(bcf_hrec_t *hrec);

// NOTE(review): bcf_hrec_add_key, bcf_hrec_set_val and hrec_add_idx return int
// so that the status codes documented below are visible to callers. This
// follows the htslib >= 1.10 prototypes; confirm the linked htslib version.

/// Add a new header record key
/** @param hrec  Header record
    @param str   Key name
    @param len   Length of @p str
    @return 0 on success; -1 on failure
*/
int bcf_hrec_add_key(bcf_hrec_t *hrec, const(char) *str, size_t len);

/// Set a header record value
/** @param hrec      Header record
    @param i         Index of value
    @param str       Value to set
    @param len       Length of @p str
    @param is_quoted Value should be quoted
    @return 0 on success; -1 on failure
*/
int bcf_hrec_set_val(bcf_hrec_t *hrec, int i, const(char) *str, size_t len, int is_quoted);

/// Lookup header record by key
int bcf_hrec_find_key(bcf_hrec_t *hrec, const(char) *key);

/// Add an IDX header record
/** @param hrec   Header record
    @param idx    IDX value to add
    @return 0 on success; -1 on failure
*/
int hrec_add_idx(bcf_hrec_t *hrec, int idx);

/// Free up a header record and associated structures
/** @param hrec  Header record
 */
void bcf_hrec_destroy(bcf_hrec_t *hrec);



/**************************************************************************
 *  Individual record querying and manipulation routines
 **************************************************************************/

/** See the description of bcf_hdr_subset() */
int bcf_subset(const(bcf_hdr_t) *h, bcf1_t *v, int n, int *imap);

/**
 *  bcf_translate() - translate tags ids to be consistent with different header.
This function
 *  is useful when lines from multiple VCF need to be combined.
 *  @dst_hdr:   the destination header, to be used in bcf_write(), see also bcf_hdr_combine()
 *  @src_hdr:   the source header, used in bcf_read()
 *  @src_line:  line obtained by bcf_read()
 */
int bcf_translate(const(bcf_hdr_t)* dst_hdr, bcf_hdr_t* src_hdr, bcf1_t* src_line);

/// bcf_get_variant_type[s]() - returns one of VCF_REF, VCF_SNP, etc
int bcf_get_variant_types(bcf1_t* rec);
/// ditto
int bcf_get_variant_type(bcf1_t* rec, int ith_allele);
/// returns int as ersatz bool, but dlang bool is 8-bit
int bcf_is_snp(bcf1_t* v);

/**
 * bcf_update_filter() - sets the FILTER column
 *
 * @flt_ids:  The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
 * @n:        Number of filters. If n==0, all filters are removed
 *
 * Returns: zero
 */
int bcf_update_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int* flt_ids, int n);

/**
 * bcf_add_filter() - adds to the FILTER column
 *
 * @flt_id:   filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
 *
 * If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed.
 */
int bcf_add_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int flt_id);

/**
 * bcf_remove_filter() - removes from the FILTER column
 *
 * @flt_id:   filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS")
 * @pass:     when set to 1 and no filters are present, set to PASS
 */
int bcf_remove_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int flt_id, int pass);
/**
 *  Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably.
*/
int bcf_has_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, char* filter);

/**
 * bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALT column
 *
 * @alleles:           Array of alleles
 * @nals:              Number of alleles
 * @alleles_string:    Comma-separated alleles, starting with the REF allele
 */
int bcf_update_alleles(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)** alleles, int nals);
/// ditto
int bcf_update_alleles_str(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* alleles_string);

/**
 * bcf_update_id() - sets new ID string
 * bcf_add_id() - adds to the ID string checking for duplicates
 */
int bcf_update_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id);
/// ditto
int bcf_add_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id);

/**
 *  bcf_update_info_*() - functions for updating INFO fields
 *  @param hdr:       the BCF header
 *  @param line:      VCF line to be edited
 *  @param key:       the INFO tag to be updated
 *  @param values:    pointer to the array of values. Pass NULL to remove the tag.
 *  @param n:         number of values in the array. When set to 0, the INFO tag is removed
 *  @return 0 on success or negative value on error.
 *
 *  The @p string in bcf_update_info_flag() is optional,
 *  @p n indicates whether the flag is set or removed.
*
 */
pragma(inline, true) {    // TODO: rewrite as template
    /// Update an INFO tag, passing type code BCF_HT_INT to bcf_update_info()
    auto bcf_update_info_int32(
        const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_INT);
    }

    /// Update an INFO tag, passing type code BCF_HT_REAL to bcf_update_info()
    auto bcf_update_info_float(
        const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL);
    }

    /// Update an INFO tag, passing type code BCF_HT_FLAG to bcf_update_info()
    auto bcf_update_info_flag(
        const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_FLAG);
    }

    /// Update an INFO tag, passing type code BCF_HT_STR; the count handed on is always 1
    auto bcf_update_info_string(
        const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values)
    {
        return bcf_update_info(hdr, line, key, values, 1, BCF_HT_STR);
    }
}
/// Generic INFO updater that the typed wrappers forward to; `type` is a BCF_HT_* code
int bcf_update_info(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n, int type);

/// Set or update 64-bit integer INFO values
/**
 *  @param hdr:       the BCF header
 *  @param line:      VCF line to be edited
 *  @param key:       the INFO tag to be updated
 *  @param values:    pointer to the array of values. Pass NULL to remove the tag.
 *  @param n:         number of values in the array. When set to 0, the INFO tag is removed
 *  @return 0 on success or negative value on error.
 *
 *  This function takes an int64_t values array as input.  The data
 *  actually stored will be shrunk to the minimum size that can
 *  accept all of the values.
 *
 *  INFO values outside of the range BCF_MIN_BT_INT32 to BCF_MAX_BT_INT32
 *  can only be written to VCF files.
*/
pragma(inline, true)
auto bcf_update_info_int64(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(int64_t) *values, int n)
{
    // Hand the int64 array to the generic updater as an untyped pointer,
    // tagged with the 64-bit type code.
    const(void) *payload = values;
    return bcf_update_info(hdr, line, key, payload, n, BCF_HT_LONG);
}

/**
 *  bcf_update_format_*() - functions for updating FORMAT fields
 *  @values:    pointer to the array of values, the same number of elements
 *              is expected for each sample. Missing values must be padded
 *              with bcf_*_missing or bcf_*_vector_end values.
 *  @n:         number of values in the array. If n==0, existing tag is removed.
 *
 *  The function bcf_update_format_string() is a higher-level (slower) variant of
 *  bcf_update_format_char(). The former accepts array of \0-terminated strings
 *  whereas the latter requires that the strings are collapsed into a single array
 *  of fixed-length strings. In case of strings with variable length, shorter strings
 *  can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char()
 *  are not \0-terminated.
 *
 *  Returns 0 on success or negative value on error.
*/
int bcf_update_format_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(char) **values, int n);
/// ditto
int bcf_update_format(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n, int type);
// TODO: Write D template
pragma(inline, true) {
    /// Update a FORMAT tag from an int array (type code BCF_HT_INT)
    auto bcf_update_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(int) *values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_INT); }

    /// Update a FORMAT tag from a float array (type code BCF_HT_REAL)
    auto bcf_update_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(float) *values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_REAL); }

    /// Update a FORMAT tag from a single flattened array of fixed-length strings
    /// (type code BCF_HT_STR).
    ///
    /// `values` is untyped, as in the C macro: the expected argument is the
    /// collapsed `char` array, not an array of string pointers (use
    /// bcf_update_format_string() for that).  `const(void)*` keeps older call
    /// sites that passed `const(char)**` compiling.
    auto bcf_update_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_STR); }

    /// Update the "GT" FORMAT tag (type code BCF_HT_INT).
    ///
    /// `gts` is handed on as an int array; build its elements with
    /// bcf_gt_phased() / bcf_gt_unphased() / bcf_gt_missing.  It is declared
    /// `const(void)*` so that an `int*` can be passed without a cast while
    /// older call sites that passed `const(char)**` still compile.
    auto bcf_update_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, const(void) *gts, int n)
    {
        // D string literals are NUL-terminated and convert to const(char)*,
        // so no runtime toStringz() conversion is needed for the key.
        return bcf_update_format(hdr, line, "GT", gts, n, BCF_HT_INT);
    }
}


/// Macros for setting genotypes correctly, for use with bcf_update_genotypes only; idx corresponds
/// to VCF's GT (1-based index to ALT or 0 for the reference allele) and val is the opposite, obtained
/// from bcf_get_genotypes() below.
// TODO: is int appropriate?
pragma(inline, true) {
    /// Encode allele index `idx` with the phased bit (bit 0) set
    auto bcf_gt_phased(int idx)     { return ((idx + 1) << 1) | 1; }
    /// Encode allele index `idx` with the phased bit clear
    auto bcf_gt_unphased(int idx)   { return (idx + 1) << 1; }
    /// 1 when the allele part of an encoded value (everything above bit 0) is zero, else 0
    auto bcf_gt_is_missing(int val) { return (val >> 1) ? 0 : 1; }
    /// Bit 0 of an encoded value: 1 if phased, 0 otherwise
    auto bcf_gt_is_phased(int idx)  { return idx & 1; }
    /// Decode an encoded value back to its allele index (inverse of bcf_gt_phased/unphased)
    auto bcf_gt_allele(int val)     { return (val >> 1) - 1; }
}
/// Encoded value for a missing allele (its allele part is zero, see bcf_gt_is_missing)
enum int bcf_gt_missing = 0;

/** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */
pragma(inline, true) {
    /// Genotype index of the unordered allele pair (a, b): hi*(hi+1)/2 + lo
    auto bcf_alleles2gt(int a, int b)
    {
        return a > b ? (a * (a + 1) / 2 + b)
                     : (b * (b + 1) / 2 + a);
    }
    /// Inverse of bcf_alleles2gt(): writes the smaller allele index to *a and the larger to *b
    void bcf_gt2alleles(int igt, int *a, int *b)
    {
        int k = 0, dk = 1; // @suppress(dscanner.useless-initializer)
        // Walk k through 0, 2, 5, 9, ... (the index of pair (dk-1, dk-1)) until it reaches igt.
        while ( k<igt ) { dk++; k += dk; }
        *b = dk - 1; *a = igt - k + *b;
    }
}

/**
 * bcf_get_fmt() - returns pointer to FORMAT's field data
 * @header: for access to BCF_DT_ID dictionary
 * @line:   VCF line obtained from vcf_parse1
 * @fmt:    one of GT,PL,...
 *
 * Returns bcf_fmt_t* if the call succeeded, or returns NULL when the field
 * is not available.
 */
bcf_fmt_t *bcf_get_fmt(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key);
/// ditto
bcf_info_t *bcf_get_info(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key);

/**
 * bcf_get_*_id() - returns pointer to FORMAT/INFO field data given the header index instead of the string ID
 * @line: VCF line obtained from vcf_parse1
 * @id:  The header index for the tag, obtained from bcf_hdr_id2int()
 *
 * Returns bcf_fmt_t* / bcf_info_t*. These functions do not check if the index is valid
 * as their goal is to avoid the header lookup.
907 */ 908 bcf_fmt_t *bcf_get_fmt_id(bcf1_t *line, const int id); 909 /// ditto 910 bcf_info_t *bcf_get_info_id(bcf1_t *line, const int id); 911 912 /** 913 * bcf_get_info_*() - get INFO values, integers or floats 914 * @param hdr: BCF header 915 * @param line: BCF record 916 * @param tag: INFO tag to retrieve 917 * @param dst: *dst is pointer to a memory location, can point to NULL 918 * @param ndst: pointer to the size of allocated memory 919 * @return >=0 on success 920 * -1 .. no such INFO tag defined in the header 921 * -2 .. clash between types defined in the header and encountered in the VCF record 922 * -3 .. tag is not present in the VCF record 923 * -4 .. the operation could not be completed (e.g. out of memory) 924 * 925 * Returns negative value on error or the number of values (including 926 * missing values) put in *dst on success. bcf_get_info_string() returns 927 * on success the number of characters stored excluding the nul- 928 * terminating byte. bcf_get_info_flag() does not store anything in *dst 929 * but returns 1 if the flag is set or 0 if not. 930 * 931 * *dst will be reallocated if it is not big enough (i.e. *ndst is too 932 * small) or NULL on entry. The new size will be stored in *ndst. 
*/
pragma(inline, true) {
    /// Fetch an INFO tag, passing type code BCF_HT_INT to bcf_get_info_values()
    auto bcf_get_info_int32(
        const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT);
    }

    /// Fetch an INFO tag, passing type code BCF_HT_REAL to bcf_get_info_values()
    auto bcf_get_info_float(
        const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL);
    }

    /// Fetch an INFO tag, passing type code BCF_HT_STR to bcf_get_info_values()
    auto bcf_get_info_string(
        const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_STR);
    }

    /// Query an INFO tag, passing type code BCF_HT_FLAG to bcf_get_info_values()
    auto bcf_get_info_flag(
        const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_FLAG);
    }
}
/// Generic INFO getter that the typed wrappers forward to; `type` is a BCF_HT_* code
int bcf_get_info_values(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst, int type);

/// Put integer INFO values into an int64_t array
/**
 *  @param hdr:    BCF header
 *  @param line:   BCF record
 *  @param tag:    INFO tag to retrieve
 *  @param dst:    *dst is pointer to a memory location, can point to NULL
 *  @param ndst:   pointer to the size of allocated memory
 *  @return  >=0 on success
 *          -1 .. no such INFO tag defined in the header
 *          -2 .. clash between types defined in the header and encountered in the VCF record
 *          -3 .. tag is not present in the VCF record
 *          -4 .. the operation could not be completed (e.g. out of memory)
 *
 *  Returns negative value on error or the number of values (including
 *  missing values) put in *dst on success.
 *
 *  *dst will be reallocated if it is not big enough (i.e. *ndst is too
 *  small) or NULL on entry.
The new size will be stored in *ndst.
 */
pragma(inline, true)
auto bcf_get_info_int64(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, int64_t **dst, int *ndst)
{
    // The generic getter takes an untyped buffer pointer; BCF_HT_LONG tells
    // it which element type is being requested.
    void **rawDst = cast(void **) dst;
    return bcf_get_info_values(hdr, line, tag, rawDst, ndst, BCF_HT_LONG);
}

/**
 *  bcf_get_format_*() - same as bcf_get_info*() above
 *
 *  The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char().
 *  see the description of bcf_update_format_string() and bcf_update_format_char() above.
 *  Unlike other bcf_get_format_*() functions, bcf_get_format_string() allocates two arrays:
 *  a single block of \0-terminated strings collapsed into a single array and an array of pointers
 *  to these strings. Both arrays must be cleaned by the user.
 *
 *  Returns negative value on error or the number of written values on success.
 *
 *  Use the returned number of written values for accessing valid entries of dst, as ndst is only a
 *  watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over
 *  values from previous calls to bcf_get_format_*() on lines with more values per sample.
*
 *  Example:
 *      int ndst = 0; char **dst = NULL;
 *      if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 )
 *          for (i=0; i<bcf_hdr_nsamples(hdr); i++) printf("%s\n", dst[i]);
 *      free(dst[0]); free(dst);
 *
 *  Example:
 *      int i, j, ngt, nsmpl = bcf_hdr_nsamples(hdr);
 *      int32_t *gt_arr = NULL, ngt_arr = 0;
 *
 *      ngt = bcf_get_genotypes(hdr, line, &gt_arr, &ngt_arr);
 *      if ( ngt<=0 ) return; // GT not present
 *
 *      int max_ploidy = ngt/nsmpl;
 *      for (i=0; i<nsmpl; i++)
 *      {
 *        int32_t *ptr = gt_arr + i*max_ploidy;
 *        for (j=0; j<max_ploidy; j++)
 *        {
 *           // if true, the sample has smaller ploidy
 *           if ( ptr[j]==bcf_int32_vector_end ) break;
 *
 *           // missing allele
 *           if ( bcf_gt_is_missing(ptr[j]) ) continue;
 *
 *           // the VCF 0-based allele index
 *           int allele_index = bcf_gt_allele(ptr[j]);
 *
 *           // is phased?
 *           int is_phased = bcf_gt_is_phased(ptr[j]);
 *
 *           // .. do something ..
*        }
 *      }
 *      free(gt_arr);
 *
 */
int bcf_get_format_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, char ***dst, int *ndst);
/// ditto
int bcf_get_format_values(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst, int type);
pragma(inline, true) {
    /// Fetch a FORMAT tag, passing type code BCF_HT_INT to bcf_get_format_values()
    auto bcf_get_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT); }

    /// Fetch a FORMAT tag, passing type code BCF_HT_REAL to bcf_get_format_values()
    auto bcf_get_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_REAL); }

    /// Fetch a FORMAT tag, passing type code BCF_HT_STR to bcf_get_format_values()
    auto bcf_get_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_STR); }

    /// Fetch the "GT" FORMAT tag, passing type code BCF_HT_INT; decode the
    /// elements with bcf_gt_allele() / bcf_gt_is_missing() / bcf_gt_is_phased()
    auto bcf_get_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, void **dst, int *ndst)
    {
        // D string literals are NUL-terminated and convert to const(char)*,
        // so the key needs no runtime toStringz() conversion on every call.
        return bcf_get_format_values(hdr, line, "GT", dst, ndst, BCF_HT_INT);
    }
}



/**************************************************************************
 *  Helper functions
 **************************************************************************/

/**
 *  bcf_hdr_id2int() - Translates string into numeric ID
 *  bcf_hdr_int2id() - Translates numeric ID into string
 *  @type:     one of BCF_DT_ID, BCF_DT_CTG, BCF_DT_SAMPLE
 *  @id:       tag name, such as: PL, DP, GT, etc.
*
 *  Returns -1 if string is not in dictionary, otherwise numeric ID which identifies
 *  fields in BCF records.
 */
int bcf_hdr_id2int(const(bcf_hdr_t) *hdr, int type, const(char) *id);
/// ditto
pragma(inline, true)
auto bcf_hdr_int2id(const(bcf_hdr_t) *hdr, int type, int int_id)
{
    // Direct table lookup; neither `type` nor `int_id` is range-checked here.
    auto dictionary = hdr.id[type];
    return dictionary[int_id].key;
}
//#define bcf_hdr_int2id(hdr,type,int_id) ((hdr)->id[type][int_id].key)

/**
 *  bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID
 *  bcf_hdr_id2name() - Translates numeric ID to sequence name
 */
pragma(inline, true)
int bcf_hdr_name2id(const(bcf_hdr_t) *hdr, const(char) *id)
{
    // Same lookup as bcf_hdr_id2int(), pinned to the contig dictionary.
    return bcf_hdr_id2int(hdr, BCF_DT_CTG, id);
}
/// ditto
pragma(inline, true)
const(char) *bcf_hdr_id2name(const(bcf_hdr_t) *hdr, int rid)
{
    // Unchecked index into the contig dictionary.
    auto contigs = hdr.id[BCF_DT_CTG];
    return contigs[rid].key;
}
/// Name of the contig that record `rec` refers to (looked up through rec.rid)
pragma(inline, true)
const(char) *bcf_seqname(const(bcf_hdr_t) *hdr, bcf1_t *rec)
{
    auto contigs = hdr.id[BCF_DT_CTG];
    return contigs[rec.rid].key;
}

/**
 *  bcf_hdr_id2*() - Macros for accessing bcf_idinfo_t
 *  @type:      one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT
 *  @int_id:    return value of bcf_hdr_id2int, must be >=0
 *
 *  The returned values are:
 *     bcf_hdr_id2length   ..  whether the number of values is fixed or variable, one of BCF_VL_*
 *     bcf_hdr_id2number   ..  the number of values, 0xfffff for variable length fields
 *     bcf_hdr_id2type     ..  the field type, one of BCF_HT_*
 *     bcf_hdr_id2coltype  ..  the column type, one of BCF_HL_*
 *
 *  Notes: Prior to using the macros, the presence of the info should be
 *         tested with bcf_hdr_idinfo_exists().
*/
// TODO: for dict_type and col_type use ENUMs
pragma(inline, true) {
    /// Bits 8..11 of the packed info word for (`type`, `int_id`)
    auto bcf_hdr_id2length(const(bcf_hdr_t) *hdr, int type, int int_id)
    {
        auto packed = hdr.id[BCF_DT_ID][int_id].val.info[type];
        return packed >> 8 & 0xf;
    }
    /// Bits 12 and above of the packed info word
    auto bcf_hdr_id2number(const(bcf_hdr_t) *hdr, int type, int int_id)
    {
        auto packed = hdr.id[BCF_DT_ID][int_id].val.info[type];
        return packed >> 12;
    }
    /// Bits 4..7 of the packed info word
    uint32_t bcf_hdr_id2type(const(bcf_hdr_t) *hdr, int type, int int_id)
    {
        auto packed = hdr.id[BCF_DT_ID][int_id].val.info[type];
        return cast(uint32_t)(packed >> 4 & 0xf);
    }
    /// Bits 0..3 of the packed info word
    uint32_t bcf_hdr_id2coltype(const(bcf_hdr_t) *hdr, int type, int int_id)
    {
        auto packed = hdr.id[BCF_DT_ID][int_id].val.info[type];
        return cast(uint32_t)(packed & 0xf);
    }
    /// 0 when `int_id` is negative or the column-type nibble is 0xf, otherwise 1
    auto bcf_hdr_idinfo_exists(const(bcf_hdr_t) *hdr, int type, int int_id)
    {
        // A negative id must be rejected before the table is indexed.
        if (int_id < 0)
            return 0;
        return bcf_hdr_id2coltype(hdr, type, int_id) == 0xf ? 0 : 1;
    }
    /// Header record for `int_id`: contig entries use dictionary BCF_DT_CTG and
    /// slot 0, everything else uses BCF_DT_ID and slot `col_type`
    auto bcf_hdr_id2hrc(const(bcf_hdr_t) *hdr, int dict_type, int col_type, int int_id)
    {
        const isContig = dict_type == BCF_DT_CTG;
        return hdr.id[isContig ? BCF_DT_CTG : BCF_DT_ID][int_id].val.hrec[isContig ? 0 : col_type];
    }
}

/// Convert BCF FORMAT data to string form
/**
 * @param s    kstring to write into
 * @param n    number of items in @p data
 * @param type type of items in @p data
 * @param data BCF format data
 * @return  0 on success
 *         -1 if out of memory
 */
int bcf_fmt_array(kstring_t *s, int n, int type, void *data);
/// ditto
uint8_t *bcf_fmt_sized_array(kstring_t *s, uint8_t *ptr);

/// Encode a variable-length char array in BCF format
/**
 * @param s    kstring to write into
 * @param l    length of input
 * @param a    input data to encode
 * @return 0 on success; < 0 on error
 */
int bcf_enc_vchar(kstring_t *s, int l, const(char) *a);

/// Encode a variable-length integer array in BCF format
/**
 * @param s      kstring to write into
 * @param n      total number of items in @p a (<= 0 to encode BCF_BT_NULL)
 * @param a      input data to encode
 * @param wsize  vector length (<= 0 is equivalent to @p n)
 * @return 0 on success; < 0 on error
 * @note @p n should be an exact multiple of @p wsize
 */
int bcf_enc_vint(kstring_t *s, int n, int32_t *a, int wsize);

/// Encode a variable-length float array in BCF format
/**
 * @param s      kstring to write into
 * @param n      total number of items in @p a (<= 0 to encode BCF_BT_NULL)
 * @param a      input data to encode
 * @return 0 on success; < 0 on error
 */
int bcf_enc_vfloat(kstring_t *s, int n, float *a);


/**************************************************************************
 *  BCF index
 *
 *  Note that these functions work
with BCFs only.  See synced_bcf_reader.h
 *  which provides (amongst other things) an API to work transparently with
 *  both indexed BCFs and VCFs.
 **************************************************************************/

alias bcf_itr_destroy = hts_itr_destroy;

pragma(inline, true) {
    /// Generate an iterator for an integer-based range query
    auto bcf_itr_queryi(const(hts_idx_t) *idx, int tid, int beg, int end)
    {
        return hts_itr_query(idx, tid, beg, end, &bcf_readrec);
    }

    /// Generate an iterator for a string-based range query
    auto bcf_itr_querys(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, const(char) *s)
    {
        // The header travels as the opaque context pointer that is handed
        // back to the name -> id callback.
        auto nameToId = cast(hts_name2id_f) &bcf_hdr_name2id;
        auto context = cast(void *) hdr;
        return hts_itr_querys(idx, s, nameToId, context, &hts_itr_query, &bcf_readrec);
    }

    /// Iterate through the range
    /// r should (probably) point to your VCF (BCF) row structure
    /// TODO: attempt to define parameter r as bcf1_t *, which is what I think it should be
    int bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r)
    {
        // Anything that is not bgzf-compressed is rejected: log, set errno, return -2.
        if (!htsfp.is_bgzf)
        {
            hts_log_error(__FUNCTION__,"Only bgzf compressed files can be used with iterators");
            errno = EINVAL;
            return -2;
        }
        return hts_itr_next(htsfp.fp.bgzf, itr, r, null);
    }

    /// Load a BCF index
    /** @param fn   BCF file name
        @return The index, or NULL if an error occurred.
        @note This only works for BCF files.  Consider synced_bcf_reader instead
        which works for both BCF and VCF.
    */
    auto bcf_index_load(const(char) *fn)
    {
        return hts_idx_load(fn, HTS_FMT_CSI);
    }

    /// Get a list (char **) of sequence names from the index -- free only the array, not the values
    auto bcf_index_seqnames(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, int *nptr)
    {
        // As in bcf_itr_querys(), the header is the callback's context pointer.
        auto idToName = cast(hts_id2name_f) &bcf_hdr_id2name;
        auto context = cast(void *) hdr;
        return hts_idx_seqnames(idx, nptr, idToName, context);
    }
}

/// Load a BCF index from a given index file name
/**  @param fn     Input BAM/BCF/etc filename
     @param fnidx  The input index filename
     @return  The index, or NULL if an error occurred.
     @note This only works for BCF files.  Consider synced_bcf_reader instead
     which works for both BCF and VCF.
*/
hts_idx_t *bcf_index_load2(const(char) *fn, const(char) *fnidx);

/// Load a BCF index from a given index file name
/**  @param fn     Input BAM/BCF/etc filename
     @param fnidx  The input index filename
     @param flags  Flags to alter behaviour (see description)
     @return  The index, or NULL if an error occurred.
     @note This only works for BCF files.  Consider synced_bcf_reader instead
     which works for both BCF and VCF.

     The @p flags parameter can be set to a combination of the following
     values:

        HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
        HTS_IDX_SILENT_FAIL   Fail silently if the index is not present

     Equivalent to hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags);
*/
hts_idx_t *bcf_index_load3(const(char) *fn, const(char) *fnidx, int flags);

/**
 *  bcf_index_build() - Generate and save an index file
 *  @fn:         Input VCF(compressed)/BCF filename
 *  @min_shift:  log2(width of the smallest bin), e.g. a value of 14
 *  imposes a 16k base lower limit on the width of index bins.
 *  Positive to generate CSI, or 0 to generate TBI.
However, a small 1223 * value of min_shift would create a large index, which would lead to 1224 * reduced performance when using the index. A recommended value is 14. 1225 * For BCF files, only the CSI index can be generated. 1226 * 1227 * Returns 0 if successful, or negative if an error occurred. 1228 * 1229 * List of error codes: 1230 * -1 .. indexing failed 1231 * -2 .. opening @fn failed 1232 * -3 .. format not indexable 1233 * -4 .. failed to create and/or save the index 1234 */ 1235 int bcf_index_build(const(char) *fn, int min_shift); 1236 1237 /** 1238 * bcf_index_build2() - Generate and save an index to a specific file 1239 * @fn: Input VCF/BCF filename 1240 * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn 1241 * @min_shift: Positive to generate CSI, or 0 to generate TBI 1242 * 1243 * Returns 0 if successful, or negative if an error occurred. 1244 * 1245 * List of error codes: 1246 * -1 .. indexing failed 1247 * -2 .. opening @fn failed 1248 * -3 .. format not indexable 1249 * -4 .. failed to create and/or save the index 1250 */ 1251 int bcf_index_build2(const(char) *fn, const(char) *fnidx, int min_shift); 1252 1253 /** 1254 * bcf_index_build3() - Generate and save an index to a specific file 1255 * @fn: Input VCF/BCF filename 1256 * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn 1257 * @min_shift: Positive to generate CSI, or 0 to generate TBI 1258 * @n_threads: Number of VCF/BCF decoder threads 1259 * 1260 * Returns 0 if successful, or negative if an error occurred. 1261 * 1262 * List of error codes: 1263 * -1 .. indexing failed 1264 * -2 .. opening @fn failed 1265 * -3 .. format not indexable 1266 * -4 .. failed to create and/or save the index 1267 */ 1268 int bcf_index_build3(const(char) *fn, const(char) *fnidx, int min_shift, int n_threads); 1269 1270 /// Initialise fp->idx for the current format type, for VCF and BCF files. 1271 /** @param fp File handle for the data file being written. 
1272 @param h BCF header structured (needed for BAI and CSI). 1273 @param min_shift CSI bin size (CSI default is 14). 1274 @param fnidx Filename to write index to. This pointer must remain valid 1275 until after bcf_idx_save is called. 1276 @return 0 on success, <0 on failure. 1277 @note This must be called after the header has been written, but before 1278 any other data. 1279 */ 1280 int bcf_idx_init(htsFile *fp, bcf_hdr_t *h, int min_shift, const(char) *fnidx); 1281 1282 /// Writes the index initialised with bcf_idx_init to disk. 1283 /** @param fp File handle for the data file being written. 1284 @return 0 on success, <0 on failure. 1285 */ 1286 int bcf_idx_save(htsFile *fp); 1287 1288 /******************* 1289 * Typed value I/O * 1290 *******************/ 1291 1292 /** 1293 Note that in contrast with BCFv2.1 specification, HTSlib implementation 1294 allows missing values in vectors. For integer types, the values 0x80, 1295 0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001, 1296 0x80000001 as end-of-vector indicators. Similarly for floats, the value of 1297 0x7F800001 is interpreted as a missing value and 0x7F800002 as an 1298 end-of-vector indicator. 1299 Note that the end-of-vector byte is not part of the vector. 1300 1301 This trial BCF version (v2.2) is compatible with the VCF specification and 1302 enables to handle correctly vectors with different ploidy in presence of 1303 missing values. 
*/
enum int8_t  bcf_int8_vector_end  = int8_t.min  + 1;   /* -127 */
/// ditto
enum int16_t bcf_int16_vector_end = int16_t.min + 1;   /* -32_767 */
/// ditto
enum int32_t bcf_int32_vector_end = int32_t.min + 1;   /* -2_147_483_647 */
/// ditto
enum int64_t bcf_int64_vector_end = int64_t.min + 1;   /* -9_223_372_036_854_775_807 */
/// ditto
enum char    bcf_str_vector_end   = '\0';              //#define bcf_str_vector_end   0
/// ditto
enum int8_t  bcf_int8_missing     = int8_t.min;        /* -128 */
/// ditto
enum int16_t bcf_int16_missing    = int16_t.min;       /* -32_768 */
/// ditto
enum int32_t bcf_int32_missing    = int32_t.min;       /* -2_147_483_648 */
/// ditto
enum int64_t bcf_int64_missing    = int64_t.min;       /* -9_223_372_036_854_775_808 */
/// ditto
enum char    bcf_str_missing      = '\x07';            // #define bcf_str_missing      0x07

// Limits on BCF values stored in given types.  Max values are the same
// as for the underlying type.  Min values are slightly different as
// the last 8 values for each type were reserved by BCFv2.2.
enum int8_t  BCF_MAX_BT_INT8  = (0x7f);           /* INT8_MAX  */
enum int16_t BCF_MAX_BT_INT16 = (0x7fff);         /* INT16_MAX */
enum int32_t BCF_MAX_BT_INT32 = (0x7fffffff);     /* INT32_MAX */
enum int8_t  BCF_MIN_BT_INT8  = (-120);           /* INT8_MIN  + 8 */
enum int16_t BCF_MIN_BT_INT16 = (-32_760);        /* INT16_MIN + 8 */
enum int32_t BCF_MIN_BT_INT32 = (-2_147_483_640); /* INT32_MIN + 8 */

extern __gshared uint32_t bcf_float_vector_end; /// raw 32-bit pattern used as the float end-of-vector marker
extern __gshared uint32_t bcf_float_missing;    /// raw 32-bit pattern used as the float missing-value marker

version(LDC) pragma(inline, true):
version(GNU) pragma(inline, true):
/// Store the raw 32-bit pattern `value` into `*ptr` without numeric conversion
void bcf_float_set(float *ptr, uint32_t value)
{
    // Type-pun through a union so the bits are copied verbatim.
    union U { uint32_t i; float f; }
    U u;
    u.i = value;
    *ptr = u.f;
}

/// float vector macros: overwrite `x` in place with the end-of-vector pattern.
/// `x` is taken by reference (the C macro takes `&(x)`); with a by-value
/// parameter the write would only touch a local copy and be lost.
void bcf_float_set_vector_end(ref float x) { bcf_float_set(&x, bcf_float_vector_end); }
/// ditto (missing-value pattern)
void bcf_float_set_missing(ref float x) { bcf_float_set(&x, bcf_float_missing); }

/// Returns 1 if the bit pattern of `f` equals bcf_float_missing, else 0
pragma(inline, true)
int bcf_float_is_missing(float f)
{
    union U { uint32_t i; float f; }
    U u;
    u.f = f;
    return u.i==bcf_float_missing ? 1 : 0;
}
/// Returns 1 if the bit pattern of `f` equals bcf_float_vector_end, else 0
pragma(inline, true)
int bcf_float_is_vector_end(float f)
{
    union U { uint32_t i; float f; }
    U u;
    u.f = f;
    return u.i==bcf_float_vector_end ? 1 : 0;
}

/// (Undocumented) Format GT field
///
/// Appends the genotype of sample `isample` to `str`: allele indexes
/// separated by '/' (phase bit clear) or '|' (phase bit set), with '.' for a
/// missing allele or an empty vector.
/// Returns 0 on success, -1 if any kstring append failed, and -2 when
/// fmt.type is not INT8/INT16/INT32/NULL.
pragma(inline, true)
int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
{
    uint32_t e = 0;     // becomes non-zero as soon as any append fails
    void branch(T)()  // gets a closure over e (was #define macro)
    if (is(T == int8_t) || is(T == int16_t) || is(T == int32_t))
    {
        static if (is(T == int8_t))
            auto vector_end = bcf_int8_vector_end;
        else static if (is(T == int16_t))
            auto vector_end = bcf_int16_vector_end;
        else
            auto vector_end = bcf_int32_vector_end;

        T *ptr = cast(T*) (fmt.p + (isample * fmt.size));
        // The index is declared outside the loop because it is read after
        // it: i == 0 afterwards means the sample's vector was empty.
        int i = 0;
        for (; i<fmt.n && ptr[i] != vector_end; i++)
        {
            if ( i ) e |= kputc("/|"[ptr[i]&1], str) < 0;
            if ( !(ptr[i]>>1) ) e |= kputc('.', str) < 0;
            else e |= kputw((ptr[i]>>1) - 1, str) < 0;
        }
        if (i == 0) e |= kputc('.', str) < 0;
    }
    switch (fmt.type) {
        case BCF_BT_INT8:  branch!int8_t; break;
        case BCF_BT_INT16: branch!int16_t; break;
        case BCF_BT_INT32: branch!int32_t; break;
        case BCF_BT_NULL:  e |= kputc('.', str) < 0; break;
        // NOTE(review): bcf_itr_next() calls hts_log_error(context, message);
        // confirm that this printf-style call matches the binding's signature.
        default: hts_log_error("Unexpected type %d", fmt.type); return -2;
    }

    return e == 0 ? 0 : -1;
}

/// ditto
/+void bcf_enc_size(kstring_t *s, int size, int type)
{
    if (size >= 15) {
        kputc(15<<4|type, s);
        if (size >= 128) {
            if (size >= 32768) {
                int32_t x = size;
                kputc(1<<4|BCF_BT_INT32, s);
                kputsn(cast(char*)&x, 4, s);
            } else {
                int16_t x = size;
                kputc(1<<4|BCF_BT_INT16, s);
                kputsn(cast(char*)&x, 2, s);
            }
        } else {
            kputc(1<<4|BCF_BT_INT8, s);
            kputc(size, s);
        }
    } else kputc(size<<4|type, s);
}+/
/// Undocumented Encode size?
pragma(inline, true)
int bcf_enc_size(kstring_t *s, int size, int type)
{
    // Writes a descriptor byte to `s`: high nibble = size, low nibble = type.
    // Sizes of 15 or more put 15 in the high nibble and append the real size
    // as a typed integer (int8 / int16 / int32, picked by magnitude).
    // Returns 0 on success, -1 if any append failed.
    uint32_t e = 0;
    if (size >= 15) {
        e |= kputc(15<<4|type, s) < 0;
        if (size >= 128) {
            if (size >= 32_768) {
                int32_t x = size;
                e |= kputc(1<<4|BCF_BT_INT32, s) < 0;
                e |= kputsn(cast(char*)&x, 4, s) < 0;
            } else {
                // 128 <= size < 32_768 here, so the narrowing is lossless; D
                // requires the cast to be explicit.
                int16_t x = cast(int16_t) size;
                e |= kputc(1<<4|BCF_BT_INT16, s) < 0;
                e |= kputsn(cast(char*)&x, 2, s) < 0;
            }
        } else {
            e |= kputc(1<<4|BCF_BT_INT8, s) < 0;
            e |= kputc(size, s) < 0;
        }
    } else e |= kputc(size<<4|type, s) < 0;
    return e == 0 ? 0 : -1;
}


/// Undocumented Encode integer type?
///
/// Returns the narrowest BCF_BT_INT* code whose usable range
/// (BCF_MIN_BT_* .. BCF_MAX_BT_*) contains `x`; BCF_BT_INT32 is the fallback.
pragma(inline, true)
int bcf_enc_inttype(long x)
{
    if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) return BCF_BT_INT8;
    if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) return BCF_BT_INT16;
    return BCF_BT_INT32;
}

/// Undocumented Encode integer variant 1
///
/// Appends one typed integer to `s` using the narrowest width that holds `x`.
/// The int32 end-of-vector and missing sentinels are written as their int8
/// counterparts.  Returns 0 on success, -1 if any append failed.
pragma(inline, true)
int bcf_enc_int1(kstring_t *s, int32_t x)
{
    uint32_t e = 0;
    if (x == bcf_int32_vector_end) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(bcf_int8_vector_end, s) < 0;
    } else if (x == bcf_int32_missing) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(bcf_int8_missing, s) < 0;
    } else if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(x, s) < 0;
    } else if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) {
        // Range-checked just above, so the narrowing is lossless; D requires
        // the cast to be explicit.
        int16_t z = cast(int16_t) x;
        e |= bcf_enc_size(s, 1, BCF_BT_INT16);
        e |= kputsn(cast(char*)&z, 2, s) < 0;
    } else {
        int32_t z = x;
        e |= bcf_enc_size(s, 1, BCF_BT_INT32);
        e |= kputsn(cast(char*)&z, 4, s) < 0;
    }
    return e == 0 ? 0 : -1;
}


/// Return the value of a single typed integer.
/** @param  p    Pointer to input data block.
    @param  type One of the BCF_BT_INT* type codes
    @param[out] q  Location to store an updated value for p
    @return The integer value, or zero if @p type is not valid.

If @p type is not one of BCF_BT_INT8, BCF_BT_INT16 or BCF_BT_INT32, zero
will be returned and @p *q will not be updated.  Otherwise, the integer
value will be returned and @p *q will be set to the memory location
immediately following the integer value.

Cautious callers can detect invalid type codes by checking that *q has
actually been updated.
*/

/// NOTE: Need hts_endian for le_to_i* functions
/+
pragma(inline, true)
int64_t bcf_dec_int1(const(uint8_t) *p, int type, uint8_t **q)
{
    if (type == BCF_BT_INT8) {
        *q = cast(uint8_t*)p + 1;
        return le_to_i8(p);
    } else if (type == BCF_BT_INT16) {
        *q = cast(uint8_t*)p + 2;
        return le_to_i16(p);
    } else if (type == BCF_BT_INT32) {
        *q = cast(uint8_t*)p + 4;
        return le_to_i32(p);
    } else if (type == BCF_BT_INT64) {
        *q = cast(uint8_t*)p + 8;   // a 64-bit value occupies 8 bytes (was + 4)
        return le_to_i64(p);
    } else { // Invalid type.
        return 0;
    }
}+/

/// Return the value of a single typed integer from a byte stream.
/** @param  p    Pointer to input data block.
    @param[out] q  Location to store an updated value for p
    @return The integer value, or zero if the type code was not valid.

Reads a one-byte type code from @p p, and uses it to decode an integer
value from the following bytes in @p p.

If the type is not one of BCF_BT_INT8, BCF_BT_INT16 or BCF_BT_INT32, zero
will be returned and @p *q will be unchanged.  Otherwise, the integer value will
be returned and @p *q will be set to the memory location immediately following
the integer value.

Cautious callers can detect invalid type codes by checking that *q has
actually been updated.
1543 */ 1544 /+static inline int64_t bcf_dec_typed_int1(const uint8_t *p, uint8_t **q) 1545 { 1546 return bcf_dec_int1(p + 1, *p&0xf, q); 1547 } 1548 1549 static inline int32_t bcf_dec_size(const uint8_t *p, uint8_t **q, int *type) 1550 { 1551 *type = *p & 0xf; 1552 if (*p>>4 != 15) { 1553 *q = cast(uint8_t*)p + 1; 1554 return *p>>4; 1555 } else return bcf_dec_typed_int1(p + 1, q); 1556 }+/