1 /// @file htslib/vcf.h 2 /// High-level VCF/BCF variant calling file operations. 3 /* 4 Copyright (C) 2012, 2013 Broad Institute. 5 Copyright (C) 2012-2020 Genome Research Ltd. 6 7 Author: Heng Li <lh3@sanger.ac.uk> 8 9 Permission is hereby granted, free of charge, to any person obtaining a copy 10 of this software and associated documentation files (the "Software"), to deal 11 in the Software without restriction, including without limitation the rights 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 copies of the Software, and to permit persons to whom the Software is 14 furnished to do so, subject to the following conditions: 15 16 The above copyright notice and this permission notice shall be included in 17 all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 DEALINGS IN THE SOFTWARE. */ 26 27 /* 28 todo: 29 - make the function names consistent 30 - provide calls to abstract away structs as much as possible 31 */ 32 /// Section numbers refer to VCF Specification v4.2: https://samtools.github.io/hts-specs/VCFv4.2.pdf 33 module htslib.vcf; 34 35 import std.bitmanip; 36 import std.string: toStringz; 37 import core.stdc.errno : errno, EINVAL; 38 import core.stdc.config; 39 40 import htslib.hts; 41 import htslib.hts_log; 42 import htslib.hts_endian; 43 import htslib.kstring : kstring_t; 44 import htslib.bgzf : BGZF; 45 46 @system: 47 extern (C): 48 @nogc nothrow { 49 50 /* Included only for backwards compatibility with e.g. bcftools 1.10 */ 51 52 /***************** 53 * Header struct * 54 *****************/ 55 56 enum BCF_HL_FLT = 0; /// header line: FILTER 57 enum BCF_HL_INFO = 1;/// header line: INFO 58 enum BCF_HL_FMT = 2; /// header line: FORMAT 59 enum BCF_HL_CTG = 3; /// header line: contig 60 enum BCF_HL_STR = 4; /// header line: structured header line TAG=<A=..,B=..> 61 enum BCF_HL_GEN = 5; /// header line: generic header line 62 63 enum BCF_HT_FLAG = 0; /// header type: FLAG// header type 64 enum BCF_HT_INT = 1; /// header type: INTEGER 65 enum BCF_HT_REAL = 2; /// header type: REAL 66 enum BCF_HT_STR = 3; /// header type: STRING 67 enum BCF_HT_LONG = BCF_HT_INT | 0x100; // BCF_HT_INT, but for int64_t values; VCF only! 68 69 enum BCF_VL_FIXED = 0; /// variable length: fixed (?)// variable length 70 enum BCF_VL_VAR = 1; /// variable length: variable 71 enum BCF_VL_A = 2; /// variable length: ? 72 enum BCF_VL_G = 3; /// variable length: ? 73 enum BCF_VL_R = 4; /// variable length: ? 74 75 /* === Dictionary === 76 77 The header keeps three dictionaries. The first keeps IDs in the 78 "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths 79 in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[] 80 is the actual hash table, which is opaque to the end users. In the hash 81 table, the key is the ID or sample name as a C string and the value is a 82 bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash 83 table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the 84 size of the hash table or, equivalently, the length of the id[] arrays. 85 */ 86 87 enum BCF_DT_ID = 0; /// dictionary type: ID 88 enum BCF_DT_CTG = 1; /// dictionary type: CONTIG 89 enum BCF_DT_SAMPLE = 2;/// dictionary type: SAMPLE 90 91 /// Structured representation of a header line (§1.2) 92 struct bcf_hrec_t // @suppress(dscanner.style.phobos_naming_convention) 93 { 94 int type; /// One of the BCF_HL_* type 95 char* key; /// The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc. 96 char* value; /// Set only for generic lines, NULL for FILTER/INFO, etc. 97 int nkeys; /// Number of structured fields 98 char** keys; /// The key=value pairs 99 char** vals; /// The key=value pairs 100 } 101 102 /// ID Dictionary entry 103 struct bcf_idinfo_t 104 { 105 ulong[3] info; /** stores Number:20, var:4, Type:4, ColType:4 in info[0..2] 106 for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG */ 107 bcf_hrec_t*[3] hrec; /// pointers to header lines for [FILTER, INFO, FORMAT] in order 108 int id; /// primary key 109 } 110 111 /// ID Dictionary k/v 112 struct bcf_idpair_t // @suppress(dscanner.style.phobos_naming_convention) 113 { 114 const(char)* key; /// header dictionary FILTER/INFO/FORMAT ID key 115 const(bcf_idinfo_t)* val; /// header dictionary FILTER/INFO/FORMAT ID entry 116 } 117 118 /// Structured repreentation of VCF header (§1.2) 119 /// Note that bcf_hdr_t structs must always be created via bcf_hdr_init() 120 struct bcf_hdr_t // @suppress(dscanner.style.phobos_naming_convention) 121 { 122 int[3] n; /// n:the size of the dictionary block in use, (allocated size, m, is below to preserve ABI) 123 bcf_idpair_t*[3] id;/// ID dictionary {FILTER/INFO/FORMAT, contig, sample} ID key/entry 124 void*[3] dict; /// hash table 125 char** samples; /// ?list of samples 126 bcf_hrec_t** hrec; /// Structured representation of this header line 127 int nhrec; /// # of header records 128 int dirty; /// ? 129 int ntransl; /// for bcf_translate() 130 int*[2] transl; /// for bcf_translate() 131 int nsamples_ori; /// for bcf_hdr_set_samples() 132 ubyte* keep_samples; /// ? 133 kstring_t mem; /// ? 134 int[3] m; /// m: allocated size of the dictionary block in use (see n above) 135 } 136 137 /// Lookup table used in bcf_record_check 138 /// MAINTAINER: in C header is [] 139 extern __gshared ubyte[] bcf_type_shift; 140 141 /************** 142 * VCF record * 143 **************/ 144 145 enum BCF_BT_NULL = 0; /// null 146 enum BCF_BT_INT8 = 1;/// int8 147 enum BCF_BT_INT16 = 2;/// int16 148 enum BCF_BT_INT32 = 3;/// int32 149 enum BCF_BT_INT64 = 4;/// Unofficial, for internal use only per htslib headers 150 enum BCF_BT_FLOAT = 5; /// float (32?) 151 enum BCF_BT_CHAR = 7;/// char (8 bit) 152 153 enum VCF_REF = 0; /// ref (e.g. in a gVCF) 154 enum VCF_SNP = 1;/// SNP 155 enum VCF_MNP = 2;/// MNP 156 enum VCF_INDEL = 4;/// INDEL 157 enum VCF_OTHER = 8;/// other (e.g. SV) 158 enum VCF_BND = 16; // /// breakend 159 enum VCF_OVERLAP = 32;/// overlapping deletion, ALT=* 160 enum VCF_INS = 64; 161 enum VCF_DEL = 128; 162 enum VCF_ANY = VCF_SNP | VCF_MNP | VCF_INDEL | VCF_OTHER | VCF_BND | VCF_OVERLAP | VCF_INS | VCF_DEL; 163 164 /// variant type record embedded in bcf_dec_t 165 /// variant type and the number of bases affected, negative for deletions 166 struct bcf_variant_t // @suppress(dscanner.style.phobos_naming_convention) 167 { 168 int type; /// variant type and the number of bases affected, negative for deletions 169 int n; /// variant type and the number of bases affected, negative for deletions 170 } 171 172 /// FORMAT field data (§1.4.2 Genotype fields) 173 struct bcf_fmt_t // @suppress(dscanner.style.phobos_naming_convention) 174 { 175 import std.bitmanip : bitfields; 176 177 int id; /// id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key 178 int n;/// n: number of values per-sample; size: number of bytes per-sample; type: one of BCF_BT_* types 179 int size;/// size: number of bytes per-sample; type: one of BCF_BT_* types 180 int type; /// type: one of BCF_BT_* types 181 ubyte* p; /// same as vptr and vptr_* in bcf_info_t below 182 uint p_len; 183 184 mixin(bitfields!( 185 uint, "p_off", 31, 186 bool, "p_free", 1)); 187 } 188 189 /// INFO field data (§1.4.1 Fixed fields, (8) INFO) 190 struct bcf_info_t // @suppress(dscanner.style.phobos_naming_convention) 191 { 192 import std.bitmanip : bitfields; 193 194 int key; /// key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key 195 int type; /// type: one of BCF_BT_* types 196 197 /// integer value 198 /// float value 199 union V1 200 { 201 long i; 202 float f; 203 } 204 205 V1 v1; /// only set if $len==1; for easier access 206 ubyte* vptr; /// pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes 207 uint vptr_len; 208 209 mixin(bitfields!( 210 uint, "vptr_off", 31, 211 uint, "vptr_free", 1)); /// length of the vptr block or, when set, of the vptr_mod block, excluding offset 212 /// vptr offset, i.e., the size of the INFO key plus size+type bytes 213 /// indicates that vptr-vptr_off must be freed; set only when modified and the new 214 /// data block is bigger than the original 215 int len; /// vector length, 1 for scalars 216 } 217 218 enum BCF1_DIRTY_ID = 1; /// ID was edited 219 enum BCF1_DIRTY_ALS = 2; /// Allele(s) was edited 220 enum BCF1_DIRTY_FLT = 4; /// FILTER was edited 221 enum BCF1_DIRTY_INF = 8; /// INFO was edited 222 223 /// Variable-length data from a VCF record 224 struct bcf_dec_t // @suppress(dscanner.style.phobos_naming_convention) 225 { 226 /// allocated size (high-water mark); do not change 227 int m_fmt; 228 int m_info; 229 int m_id; 230 int m_als; 231 int m_allele; 232 int m_flt; 233 int n_flt; /// Number of FILTER fields 234 int* flt; /// FILTER keys in the dictionary 235 char* id;/// ID 236 char* als; /// REF+ALT block (\0-seperated) 237 char** allele; /// allele[0] is the REF (allele[] pointers to the als block); all null terminated 238 bcf_info_t* info; /// INFO 239 bcf_fmt_t* fmt; /// FORMAT and individual sample 240 bcf_variant_t* var; /// $var and $var_type set only when set_variant_types called 241 int n_var;/// variant number(???) 242 int var_type;/// variant type (TODO: make enum) 243 int shared_dirty; /// if set, shared.s must be recreated on BCF output (TODO: make enum) 244 int indiv_dirty; /// if set, indiv.s must be recreated on BCF output (TODO: make enum) 245 } 246 247 enum BCF_ERR_CTG_UNDEF = 1; /// BCF error: undefined contig 248 enum BCF_ERR_TAG_UNDEF = 2;/// BCF error: undefined tag 249 enum BCF_ERR_NCOLS = 4;/// BCF error: 250 enum BCF_ERR_LIMITS = 8;/// BCF error: 251 enum BCF_ERR_CHAR = 16;/// BCF error: 252 enum BCF_ERR_CTG_INVALID = 32;/// BCF error: 253 enum BCF_ERR_TAG_INVALID = 64;/// BCF error: 254 255 /** 256 The bcf1_t structure corresponds to one VCF/BCF line. Reading from VCF file 257 is slower because the string is first to be parsed, packed into BCF line 258 (done in vcf_parse), then unpacked into internal bcf1_t structure. If it 259 is known in advance that some of the fields will not be required (notably 260 the sample columns), parsing of these can be skipped by setting max_unpack 261 appropriately. 262 Similarly, it is fast to output a BCF line because the columns (kept in 263 shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF 264 line must be formatted in vcf_format. 265 */ 266 struct bcf1_t 267 { 268 import std.bitmanip : bitfields; 269 270 hts_pos_t pos; /// POS 271 hts_pos_t rlen; /// length of REF 272 int rid; /// CHROM 273 float qual; 274 275 mixin(bitfields!( 276 uint, "n_info", 16, 277 uint, "n_allele", 16, 278 uint, "n_fmt", 8, 279 uint, "n_sample", 24)); /// QUAL 280 281 kstring_t shared_; 282 kstring_t indiv; 283 bcf_dec_t d; /// lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack() 284 int max_unpack; /// Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed 285 int unpacked; /// remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work 286 int[3] unpack_size; /// the original block size of ID, REF+ALT and FILTER 287 int errcode; /// one of BCF_ERR_* codes 288 } 289 290 /******* 291 * API * 292 *******/ 293 294 /*********************************************************************** 295 * BCF and VCF I/O 296 * 297 * A note about naming conventions: htslib internally represents VCF 298 * records as bcf1_t data structures, therefore most functions are 299 * prefixed with bcf_. There are a few exceptions where the functions must 300 * be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In 301 * these cases, functions prefixed with bcf_ are more general and work 302 * with both BCF and VCF. 303 * 304 ***********************************************************************/ 305 306 /** These macros are defined only for consistency with other parts of htslib */ 307 alias bcf_init1 = bcf_init; 308 alias bcf_read1 = bcf_read; 309 alias vcf_read1 = vcf_read; 310 alias bcf_write1 = bcf_write; 311 alias vcf_write1 = vcf_write; 312 alias bcf_destroy1 = bcf_destroy; 313 alias bcf_empty1 = bcf_empty; 314 alias vcf_parse1 = vcf_parse; 315 alias bcf_clear1 = bcf_clear; 316 alias vcf_format1 = vcf_format; 317 318 /** 319 * bcf_hdr_init() - create an empty BCF header. 320 * @param mode "r" or "w" 321 * 322 * When opened for writing, the mandatory fileFormat and 323 * FILTER=PASS lines are added automatically. 324 * 325 * The bcf_hdr_t struct returned by a successful call should be freed 326 * via bcf_hdr_destroy() when it is no longer needed. 327 */ 328 bcf_hdr_t* bcf_hdr_init(const(char)* mode); 329 330 /** Destroy a BCF header struct */ 331 void bcf_hdr_destroy(bcf_hdr_t* h); 332 333 /** Allocate and initialize a bcf1_t object. 334 * 335 * The bcf1_t struct returned by a successful call should be freed 336 * via bcf_destroy() when it is no longer needed. 337 */ 338 bcf1_t* bcf_init(); 339 340 /** Deallocate a bcf1_t object */ 341 void bcf_destroy(bcf1_t* v); 342 343 /** 344 * Same as bcf_destroy() but frees only the memory allocated by bcf1_t, 345 * not the bcf1_t object itself. 346 */ 347 void bcf_empty(bcf1_t* v); 348 349 /** 350 * Make the bcf1_t object ready for next read. Intended mostly for 351 * internal use, the user should rarely need to call this function 352 * directly. 353 */ 354 void bcf_clear(bcf1_t* v); 355 356 /** bcf_open and vcf_open mode: please see hts_open() in hts.h */ 357 alias vcfFile = htsFile; 358 alias bcf_open = hts_open; 359 alias vcf_open = hts_open; 360 alias bcf_flush = hts_flush; 361 alias bcf_close = hts_close; 362 alias vcf_close = hts_close; 363 364 /// Read a VCF or BCF header 365 /** @param fp The file to read the header from 366 @return Pointer to a populated header structure on success; 367 NULL on failure 368 369 The bcf_hdr_t struct returned by a successful call should be freed 370 via bcf_hdr_destroy() when it is no longer needed. 371 */ 372 bcf_hdr_t* bcf_hdr_read(htsFile* fp); 373 374 /** 375 * bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed 376 * @param samples samples to include or exclude from file or as a comma-separated string. 377 * LIST|FILE .. select samples in list/file 378 * ^LIST|FILE .. exclude samples from list/file 379 * - .. include all samples 380 * NULL .. exclude all samples 381 * @param is_file @p samples is a file (1) or a comma-separated list (0) 382 * 383 * The bottleneck of VCF reading is parsing of genotype fields. If the 384 * reader knows in advance that only subset of samples is needed (possibly 385 * no samples at all), the performance of bcf_read() can be significantly 386 * improved by calling bcf_hdr_set_samples after bcf_hdr_read(). 387 * The function bcf_read() will subset the VCF/BCF records automatically 388 * with the notable exception when reading records via bcf_itr_next(). 389 * In this case, bcf_subset_format() must be called explicitly, because 390 * bcf_readrec() does not see the header. 391 * 392 * Returns 0 on success, -1 on error or a positive integer if the list 393 * contains samples not present in the VCF header. In such a case, the 394 * return value is the index of the offending sample. 395 */ 396 int bcf_hdr_set_samples(bcf_hdr_t* hdr, const(char)* samples, int is_file); 397 398 int bcf_subset_format(const(bcf_hdr_t)* hdr, bcf1_t* rec); 399 400 /// Write a VCF or BCF header 401 /** @param fp Output file 402 @param h The header to write 403 @return 0 on success; -1 on failure 404 */ 405 int bcf_hdr_write(htsFile* fp, bcf_hdr_t* h); 406 407 /** 408 * Parse VCF line contained in kstring and populate the bcf1_t struct 409 * The line must not end with \n or \r characters. 410 */ 411 int vcf_parse(kstring_t* s, const(bcf_hdr_t)* h, bcf1_t* v); 412 413 /** 414 * Complete the file opening mode, according to its extension. 415 * @param mode Preallocated mode string to be completed. 416 * @param fn File name to be opened. 417 * @param format Format string (vcf|bcf|vcf.gz) 418 * @return 0 on success; -1 on failure 419 */ 420 int vcf_open_mode(char* mode, const(char)* fn, const(char)* format); 421 422 /** The opposite of vcf_parse. It should rarely be called directly, see vcf_write */ 423 int vcf_format(const(bcf_hdr_t)* h, const(bcf1_t)* v, kstring_t* s); 424 425 /// Read next VCF or BCF record 426 /** @param fp The file to read the record from 427 @param h The header for the vcf/bcf file 428 @param v The bcf1_t structure to populate 429 @return 0 on success; -1 on end of file; < -1 on critical error 430 431 On errors which are not critical for reading, such as missing header 432 definitions in vcf files, zero will be returned but v->errcode will have been 433 set to one of BCF_ERR* codes and must be checked before calling bcf_write(). 434 */ 435 int bcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v); 436 437 /** 438 * bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field) 439 * 440 * Note that bcf_unpack() must be called even when reading VCF. It is safe 441 * to call the function repeatedly, it will not unpack the same field 442 * twice. 443 */ 444 enum BCF_UN_STR = 1; // up to ALT inclusive 445 enum BCF_UN_FLT = 2; // up to FILTER 446 enum BCF_UN_INFO = 4; // up to INFO 447 enum BCF_UN_SHR = BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO; // all shared information 448 enum BCF_UN_FMT = 8; // unpack format and each sample 449 enum BCF_UN_IND = BCF_UN_FMT; // a synonym of BCF_UN_FMT 450 enum BCF_UN_ALL = BCF_UN_SHR | BCF_UN_FMT; // everything 451 int bcf_unpack(bcf1_t* b, int which); 452 453 /* 454 * bcf_dup() - create a copy of BCF record. 455 * 456 * Note that bcf_unpack() must be called on the returned copy as if it was 457 * obtained from bcf_read(). Also note that bcf_dup() calls bcf_sync1(src) 458 * internally to reflect any changes made by bcf_update_* functions. 459 * 460 * The bcf1_t struct returned by a successful call should be freed 461 * via bcf_destroy() when it is no longer needed. 462 */ 463 bcf1_t* bcf_dup(bcf1_t* src); 464 465 bcf1_t* bcf_copy(bcf1_t* dst, bcf1_t* src); 466 467 /// Write one VCF or BCF record. The type is determined at the open() call. 468 /** @param fp The file to write to 469 @param h The header for the vcf/bcf file 470 @param v The bcf1_t structure to write 471 @return 0 on success; -1 on error 472 */ 473 int bcf_write(htsFile* fp, bcf_hdr_t* h, bcf1_t* v); 474 475 /** 476 * The following functions work only with VCFs and should rarely be called 477 * directly. Usually one wants to use their bcf_* alternatives, which work 478 * transparently with both VCFs and BCFs. 479 */ 480 /// Read a VCF format header 481 /** @param fp The file to read the header from 482 @return Pointer to a populated header structure on success; 483 NULL on failure 484 485 Use bcf_hdr_read() instead. 486 487 The bcf_hdr_t struct returned by a successful call should be freed 488 via bcf_hdr_destroy() when it is no longer needed. 489 */ 490 bcf_hdr_t* vcf_hdr_read(htsFile* fp); 491 492 /// Write a VCF format header 493 /** @param fp Output file 494 @param h The header to write 495 @return 0 on success; -1 on failure 496 497 Use bcf_hdr_write() instead 498 */ 499 int vcf_hdr_write(htsFile* fp, const(bcf_hdr_t)* h); 500 501 /// Read a record from a VCF file 502 /** @param fp The file to read the record from 503 @param h The header for the vcf file 504 @param v The bcf1_t structure to populate 505 @return 0 on success; -1 on end of file; < -1 on error 506 507 Use bcf_read() instead 508 */ 509 int vcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v); 510 511 /// Write a record to a VCF file 512 /** @param fp The file to write to 513 @param h The header for the vcf file 514 @param v The bcf1_t structure to write 515 @return 0 on success; -1 on error 516 517 Use bcf_write() instead 518 */ 519 int vcf_write(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v); 520 521 /** Helper function for the bcf_itr_next() macro; internal use, ignore it */ 522 int bcf_readrec( 523 BGZF* fp, 524 void* null_, 525 void* v, 526 int* tid, 527 hts_pos_t* beg, 528 hts_pos_t* end); 529 530 /// Write a line to a VCF file 531 /** @param line Line to write 532 @param fp File to write it to 533 @return 0 on success; -1 on failure 534 535 @note No checks are done on the line being added, apart from 536 ensuring that it ends with a newline. This function 537 should therefore be used with care. 538 */ 539 int vcf_write_line(htsFile* fp, kstring_t* line); 540 541 /************************************************************************** 542 * Header querying and manipulation routines 543 **************************************************************************/ 544 545 /** Create a new header using the supplied template 546 * 547 * The bcf_hdr_t struct returned by a successful call should be freed 548 * via bcf_hdr_destroy() when it is no longer needed. 549 * @return NULL on failure, header otherwise 550 */ 551 bcf_hdr_t* bcf_hdr_dup(const(bcf_hdr_t)* hdr); 552 553 /** 554 * Copy header lines from src to dst if not already present in dst. See also bcf_translate(). 555 * Returns 0 on success or sets a bit on error: 556 * 1 .. conflicting definitions of tag length 557 * // todo 558 */ 559 deprecated("Please use bcf_hdr_merge instead") 560 int bcf_hdr_combine(bcf_hdr_t* dst, const(bcf_hdr_t)* src); 561 562 /** 563 * bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate() 564 * @param dst: the destination header to be merged into, NULL on the first pass 565 * @param src: the source header 566 * @return NULL on failure, header otherwise 567 * 568 * Notes: 569 * - use as: 570 * bcf_hdr_t *dst = NULL; 571 * for (i=0; i<nsrc; i++) dst = bcf_hdr_merge(dst,src[i]); 572 * 573 * - bcf_hdr_merge() replaces bcf_hdr_combine() which had a problem when 574 * combining multiple BCF headers. The current bcf_hdr_combine() 575 * does not have this problem, but became slow when used for many files. 576 */ 577 bcf_hdr_t* bcf_hdr_merge(bcf_hdr_t* dst, const(bcf_hdr_t)* src); 578 579 /** 580 * bcf_hdr_add_sample() - add a new sample. 581 * @param sample: sample name to be added 582 * 583 * Note: 584 * After all samples have been added, the internal header structure must be updated 585 * by calling bcf_hdr_sync(). This is normally done automatically by the first bcf_hdr_write() 586 * or bcf_write() call. Otherwise, the caller must force the update by calling bcf_hdr_sync() 587 * explicitly. 588 */ 589 int bcf_hdr_add_sample(bcf_hdr_t* hdr, const(char)* sample); 590 591 /** Read VCF header from a file and update the header */ 592 int bcf_hdr_set(bcf_hdr_t* hdr, const(char)* fname); 593 594 /// Appends formatted header text to _str_. 595 /** If _is_bcf_ is zero, `IDX` fields are discarded. 596 * @return 0 if successful, or negative if an error occurred 597 * @since 1.4 598 */ 599 int bcf_hdr_format(const(bcf_hdr_t)* hdr, int is_bcf, kstring_t* str); 600 601 /** Returns formatted header (newly allocated string) and its length, 602 * excluding the terminating \0. If is_bcf parameter is unset, IDX 603 * fields are discarded. 604 * @deprecated Use bcf_hdr_format() instead as it can handle huge headers. 605 */ 606 deprecated("use bcf_hdr_format() instead") 607 char* bcf_hdr_fmt_text(const(bcf_hdr_t)* hdr, int is_bcf, int* len); 608 609 /** Append new VCF header line, returns 0 on success */ 610 int bcf_hdr_append(bcf_hdr_t* h, const(char)* line); 611 612 int bcf_hdr_printf(bcf_hdr_t* h, const(char)* format, ...); 613 614 /** VCF version, e.g. VCFv4.2 */ 615 const(char)* bcf_hdr_get_version(const(bcf_hdr_t)* hdr); 616 617 /// Set version in bcf header 618 /** 619 @param hdr BCF header struct 620 @param version Version to set, e.g. "VCFv4.3" 621 @return 0 on success; < 0 on error 622 */ 623 int bcf_hdr_set_version(bcf_hdr_t* hdr, const(char)* version_); 624 625 /** 626 * bcf_hdr_remove() - remove VCF header tag 627 * @param type: one of BCF_HL_* 628 * @param key: tag name or NULL to remove all tags of the given type 629 */ 630 void bcf_hdr_remove(bcf_hdr_t* h, int type, const(char)* key); 631 632 /** 633 * bcf_hdr_subset() - creates a new copy of the header removing unwanted samples 634 * @param n: number of samples to keep 635 * @param samples: names of the samples to keep 636 * @param imap: mapping from index in @samples to the sample index in the original file 637 * @return NULL on failure, header otherwise 638 * 639 * Sample names not present in h0 are ignored. The number of unmatched samples can be checked 640 * by comparing n and bcf_hdr_nsamples(out_hdr). 641 * This function can be used to reorder samples. 642 * See also bcf_subset() which subsets individual records. 643 * The bcf_hdr_t struct returned by a successful call should be freed 644 * via bcf_hdr_destroy() when it is no longer needed. 645 */ 646 /// NOTE: char *const* samples really exmplifies what I hate about C pointers 647 /// My interpretation of this is it is equivalent to char **samples, but that the outer pointer is const 648 /// which in D would be const(char *)*samples. I don't know what it implies about constancy of *samples or samples. 649 bcf_hdr_t* bcf_hdr_subset( 650 const(bcf_hdr_t)* h0, 651 int n, 652 const(char*)* samples, 653 int* imap); 654 655 /** Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names) */ 656 const(char*)* bcf_hdr_seqnames(const(bcf_hdr_t)* h, int* nseqs); 657 658 /** Get number of samples */ 659 pragma(inline, true) auto bcf_hdr_nsamples (bcf_hdr_t *hdr) 660 { 661 return hdr.n[BCF_DT_SAMPLE]; 662 } 663 664 /** The following functions are for internal use and should rarely be called directly */ 665 int bcf_hdr_parse(bcf_hdr_t* hdr, char* htxt); 666 667 /// Synchronize internal header structures 668 /** @param h Header 669 @return 0 on success, -1 on failure 670 671 This function updates the id, sample and contig arrays in the 672 bcf_hdr_t structure so that they point to the same locations as 673 the id, sample and contig dictionaries. 674 */ 675 int bcf_hdr_sync(bcf_hdr_t* h); 676 677 /** 678 * bcf_hdr_parse_line() - parse a single line of VCF textual header 679 * @param h BCF header struct 680 * @param line One or more lines of header text 681 * @param len Filled out with length data parsed from 'line'. 682 * @return bcf_hrec_t* on success; 683 * NULL on error or on end of header text. 684 * NB: to distinguish error from end-of-header, check *len: 685 * *len == 0 indicates @p line did not start with "##" 686 * *len == -1 indicates failure, likely due to out of memory 687 * *len > 0 indicates a malformed header line 688 * 689 * If *len > 0 on exit, it will contain the full length of the line 690 * including any trailing newline (this includes cases where NULL was 691 * returned due to a malformed line). Callers can use this to skip to 692 * the next header line. 693 */ 694 bcf_hrec_t* bcf_hdr_parse_line( 695 const(bcf_hdr_t)* h, 696 const(char)* line, 697 int* len); 698 /// Convert a bcf header record to string form 699 /** 700 * @param hrec Header record 701 * @param str Destination kstring 702 * @return 0 on success; < 0 on error 703 */ 704 int bcf_hrec_format(const(bcf_hrec_t)* hrec, kstring_t* str); 705 706 int bcf_hdr_add_hrec(bcf_hdr_t* hdr, bcf_hrec_t* hrec); 707 708 /** 709 * bcf_hdr_get_hrec() - get header line info 710 * @param type: one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN 711 * @param key: the header key for generic lines (e.g. "fileformat"), any field 712 * for structured lines, typically "ID". 713 * @param value: the value which pairs with key. Can be be NULL for BCF_HL_GEN 714 * @param str_class: the class of BCF_HL_STR line (e.g. "ALT" or "SAMPLE"), otherwise NULL 715 */ 716 bcf_hrec_t* bcf_hdr_get_hrec( 717 const(bcf_hdr_t)* hdr, 718 int type, 719 const(char)* key, 720 const(char)* value, 721 const(char)* str_class); 722 723 /// Duplicate a header record 724 /** @param hrec Header record to copy 725 @return A new header record on success; NULL on failure 726 727 The bcf_hrec_t struct returned by a successful call should be freed 728 via bcf_hrec_destroy() when it is no longer needed. 729 */ 730 bcf_hrec_t* bcf_hrec_dup(bcf_hrec_t* hrec); 731 732 /// Add a new header record key 733 /** @param hrec Header record 734 @param str Key name 735 @param len Length of @p str 736 @return 0 on success; -1 on failure 737 */ 738 int bcf_hrec_add_key(bcf_hrec_t* hrec, const(char)* str, size_t len); 739 740 /// Set a header record value 741 /** @param hrec Header record 742 @param i Index of value 743 @param str Value to set 744 @param len Length of @p str 745 @param is_quoted Value should be quoted 746 @return 0 on success; -1 on failure 747 */ 748 int bcf_hrec_set_val( 749 bcf_hrec_t* hrec, 750 int i, 751 const(char)* str, 752 size_t len, 753 int is_quoted); 754 755 /// Lookup header record by key 756 int bcf_hrec_find_key(bcf_hrec_t* hrec, const(char)* key); 757 758 /// Add an IDX header record 759 /** @param hrec Header record 760 @param idx IDX value to add 761 @return 0 on success; -1 on failure 762 */ 763 int hrec_add_idx(bcf_hrec_t* hrec, int idx); 764 765 /// Free up a header record and associated structures 766 /** @param hrec Header record 767 */ 768 void bcf_hrec_destroy(bcf_hrec_t* hrec); 769 770 /************************************************************************** 771 * Individual record querying and manipulation routines 772 **************************************************************************/ 773 774 /** See the description of bcf_hdr_subset() */ 775 int bcf_subset(const(bcf_hdr_t)* h, bcf1_t* v, int n, int* imap); 776 777 /** 778 * bcf_translate() - translate tags ids to be consistent with different header. This function 779 * is useful when lines from multiple VCF need to be combined. 780 * @dst_hdr: the destination header, to be used in bcf_write(), see also bcf_hdr_combine() 781 * @src_hdr: the source header, used in bcf_read() 782 * @src_line: line obtained by bcf_read() 783 */ 784 int bcf_translate( 785 const(bcf_hdr_t)* dst_hdr, 786 bcf_hdr_t* src_hdr, 787 bcf1_t* src_line); 788 789 790 /** 791 * @param rec BCF/VCF record 792 * @return Types of variant present 793 * 794 * The return value will be a bitwise-or of VCF_SNP, VCF_MNP, 795 * VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP. If will return 796 * VCF_REF (i.e. 0) if none of the other types is present. 797 * @deprecated Please use bcf_has_variant_types() instead 798 */ 799 int bcf_get_variant_types(bcf1_t* rec); 800 801 802 /* 803 * @param rec BCF/VCF record 804 * @param ith_allele Allele to check 805 * @return Type of variant present 806 * 807 * The return value will be one of VCF_REF, VCF_SNP, VCF_MNP, 808 * VCF_INDEL, VCF_OTHER, VCF_BND or VCF_OVERLAP. 809 * @deprecated Please use bcf_has_variant_type() instead 810 */ 811 int bcf_get_variant_type(bcf1_t* rec, int ith_allele); 812 813 814 enum bcf_variant_match 815 { 816 bcf_match_exact = 0, 817 bcf_match_overlap = 1, 818 bcf_match_subset = 2 819 } 820 821 822 /* 823 * @param rec BCF/VCF record 824 * @param bitmask Set of variant types to test for 825 * @param mode Match mode 826 * @return >0 if the variant types are present, 827 * 0 if not present, 828 * -1 on error 829 * 830 * @p bitmask should be the bitwise-or of the variant types (VCF_SNP, 831 * VCF_MNP, etc.) to test for. 832 * 833 * The return value is the bitwise-and of the set of types present 834 * and @p bitmask. Callers that want to check for the presence of more 835 * than one type can avoid function call overhead by passing all the 836 * types to be checked for in a single call to this function, in 837 * bcf_match_overlap mode, and then check for them individually in the 838 * returned value. 839 * 840 * As VCF_REF is represented by 0 (i.e. the absence of other variants) 841 * it should be tested for using 842 * bcf_has_variant_types(rec, VCF_REF, bcf_match_exact) 843 * which will return 1 if no other variant type is present, otherwise 0. 844 */ 845 int bcf_has_variant_types(bcf1_t* rec, uint bitmask, bcf_variant_match mode); 846 847 848 /* 849 * @param rec BCF/VCF record 850 * @param ith_allele Allele to check 851 * @param bitmask Set of variant types to test for 852 * @return >0 if one of the variant types is present, 853 * 0 if not present, 854 * -1 on error 855 * 856 * @p bitmask should be the bitwise-or of the variant types (VCF_SNP, 857 * VCF_MNP, etc.) to test for, or VCF_REF on its own. 858 * 859 * The return value is the bitwise-and of the set of types present 860 * and @p bitmask. Callers that want to check for the presence of more 861 * than one type can avoid function call overhead by passing all the 862 * types to be checked for in a single call to this function, and then 863 * check for them individually in the returned value. 864 * 865 * As a special case, if @p bitmask is VCF_REF (i.e. 0), the function 866 * tests for an exact match. The return value will be 1 if the 867 * variant type calculated for the allele is VCF_REF, otherwise if 868 * any other type is present it will be 0. 869 */ 870 int bcf_has_variant_type(bcf1_t* rec, int ith_allele, uint bitmask); 871 872 873 /* 874 * @param rec BCF/VCF record 875 * @param ith_allele Allele index 876 * @return The number of bases affected (negative for deletions), 877 * or bcf_int32_missing on error. 878 */ 879 int bcf_variant_length(bcf1_t* rec, int ith_allele); 880 881 int bcf_is_snp(bcf1_t* v); 882 883 /** 884 * bcf_update_filter() - sets the FILTER column 885 * @flt_ids: The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") 886 * @n: Number of filters. If n==0, all filters are removed 887 */ 888 int bcf_update_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int* flt_ids, int n); 889 /** 890 * bcf_add_filter() - adds to the FILTER column 891 * @flt_id: filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") 892 * 893 * If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed. 894 */ 895 int bcf_add_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int flt_id); 896 /** 897 * bcf_remove_filter() - removes from the FILTER column 898 * @flt_id: filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") 899 * @pass: when set to 1 and no filters are present, set to PASS 900 */ 901 int bcf_remove_filter( 902 const(bcf_hdr_t)* hdr, 903 bcf1_t* line, 904 int flt_id, 905 int pass); 906 /** 907 * Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably. 908 */ 909 int bcf_has_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, char* filter); 910 /** 911 * bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALT column 912 * @alleles: Array of alleles 913 * @nals: Number of alleles 914 * @alleles_string: Comma-separated alleles, starting with the REF allele 915 */ 916 int bcf_update_alleles( 917 const(bcf_hdr_t)* hdr, 918 bcf1_t* line, 919 const(char*)* alleles, 920 int nals); 921 922 int bcf_update_alleles_str( 923 const(bcf_hdr_t)* hdr, 924 bcf1_t* line, 925 const(char)* alleles_string); 926 927 /** 928 * bcf_update_id() - sets new ID string 929 * bcf_add_id() - adds to the ID string checking for duplicates 930 */ 931 int bcf_update_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id); 932 933 int bcf_add_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id); 934 935 /** 936 * bcf_update_info_*() - functions for updating INFO fields 937 * @param hdr: the BCF header 938 * @param line: VCF line to be edited 939 * @param key: the INFO tag to be updated 940 * @param values: pointer to the array of values. Pass NULL to remove the tag. 941 * @param n: number of values in the array. When set to 0, the INFO tag is removed 942 * @return 0 on success or negative value on error. 943 * 944 * The @p string in bcf_update_info_flag() is optional, 945 * @p n indicates whether the flag is set or removed. 946 * 947 * Note that updating an END info tag will cause line->rlen to be 948 * updated as a side-effect (removing the tag will set it to the 949 * string length of the REF allele). If line->pos is being changed as 950 * well, it is important that this is done before calling 951 * bcf_update_info_int32() to update the END tag, otherwise rlen will be 952 * set incorrectly. If the new END value is less than or equal to 953 * line->pos, a warning will be printed and line->rlen will be set to 954 * the length of the REF allele. 955 */ 956 pragma(inline, true) { // TODO: rewrite as template 957 auto bcf_update_info_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration) 958 { return bcf_update_info(hdr, line, key, values, n, BCF_HT_INT); } 959 auto bcf_update_info_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration) 960 { return bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL); } 961 auto bcf_update_info_flag(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values, int n) // @suppress(dscanner.style.undocumented_declaration) 962 { return bcf_update_info(hdr, line, key, values, n, BCF_HT_FLAG); } 963 auto bcf_update_info_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(void) *values) // @suppress(dscanner.style.undocumented_declaration) 964 { return bcf_update_info(hdr, line, key, values, 1, BCF_HT_STR); } 965 } 966 967 int bcf_update_info ( 968 const(bcf_hdr_t)* hdr, 969 bcf1_t* line, 970 const(char)* key, 971 const(void)* values, 972 int n, 973 int type); 974 975 /// Set or update 64-bit integer INFO values 976 /** 977 * @param hdr: the BCF header 978 * @param line: VCF line to be edited 979 * @param key: the INFO tag to be updated 980 * @param values: pointer to the array of values. Pass NULL to remove the tag. 981 * @param n: number of values in the array. When set to 0, the INFO tag is removed 982 * @return 0 on success or negative value on error. 983 * 984 * This function takes an int64_t values array as input. The data 985 * actually stored will be shrunk to the minimum size that can 986 * accept all of the values. 987 * 988 * INFO values outside of the range BCF_MIN_BT_INT32 to BCF_MAX_BT_INT32 989 * can only be written to VCF files. 990 */ 991 pragma(inline, true) 992 auto bcf_update_info_int64( const(bcf_hdr_t) *hdr, bcf1_t *line, 993 const(char) *key, 994 const(long) *values, int n) 995 { 996 return bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG); 997 } 998 999 /** 1000 * bcf_update_format_*() - functions for updating FORMAT fields 1001 * @values: pointer to the array of values, the same number of elements 1002 * is expected for each sample. Missing values must be padded 1003 * with bcf_*_missing or bcf_*_vector_end values. 1004 * @n: number of values in the array. If n==0, existing tag is removed. 1005 * 1006 * The function bcf_update_format_string() is a higher-level (slower) variant of 1007 * bcf_update_format_char(). The former accepts array of \0-terminated strings 1008 * whereas the latter requires that the strings are collapsed into a single array 1009 * of fixed-length strings. In case of strings with variable length, shorter strings 1010 * can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char() 1011 * are not \0-terminated. 1012 * 1013 * Returns 0 on success or negative value on error. 1014 */ 1015 1016 }/// closing @nogc nothrow 1017 1018 pragma(inline, true) { 1019 auto bcf_update_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(int) *values, int n) // @suppress(dscanner.style.undocumented_declaration) 1020 { return bcf_update_format(hdr, line, key, values, n, BCF_HT_INT); } 1021 auto bcf_update_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(float) *values, int n) // @suppress(dscanner.style.undocumented_declaration) 1022 { return bcf_update_format(hdr, line, key, values, n, BCF_HT_REAL); } 1023 auto bcf_update_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(char) **values, int n) // @suppress(dscanner.style.undocumented_declaration) 1024 { return bcf_update_format(hdr, line, key, values, n, BCF_HT_STR); } 1025 auto bcf_update_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) **gts, int n) // @suppress(dscanner.style.undocumented_declaration) 1026 { return bcf_update_format(hdr, line, toStringz("GT"c), gts, n, BCF_HT_INT); 1027 } 1028 } 1029 1030 @nogc nothrow { 1031 1032 int bcf_update_format_string ( 1033 const(bcf_hdr_t)* hdr, 1034 bcf1_t* line, 1035 const(char)* key, 1036 const(char*)* values, 1037 int n); 1038 1039 int bcf_update_format( 1040 const(bcf_hdr_t)* hdr, 1041 bcf1_t* line, 1042 const(char)* key, 1043 const(void)* values, 1044 int n, 1045 int type); 1046 1047 /// Macros for setting genotypes correctly, for use with bcf_update_genotypes only; idx corresponds 1048 /// to VCF's GT (1-based index to ALT or 0 for the reference allele) and val is the opposite, obtained 1049 /// from bcf_get_genotypes() below. 1050 // TODO: is int appropriate? 1051 pragma(inline, true) { 1052 auto bcf_gt_phased(int idx) { return (((idx)+1)<<1|1); } 1053 /// ditto 1054 auto bcf_gt_unphased(int idx) { return (((idx)+1)<<1); } 1055 /// ditto 1056 auto bcf_gt_is_missing(int val) { return ((val)>>1 ? 0 : 1);} 1057 /// ditto 1058 auto bcf_gt_is_phased(int idx) { return ((idx)&1); } 1059 /// ditto 1060 auto bcf_gt_allele(int val) { return (((val)>>1)-1); } 1061 } 1062 /// ditto 1063 enum int bcf_gt_missing = 0; 1064 1065 /** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */ 1066 pragma(inline, true) { 1067 auto bcf_alleles2gt(int a, int b) { return ((a)>(b)?((a)*((a)+1)/2+(b)):((b)*((b)+1)/2+(a))); } 1068 /// ditto 1069 void bcf_gt2alleles(int igt, int *a, int *b) 1070 { 1071 int k = 0, dk = 1; // @suppress(dscanner.useless-initializer) 1072 while ( k<igt ) { dk++; k += dk; } 1073 *b = dk - 1; *a = igt - k + *b; 1074 } 1075 } 1076 1077 /** 1078 * bcf_get_fmt() - returns pointer to FORMAT's field data 1079 * @header: for access to BCF_DT_ID dictionary 1080 * @line: VCF line obtained from vcf_parse1 1081 * @fmt: one of GT,PL,... 1082 * 1083 * Returns bcf_fmt_t* if the call succeeded, or returns NULL when the field 1084 * is not available. 1085 */ 1086 bcf_fmt_t* bcf_get_fmt(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key); 1087 1088 bcf_info_t* bcf_get_info(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key); 1089 1090 /** 1091 * bcf_get_*_id() - returns pointer to FORMAT/INFO field data given the header index instead of the string ID 1092 * @line: VCF line obtained from vcf_parse1 1093 * @id: The header index for the tag, obtained from bcf_hdr_id2int() 1094 * 1095 * Returns bcf_fmt_t* / bcf_info_t*. These functions do not check if the index is valid 1096 * as their goal is to avoid the header lookup. 1097 */ 1098 bcf_fmt_t* bcf_get_fmt_id(bcf1_t* line, const int id); 1099 1100 bcf_info_t* bcf_get_info_id(bcf1_t* line, const int id); 1101 1102 /** 1103 * bcf_get_info_*() - get INFO values, integers or floats 1104 * @param hdr: BCF header 1105 * @param line: BCF record 1106 * @param tag: INFO tag to retrieve 1107 * @param dst: *dst is pointer to a memory location, can point to NULL 1108 * @param ndst: pointer to the size of allocated memory 1109 * @return >=0 on success 1110 * -1 .. no such INFO tag defined in the header 1111 * -2 .. clash between types defined in the header and encountered in the VCF record 1112 * -3 .. tag is not present in the VCF record 1113 * -4 .. the operation could not be completed (e.g. out of memory) 1114 * 1115 * Returns negative value on error or the number of values (including 1116 * missing values) put in *dst on success. bcf_get_info_string() returns 1117 * on success the number of characters stored excluding the nul- 1118 * terminating byte. bcf_get_info_flag() does not store anything in *dst 1119 * but returns 1 if the flag is set or 0 if not. 1120 * 1121 * *dst will be reallocated if it is not big enough (i.e. *ndst is too 1122 * small) or NULL on entry. The new size will be stored in *ndst. 1123 */ 1124 pragma(inline, true) { 1125 auto bcf_get_info_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) 1126 { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_INT); } 1127 auto bcf_get_info_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) 1128 { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_REAL); } 1129 auto bcf_get_info_string(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) 1130 { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_STR); } 1131 auto bcf_get_info_flag(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) 1132 { return bcf_get_info_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_FLAG); } 1133 } 1134 1135 int bcf_get_info_values ( 1136 const(bcf_hdr_t)* hdr, 1137 bcf1_t* line, 1138 const(char)* tag, 1139 void** dst, 1140 int* ndst, 1141 int type); 1142 1143 /// Put integer INFO values into an int64_t array 1144 /** 1145 * @param hdr: BCF header 1146 * @param line: BCF record 1147 * @param tag: INFO tag to retrieve 1148 * @param dst: *dst is pointer to a memory location, can point to NULL 1149 * @param ndst: pointer to the size of allocated memory 1150 * @return >=0 on success 1151 * -1 .. no such INFO tag defined in the header 1152 * -2 .. clash between types defined in the header and encountered in the VCF record 1153 * -3 .. tag is not present in the VCF record 1154 * -4 .. the operation could not be completed (e.g. out of memory) 1155 * 1156 * Returns negative value on error or the number of values (including 1157 * missing values) put in *dst on success. 1158 * 1159 * *dst will be reallocated if it is not big enough (i.e. *ndst is too 1160 * small) or NULL on entry. The new size will be stored in *ndst. 1161 */ 1162 pragma(inline, true) 1163 auto bcf_get_info_int64(const(bcf_hdr_t) *hdr, bcf1_t *line, 1164 const(char) *tag, long **dst, 1165 int *ndst) 1166 { 1167 return bcf_get_info_values(hdr, line, tag, 1168 cast(void **) dst, ndst, BCF_HT_LONG); 1169 } 1170 1171 /** 1172 * bcf_get_format_*() - same as bcf_get_info*() above 1173 * 1174 * The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char(). 1175 * see the description of bcf_update_format_string() and bcf_update_format_char() above. 1176 * Unlike other bcf_get_format__*() functions, bcf_get_format_string() allocates two arrays: 1177 * a single block of \0-terminated strings collapsed into a single array and an array of pointers 1178 * to these strings. Both arrays must be cleaned by the user. 1179 * 1180 * Returns negative value on error or the number of written values on success. 1181 * 1182 * Use the returned number of written values for accessing valid entries of dst, as ndst is only a 1183 * watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over 1184 * values from previous calls to bcf_get_format_*() on lines with more values per sample. 1185 * 1186 * Example: 1187 * int ndst = 0; char **dst = NULL; 1188 * if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 ) 1189 * for (i=0; i<bcf_hdr_nsamples(hdr); i++) printf("%s\n", dst[i]); 1190 * free(dst[0]); free(dst); 1191 * 1192 * Example: 1193 * int i, j, ngt, nsmpl = bcf_hdr_nsamples(hdr); 1194 * int32_t *gt_arr = NULL, ngt_arr = 0; 1195 * 1196 * ngt = bcf_get_genotypes(hdr, line, >_arr, &ngt_arr); 1197 * if ( ngt<=0 ) return; // GT not present 1198 * 1199 * int max_ploidy = ngt/nsmpl; 1200 * for (i=0; i<nsmpl; i++) 1201 * { 1202 * int32_t *ptr = gt_arr + i*max_ploidy; 1203 * for (j=0; j<max_ploidy; j++) 1204 * { 1205 * // if true, the sample has smaller ploidy 1206 * if ( ptr[j]==bcf_int32_vector_end ) break; 1207 * 1208 * // missing allele 1209 * if ( bcf_gt_is_missing(ptr[j]) ) continue; 1210 * 1211 * // the VCF 0-based allele index 1212 * int allele_index = bcf_gt_allele(ptr[j]); 1213 * 1214 * // is phased? 1215 * int is_phased = bcf_gt_is_phased(ptr[j]); 1216 * 1217 * // .. do something .. 1218 * } 1219 * } 1220 * free(gt_arr); 1221 * 1222 */ 1223 1224 }/// closing @nogc nothrow from line 928 1225 1226 pragma(inline, true) { 1227 auto bcf_get_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line) 1228 { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_INT); } 1229 auto bcf_get_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line) 1230 { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_REAL); } 1231 auto bcf_get_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line) 1232 { return bcf_get_format_values(hdr, line, tag, cast(void**) dst, ndst, BCF_HT_STR); } 1233 auto bcf_get_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, void **dst, int *ndst) // @suppress(dscanner.style.undocumented_declaration) // @suppress(dscanner.style.long_line) 1234 { return bcf_get_format_values(hdr, line, toStringz("GT"c), cast(void**) dst, ndst, BCF_HT_INT); } 1235 } 1236 1237 @nogc nothrow { 1238 1239 int bcf_get_format_string( 1240 const(bcf_hdr_t)* hdr, 1241 bcf1_t* line, 1242 const(char)* tag, 1243 char*** dst, 1244 int* ndst); 1245 1246 int bcf_get_format_values( 1247 const(bcf_hdr_t)* hdr, 1248 bcf1_t* line, 1249 const(char)* tag, 1250 void** dst, 1251 int* ndst, 1252 int type); 1253 1254 /************************************************************************** 1255 * Helper functions 1256 **************************************************************************/ 1257 1258 /** 1259 * bcf_hdr_id2int() - Translates string into numeric ID 1260 * bcf_hdr_int2id() - Translates numeric ID into string 1261 * @type: one of BCF_DT_ID, BCF_DT_CTG, BCF_DT_SAMPLE 1262 * @id: tag name, such as: PL, DP, GT, etc. 1263 * 1264 * Returns -1 if string is not in dictionary, otherwise numeric ID which identifies 1265 * fields in BCF records. 1266 */ 1267 int bcf_hdr_id2int(const(bcf_hdr_t)* hdr, int type, const(char)* id); 1268 1269 pragma(inline, true) 1270 auto bcf_hdr_int2id(const(bcf_hdr_t) *hdr, int type, int int_id) 1271 { return hdr.id[type][int_id].key; } 1272 1273 /** 1274 * bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID 1275 * bcf_hdr_id2name() - Translates numeric ID to sequence name 1276 */ 1277 pragma(inline, true) int bcf_hdr_name2id(const(bcf_hdr_t) *hdr, const(char) *id) { return bcf_hdr_id2int(hdr, BCF_DT_CTG, id); } // @suppress(dscanner.style.long_line) 1278 /// ditto 1279 pragma(inline, true) const(char) *bcf_hdr_id2name(const(bcf_hdr_t) *hdr, int rid) { return hdr.id[BCF_DT_CTG][rid].key; } // @suppress(dscanner.style.long_line) 1280 /// ditto 1281 pragma(inline, true) const(char) *bcf_seqname(const(bcf_hdr_t) *hdr, bcf1_t *rec) { return hdr.id[BCF_DT_CTG][rec.rid].key; } // @suppress(dscanner.style.long_line) 1282 1283 /** Return CONTIG name, or "(unknown)" 1284 1285 Like bcf_seqname(), but this function will never return NULL. If 1286 the contig name cannot be found (either because @p hdr was not 1287 supplied or rec->rid was out of range) it returns the string 1288 "(unknown)". 1289 */ 1290 const(char)* bcf_seqname_safe(const(bcf_hdr_t)* hdr, const(bcf1_t)* rec); 1291 1292 /** 1293 * bcf_hdr_id2*() - Macros for accessing bcf_idinfo_t 1294 * @type: one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT 1295 * @int_id: return value of bcf_hdr_id2int, must be >=0 1296 * 1297 * The returned values are: 1298 * bcf_hdr_id2length .. whether the number of values is fixed or variable, one of BCF_VL_* 1299 * bcf_hdr_id2number .. the number of values, 0xfffff for variable length fields 1300 * bcf_hdr_id2type .. the field type, one of BCF_HT_* 1301 * bcf_hdr_id2coltype .. the column type, one of BCF_HL_* 1302 * 1303 * Notes: Prior to using the macros, the presence of the info should be 1304 * tested with bcf_hdr_idinfo_exists(). 1305 */ 1306 // TODO: for dict_type and col_type use ENUMs 1307 pragma(inline, true) { 1308 auto bcf_hdr_id2length (const(bcf_hdr_t) *hdr, int type, int int_id) { return ((hdr).id[BCF_DT_ID][int_id].val.info[type]>>8 & 0xf); } // @suppress(dscanner.style.long_line) 1309 /// ditto 1310 auto bcf_hdr_id2number (const(bcf_hdr_t) *hdr, int type, int int_id) { return ((hdr).id[BCF_DT_ID][int_id].val.info[type]>>12); } // @suppress(dscanner.style.long_line) 1311 /// ditto 1312 uint bcf_hdr_id2type (const(bcf_hdr_t) *hdr, int type, int int_id) { return cast(uint)((hdr).id[BCF_DT_ID][int_id].val.info[type]>>4 & 0xf); } // @suppress(dscanner.style.long_line) 1313 /// ditto 1314 uint bcf_hdr_id2coltype (const(bcf_hdr_t) *hdr, int type, int int_id){ return cast(uint)((hdr).id[BCF_DT_ID][int_id].val.info[type] & 0xf); } // @suppress(dscanner.style.long_line) 1315 /// ditto 1316 auto bcf_hdr_idinfo_exists (const(bcf_hdr_t) *hdr, int type, int int_id) { return ((int_id<0 || bcf_hdr_id2coltype(hdr,type,int_id)==0xf) ? 0 : 1); } // @suppress(dscanner.style.long_line) 1317 /// ditto 1318 auto bcf_hdr_id2hrc (const(bcf_hdr_t) *hdr, int dict_type, int col_type, int int_id) 1319 { return ((hdr).id[(dict_type)==BCF_DT_CTG?BCF_DT_CTG:BCF_DT_ID][int_id].val.hrec[(dict_type)==BCF_DT_CTG?0:(col_type)]); // @suppress(dscanner.style.long_line) 1320 } 1321 } 1322 /// Convert BCF FORMAT data to string form 1323 /** 1324 * @param s kstring to write into 1325 * @param n number of items in @p data 1326 * @param type type of items in @p data 1327 * @param data BCF format data 1328 * @return 0 on success 1329 * -1 if out of memory 1330 */ 1331 int bcf_fmt_array(kstring_t* s, int n, int type, void* data); 1332 1333 ubyte* bcf_fmt_sized_array(kstring_t* s, ubyte* ptr); 1334 1335 /// Encode a variable-length char array in BCF format 1336 /** 1337 * @param s kstring to write into 1338 * @param l length of input 1339 * @param a input data to encode 1340 * @return 0 on success; < 0 on error 1341 */ 1342 int bcf_enc_vchar(kstring_t* s, int l, const(char)* a); 1343 1344 /// Encode a variable-length integer array in BCF format 1345 /** 1346 * @param s kstring to write into 1347 * @param n total number of items in @p a (<= 0 to encode BCF_BT_NULL) 1348 * @param a input data to encode 1349 * @param wsize vector length (<= 0 is equivalent to @p n) 1350 * @return 0 on success; < 0 on error 1351 * @note @p n should be an exact multiple of @p wsize 1352 */ 1353 int bcf_enc_vint(kstring_t* s, int n, int* a, int wsize); 1354 1355 /// Encode a variable-length float array in BCF format 1356 /** 1357 * @param s kstring to write into 1358 * @param n total number of items in @p a (<= 0 to encode BCF_BT_NULL) 1359 * @param a input data to encode 1360 * @return 0 on success; < 0 on error 1361 */ 1362 int bcf_enc_vfloat(kstring_t* s, int n, float* a); 1363 1364 /************************************************************************** 1365 * BCF index 1366 * 1367 * Note that these functions work with BCFs only. See synced_bcf_reader.h 1368 * which provides (amongst other things) an API to work transparently with 1369 * both indexed BCFs and VCFs. 1370 **************************************************************************/ 1371 1372 alias bcf_itr_destroy = hts_itr_destroy; 1373 1374 } /// closing @nogc nothrow from line 1136 1375 1376 pragma(inline, true) { 1377 /// Generate an iterator for an integer-based range query 1378 auto bcf_itr_queryi(const(hts_idx_t) *idx, int tid, int beg, int end) 1379 { return hts_itr_query(idx, tid, beg, end, &bcf_readrec); } 1380 1381 /// Generate an iterator for a string-based range query 1382 auto bcf_itr_querys(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, const(char) *s) 1383 { return hts_itr_querys(idx, s, cast(hts_name2id_f) &bcf_hdr_name2id, cast(void *) hdr, 1384 &hts_itr_query, &bcf_readrec); } 1385 1386 /// Iterate through the range 1387 /// r should (probably) point to your VCF (BCF) row structure 1388 /// TODO: attempt to define parameter r as bcf1_t *, which is what I think it should be 1389 int bcf_itr_next(htsFile *htsfp, hts_itr_t *itr, void *r) { 1390 if (htsfp.is_bgzf) 1391 return hts_itr_next(htsfp.fp.bgzf, itr, r, null); 1392 1393 hts_log_error(__FUNCTION__,"Only bgzf compressed files can be used with iterators"); 1394 errno = EINVAL; 1395 return -2; 1396 } 1397 1398 @nogc nothrow: 1399 1400 /// Load a BCF index 1401 /** @param fn BCF file name 1402 @return The index, or NULL if an error occurred. 1403 @note This only works for BCF files. Consider synced_bcf_reader instead 1404 which works for both BCF and VCF. 1405 */ 1406 auto bcf_index_load(const(char) *fn) { return hts_idx_load(fn, HTS_FMT_CSI); } 1407 1408 /// Get a list (char **) of sequence names from the index -- free only the array, not the values 1409 auto bcf_index_seqnames(const(hts_idx_t) *idx, const(bcf_hdr_t) *hdr, int *nptr) 1410 { return hts_idx_seqnames(idx, nptr, cast(hts_id2name_f) &bcf_hdr_id2name, cast(void *) hdr); } 1411 } 1412 1413 /// Load a BCF index from a given index file name 1414 /** @param fn Input BAM/BCF/etc filename 1415 @param fnidx The input index filename 1416 @return The index, or NULL if an error occurred. 1417 @note This only works for BCF files. Consider synced_bcf_reader instead 1418 which works for both BCF and VCF. 1419 */ 1420 hts_idx_t* bcf_index_load2(const(char)* fn, const(char)* fnidx); 1421 1422 /// Load a BCF index from a given index file name 1423 /** @param fn Input BAM/BCF/etc filename 1424 @param fnidx The input index filename 1425 @param flags Flags to alter behaviour (see description) 1426 @return The index, or NULL if an error occurred. 1427 @note This only works for BCF files. Consider synced_bcf_reader instead 1428 which works for both BCF and VCF. 1429 1430 The @p flags parameter can be set to a combination of the following 1431 values: 1432 1433 HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes 1434 HTS_IDX_SILENT_FAIL Fail silently if the index is not present 1435 1436 Equivalent to hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags); 1437 */ 1438 hts_idx_t* bcf_index_load3(const(char)* fn, const(char)* fnidx, int flags); 1439 1440 /** 1441 * bcf_index_build() - Generate and save an index file 1442 * @fn: Input VCF(compressed)/BCF filename 1443 * @min_shift: log2(width of the smallest bin), e.g. a value of 14 1444 * imposes a 16k base lower limit on the width of index bins. 1445 * Positive to generate CSI, or 0 to generate TBI. However, a small 1446 * value of min_shift would create a large index, which would lead to 1447 * reduced performance when using the index. A recommended value is 14. 1448 * For BCF files, only the CSI index can be generated. 1449 * 1450 * Returns 0 if successful, or negative if an error occurred. 1451 * 1452 * List of error codes: 1453 * -1 .. indexing failed 1454 * -2 .. opening @fn failed 1455 * -3 .. format not indexable 1456 * -4 .. failed to create and/or save the index 1457 */ 1458 int bcf_index_build(const(char)* fn, int min_shift); 1459 1460 /** 1461 * bcf_index_build2() - Generate and save an index to a specific file 1462 * @fn: Input VCF/BCF filename 1463 * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn 1464 * @min_shift: Positive to generate CSI, or 0 to generate TBI 1465 * 1466 * Returns 0 if successful, or negative if an error occurred. 1467 * 1468 * List of error codes: 1469 * -1 .. indexing failed 1470 * -2 .. opening @fn failed 1471 * -3 .. format not indexable 1472 * -4 .. failed to create and/or save the index 1473 */ 1474 int bcf_index_build2(const(char)* fn, const(char)* fnidx, int min_shift); 1475 1476 /** 1477 * bcf_index_build3() - Generate and save an index to a specific file 1478 * @fn: Input VCF/BCF filename 1479 * @fnidx: Output filename, or NULL to add .csi/.tbi to @fn 1480 * @min_shift: Positive to generate CSI, or 0 to generate TBI 1481 * @n_threads: Number of VCF/BCF decoder threads 1482 * 1483 * Returns 0 if successful, or negative if an error occurred. 1484 * 1485 * List of error codes: 1486 * -1 .. indexing failed 1487 * -2 .. opening @fn failed 1488 * -3 .. format not indexable 1489 * -4 .. failed to create and/or save the index 1490 */ 1491 int bcf_index_build3( 1492 const(char)* fn, 1493 const(char)* fnidx, 1494 int min_shift, 1495 int n_threads); 1496 1497 /// Initialise fp->idx for the current format type, for VCF and BCF files. 1498 /** @param fp File handle for the data file being written. 1499 @param h BCF header structured (needed for BAI and CSI). 1500 @param min_shift CSI bin size (CSI default is 14). 1501 @param fnidx Filename to write index to. This pointer must remain valid 1502 until after bcf_idx_save is called. 1503 @return 0 on success, <0 on failure. 1504 @note This must be called after the header has been written, but before 1505 any other data. 1506 */ 1507 int bcf_idx_init(htsFile* fp, bcf_hdr_t* h, int min_shift, const(char)* fnidx); 1508 1509 /// Writes the index initialised with bcf_idx_init to disk. 1510 /** @param fp File handle for the data file being written. 1511 @return 0 on success, <0 on failure. 1512 */ 1513 int bcf_idx_save(htsFile* fp); 1514 1515 /******************* 1516 * Typed value I/O * 1517 *******************/ 1518 1519 /** 1520 Note that in contrast with BCFv2.1 specification, HTSlib implementation 1521 allows missing values in vectors. For integer types, the values 0x80, 1522 0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001, 1523 0x80000001 as end-of-vector indicators. Similarly for floats, the value of 1524 0x7F800001 is interpreted as a missing value and 0x7F800002 as an 1525 end-of-vector indicator. 1526 Note that the end-of-vector byte is not part of the vector. 1527 1528 This trial BCF version (v2.2) is compatible with the VCF specification and 1529 enables to handle correctly vectors with different ploidy in presence of 1530 missing values. 1531 */ 1532 enum bcf_int8_vector_end = -127; /* INT8_MIN + 1 */ 1533 enum bcf_int16_vector_end = -32_767; /* INT16_MIN + 1 */ 1534 enum bcf_int32_vector_end = -2_147_483_647; /* INT32_MIN + 1 */ 1535 enum bcf_int64_vector_end = -9_223_372_036_854_775_807L; /* INT64_MIN + 1 */ 1536 enum bcf_str_vector_end = 0; 1537 enum bcf_int8_missing = -128; /* INT8_MIN */ 1538 enum bcf_int16_missing = -32_767 - 1; /* INT16_MIN */ 1539 enum bcf_int32_missing = -2_147_483_647 - 1; /* INT32_MIN */ 1540 enum bcf_int64_missing = -9_223_372_036_854_775_807L - 1L; /* INT64_MIN */ 1541 enum bcf_str_missing = 0x07; 1542 1543 // Limits on BCF values stored in given types. Max values are the same 1544 // as for the underlying type. Min values are slightly different as 1545 // the last 8 values for each type were reserved by BCFv2.2. 1546 enum BCF_MAX_BT_INT8 = 0x7f; /* INT8_MAX */ 1547 enum BCF_MAX_BT_INT16 = 0x7fff; /* INT16_MAX */ 1548 enum BCF_MAX_BT_INT32 = 0x7fffffff; /* INT32_MAX */ 1549 enum BCF_MIN_BT_INT8 = -120; /* INT8_MIN + 8 */ 1550 enum BCF_MIN_BT_INT16 = -32_760; /* INT16_MIN + 8 */ 1551 enum BCF_MIN_BT_INT32 = -2_147_483_640; /* INT32_MIN + 8 */ 1552 1553 extern __gshared uint bcf_float_vector_end; 1554 extern __gshared uint bcf_float_missing; 1555 version(LDC) pragma(inline, true): 1556 version(GNU) pragma(inline, true): 1557 /** u wot */ 1558 void bcf_float_set(float *ptr, uint32_t value) 1559 { 1560 union U { uint32_t i; float f; } 1561 U u; 1562 u.i = value; 1563 *ptr = u.f; 1564 } 1565 1566 /// float vector macros 1567 void bcf_float_set_vector_end(float x) { bcf_float_set(&x, bcf_float_vector_end); } 1568 /// ditto 1569 void bcf_float_set_missing(float x) { bcf_float_set(&x, bcf_float_missing); } 1570 1571 /** u wot */ 1572 pragma(inline, true) 1573 int bcf_float_is_missing(float f) 1574 { 1575 union U { uint32_t i; float f; } 1576 U u; 1577 u.f = f; 1578 return u.i==bcf_float_missing ? 1 : 0; 1579 } 1580 /// ditto 1581 pragma(inline, true) 1582 int bcf_float_is_vector_end(float f) 1583 { 1584 union U { uint32_t i; float f; } 1585 U u; 1586 u.f = f; 1587 return u.i==bcf_float_vector_end ? 1 : 0; 1588 } 1589 1590 /// (Undocumented) Format GT field 1591 pragma(inline, true) 1592 int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str) 1593 { 1594 uint32_t e = 0; 1595 void branch(T)() // gets a closure over e (was #define macro) 1596 if (is(T == int8_t) || is(T == int16_t) || is(T == int32_t)) 1597 { 1598 static if (is(T == int8_t)) 1599 auto vector_end = bcf_int8_vector_end; 1600 else static if (is(T == int16_t)) 1601 auto vector_end = bcf_int16_vector_end; 1602 else 1603 auto vector_end = bcf_int32_vector_end; 1604 1605 T *ptr = cast(T*) (fmt.p + (isample * fmt.size)); 1606 for (int i=0; i<fmt.n && ptr[i] != vector_end; i++) 1607 { 1608 if ( i ) e |= kputc("/|"[ptr[i]&1], str) < 0; 1609 if ( !(ptr[i]>>1) ) e |= kputc('.', str) < 0; 1610 else e |= kputw((ptr[i]>>1) - 1, str) < 0; 1611 } 1612 if (i == 0) e |= kputc('.', str) < 0; 1613 } 1614 switch (fmt.type) { 1615 case BCF_BT_INT8: branch!int8_t; break; 1616 case BCF_BT_INT16: branch!int16_t; break; 1617 case BCF_BT_INT32: branch!int32_t; break; 1618 case BCF_BT_NULL: e |= kputc('.', str) < 0; break; 1619 default: hts_log_error("Unexpected type %d", fmt.type); return -2; 1620 } 1621 1622 return e == 0 ? 0 : -1; 1623 } 1624 1625 1626 pragma(inline, true) 1627 int bcf_enc_size(kstring_t *s, int size, int type) 1628 { 1629 uint32_t e = 0; 1630 if (size >= 15) { 1631 e |= kputc(15<<4|type, s) < 0; 1632 if (size >= 128) { 1633 if (size >= 32_768) { 1634 int32_t x = size; 1635 e |= kputc(1<<4|BCF_BT_INT32, s) < 0; 1636 e |= kputsn(cast(char*)&x, 4, s) < 0; 1637 } else { 1638 int16_t x = size; 1639 e |= kputc(1<<4|BCF_BT_INT16, s) < 0; 1640 e |= kputsn(cast(char*)&x, 2, s) < 0; 1641 } 1642 } else { 1643 e |= kputc(1<<4|BCF_BT_INT8, s) < 0; 1644 e |= kputc(size, s) < 0; 1645 } 1646 } else e |= kputc(size<<4|type, s) < 0; 1647 return e == 0 ? 0 : -1; 1648 } 1649 1650 1651 /// Undocumented Encode integer type? 1652 pragma(inline, true) 1653 int bcf_enc_inttype(long x) 1654 { 1655 if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) return BCF_BT_INT8; 1656 if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) return BCF_BT_INT16; 1657 return BCF_BT_INT32; 1658 } 1659 1660 /// Undocumented Encode integer variant 1 1661 pragma(inline, true) 1662 int bcf_enc_int1(kstring_t *s, int32_t x) 1663 { 1664 uint32_t e = 0; 1665 if (x == bcf_int32_vector_end) { 1666 e |= bcf_enc_size(s, 1, BCF_BT_INT8); 1667 e |= kputc(bcf_int8_vector_end, s) < 0; 1668 } else if (x == bcf_int32_missing) { 1669 e |= bcf_enc_size(s, 1, BCF_BT_INT8); 1670 e |= kputc(bcf_int8_missing, s) < 0; 1671 } else if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) { 1672 e |= bcf_enc_size(s, 1, BCF_BT_INT8); 1673 e |= kputc(x, s) < 0; 1674 } else if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) { 1675 int16_t z = x; 1676 e |= bcf_enc_size(s, 1, BCF_BT_INT16); 1677 e |= kputsn(cast(char*)&z, 2, s) < 0; 1678 } else { 1679 int32_t z = x; 1680 e |= bcf_enc_size(s, 1, BCF_BT_INT32); 1681 e |= kputsn(cast(char*)&z, 4, s) < 0; 1682 } 1683 return e == 0 ? 0 : -1; 1684 } 1685 /// Return the value of a single typed integer. 1686 /** @param p Pointer to input data block. 1687 @param type One of the BCF_BT_INT* type codes 1688 @param[out] q Location to store an updated value for p 1689 @return The integer value, or zero if @p type is not valid. 1690 1691 If @p type is not one of BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or 1692 BCF_BT_INT64, zero will be returned and @p *q will not be updated. 1693 Otherwise, the integer value will be returned and @p *q will be set 1694 to the memory location immediately following the integer value. 1695 1696 Cautious callers can detect invalid type codes by checking that *q has 1697 actually been updated. 1698 */ 1699 pragma(inline, true) 1700 int64_t bcf_dec_int1(const(ubyte) *p, int type, ubyte **q) 1701 { 1702 if (type == BCF_BT_INT8) { 1703 *q = cast(ubyte*)p + 1; 1704 return le_to_i8(p); 1705 } else if (type == BCF_BT_INT16) { 1706 *q = cast(ubyte*)p + 2; 1707 return le_to_i16(p); 1708 } else if (type == BCF_BT_INT32) { 1709 *q = cast(ubyte*)p + 4; 1710 return le_to_i32(p); 1711 } else if (type == BCF_BT_INT64) { 1712 *q = cast(ubyte*)p + 4; 1713 return le_to_i64(p); 1714 } else { // Invalid type. 1715 return 0; 1716 } 1717 } 1718 1719 /// Return the value of a single typed integer from a byte stream. 1720 /** @param p Pointer to input data block. 1721 @param[out] q Location to store an updated value for p 1722 @return The integer value, or zero if the type code was not valid. 1723 1724 Reads a one-byte type code from @p p, and uses it to decode an integer 1725 value from the following bytes in @p p. 1726 1727 If the type is not one of BCF_BT_INT8, BCF_BT_INT16 or BCF_BT_INT32, zero 1728 will be returned and @p *q will unchanged. Otherwise, the integer value will 1729 be returned and @p *q will be set to the memory location immediately following 1730 the integer value. 1731 1732 Cautious callers can detect invalid type codes by checking that *q has 1733 actually been updated. 1734 */ 1735 pragma(inline, true) 1736 long bcf_dec_typed_int1 (const(ubyte)* p, ubyte** q) 1737 { 1738 return bcf_dec_int1(p + 1, *p&0xf, q); 1739 } 1740 1741 pragma(inline, true) 1742 int bcf_dec_size (const(ubyte)* p, ubyte** q, int* type) 1743 { 1744 *type = *p & 0xf; 1745 if (*p>>4 != 15) { 1746 *q = cast(ubyte*)p + 1; 1747 return *p>>4; 1748 } else return bcf_dec_typed_int1(p + 1, q); 1749 }