1 /// @file htslib/vcf.h 2 /// High-level VCF/BCF variant calling file operations. 3 /* 4 Copyright (C) 2012, 2013 Broad Institute. 5 Copyright (C) 2012-2020 Genome Research Ltd. 6 7 Author: Heng Li <lh3@sanger.ac.uk> 8 9 Permission is hereby granted, free of charge, to any person obtaining a copy 10 of this software and associated documentation files (the "Software"), to deal 11 in the Software without restriction, including without limitation the rights 12 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 copies of the Software, and to permit persons to whom the Software is 14 furnished to do so, subject to the following conditions: 15 16 The above copyright notice and this permission notice shall be included in 17 all copies or substantial portions of the Software. 18 19 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 DEALINGS IN THE SOFTWARE. */ 26 27 /* 28 todo: 29 - make the function names consistent 30 - provide calls to abstract away structs as much as possible 31 */ 32 /// Section numbers refer to VCF Specification v4.2: https://samtools.github.io/hts-specs/VCFv4.2.pdf 33 module htslib.vcf; 34 35 import std.bitmanip; 36 import std.string: toStringz; 37 import core.stdc.errno : errno, EINVAL; 38 import core.stdc.config; 39 40 import htslib.hts; 41 import htslib.hts_log; 42 import htslib.hts_endian; 43 import htslib.kstring : kstring_t; 44 import htslib.bgzf : BGZF; 45 46 @system: 47 extern (C): 48 @nogc nothrow { 49 50 /* Included only for backwards compatibility with e.g. 
bcftools 1.10 */

/*****************
 * Header struct *
 *****************/

// Header line kinds (stored in bcf_hrec_t.type).
enum BCF_HL_FLT = 0; /// header line: FILTER
enum BCF_HL_INFO = 1;/// header line: INFO
enum BCF_HL_FMT = 2; /// header line: FORMAT
enum BCF_HL_CTG = 3; /// header line: contig
enum BCF_HL_STR = 4; /// header line: structured header line TAG=<A=..,B=..>
enum BCF_HL_GEN = 5; /// header line: generic header line

// Header value types.
enum BCF_HT_FLAG = 0; /// header type: FLAG
enum BCF_HT_INT = 1; /// header type: INTEGER
enum BCF_HT_REAL = 2; /// header type: REAL
enum BCF_HT_STR = 3; /// header type: STRING
enum BCF_HT_LONG = BCF_HT_INT | 0x100; // BCF_HT_INT, but for int64_t values; VCF only!

// Value-count ("variable length") classes.
// NOTE(review): the A/G/R meanings below are taken from the VCF `Number=`
// conventions (spec §1.2.2), not from anything visible in this file — confirm.
enum BCF_VL_FIXED = 0; /// variable length: fixed number of values
enum BCF_VL_VAR = 1; /// variable length: variable
enum BCF_VL_A = 2; /// variable length: presumably Number=A (one value per ALT allele)
enum BCF_VL_G = 3; /// variable length: presumably Number=G (one value per possible genotype)
enum BCF_VL_R = 4; /// variable length: presumably Number=R (one value per allele, REF included)

/* === Dictionary ===

   The header keeps three dictionaries. The first keeps IDs in the
   "FILTER/INFO/FORMAT" lines, the second keeps the sequence names and lengths
   in the "contig" lines and the last keeps the sample names. bcf_hdr_t::dict[]
   is the actual hash table, which is opaque to the end users. In the hash
   table, the key is the ID or sample name as a C string and the value is a
   bcf_idinfo_t struct. bcf_hdr_t::id[] points to key-value pairs in the hash
   table in the order that they appear in the VCF header. bcf_hdr_t::n[] is the
   size of the hash table or, equivalently, the length of the id[] arrays.
*/

// Indices into bcf_hdr_t.n / .id / .dict / .m (one slot per dictionary).
enum BCF_DT_ID = 0;     /// dictionary type: ID
enum BCF_DT_CTG = 1;    /// dictionary type: CONTIG
enum BCF_DT_SAMPLE = 2; /// dictionary type: SAMPLE

/// Structured representation of a header line (§1.2)
struct bcf_hrec_t // @suppress(dscanner.style.phobos_naming_convention)
{
    int type;       /// One of the BCF_HL_* types
    char* key;      /// The part before '=', i.e. FILTER/INFO/FORMAT/contig/fileformat etc.
    char* value;    /// Set only for generic lines, NULL for FILTER/INFO, etc.
    int nkeys;      /// Number of structured fields
    char** keys;    /// The keys of the key=value pairs
    char** vals;    /// The values of the key=value pairs
}

/// ID Dictionary entry
struct bcf_idinfo_t
{
    ulong[3] info;          /** stores Number:20, var:4, Type:4, ColType:4 in info[0..2]
                                for BCF_HL_FLT,INFO,FMT and contig length in info[0] for BCF_HL_CTG */
    bcf_hrec_t*[3] hrec;    /// pointers to header lines for [FILTER, INFO, FORMAT] in order
    int id;                 /// primary key
}

/// ID Dictionary k/v
struct bcf_idpair_t // @suppress(dscanner.style.phobos_naming_convention)
{
    const(char)* key;           /// header dictionary FILTER/INFO/FORMAT ID key
    const(bcf_idinfo_t)* val;   /// header dictionary FILTER/INFO/FORMAT ID entry
}

/// Structured representation of VCF header (§1.2)
/// Note that bcf_hdr_t structs must always be created via bcf_hdr_init()
struct bcf_hdr_t // @suppress(dscanner.style.phobos_naming_convention)
{
    int[3] n;               /// n: the size of the dictionary block in use, (allocated size, m, is below to preserve ABI)
    bcf_idpair_t*[3] id;    /// ID dictionary {FILTER/INFO/FORMAT, contig, sample} ID key/entry
    void*[3] dict;          /// hash table
    char** samples;         /// list of samples (NOTE(review): presumably sample names — confirm)
    bcf_hrec_t** hrec;      /// structured header records; count is in nhrec
    int nhrec;              /// # of header records
    int dirty;              /// NOTE(review): meaning not documented here
    int ntransl;            /// for bcf_translate()
    int*[2] transl;         /// for bcf_translate()
    int nsamples_ori;       /// for bcf_hdr_set_samples()
    ubyte* keep_samples;    /// NOTE(review): meaning not documented here
    kstring_t mem;          /// NOTE(review): meaning not documented here
    int[3] m;               /// m: allocated size of the dictionary block in use (see n above)
}

/// Lookup table used in bcf_record_check
/// MAINTAINER: the C header declares this as `uint8_t bcf_type_shift[]`, i.e.
/// an array of unspecified length. A D slice (`ubyte[]`) is a {length, pointer}
/// pair and does NOT have that layout, so binding the symbol as a slice would
/// reinterpret the table bytes as a length and a pointer. It is therefore bound
/// as a fixed-size static array. The length (16) mirrors the initializer in
/// htslib's vcf.c — re-check it whenever the bindings are updated.
extern __gshared ubyte[16] bcf_type_shift;

/**************
 * VCF record *
 **************/

// BCF value type codes (used by the `type` fields of bcf_fmt_t / bcf_info_t).
enum BCF_BT_NULL = 0;   /// null
enum BCF_BT_INT8 = 1;   /// int8
enum BCF_BT_INT16 = 2;  /// int16
enum BCF_BT_INT32 = 3;  /// int32
enum BCF_BT_INT64 = 4;  /// Unofficial, for internal use only per htslib headers
enum BCF_BT_FLOAT = 5;  /// float (32?)
enum BCF_BT_CHAR = 7;   /// char (8 bit)

// Variant type bit flags.
enum VCF_REF = 0;       /// ref (e.g. in a gVCF)
enum VCF_SNP = 1;       /// SNP
enum VCF_MNP = 2;       /// MNP
enum VCF_INDEL = 4;     /// INDEL
enum VCF_OTHER = 8;     /// other (e.g. SV)
enum VCF_BND = 16;      /// breakend
enum VCF_OVERLAP = 32;  /// overlapping deletion, ALT=*

/// variant type record embedded in bcf_dec_t
/// variant type and the number of bases affected, negative for deletions
struct bcf_variant_t // @suppress(dscanner.style.phobos_naming_convention)
{
    int type;   /// variant type
    int n;      /// number of bases affected, negative for deletions
}

/// FORMAT field data (§1.4.2 Genotype fields)
struct bcf_fmt_t // @suppress(dscanner.style.phobos_naming_convention)
{
    import std.bitmanip : bitfields;

    int id;     /// id: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$id].key
    int n;      /// n: number of values per-sample
    int size;   /// size: number of bytes per-sample
    int type;   /// type: one of BCF_BT_* types
    ubyte* p;   /// same as vptr and vptr_* in bcf_info_t below
    uint p_len; /// see vptr_len in bcf_info_t

    // Mirrors the C bit-fields `uint32_t p_off:31, p_free:1;`
    // (see vptr_off / vptr_free in bcf_info_t).
    mixin(bitfields!(
        uint, "p_off", 31,
        bool, "p_free", 1));
}

/// INFO field data (§1.4.1 Fixed fields, (8) INFO)
struct bcf_info_t // @suppress(dscanner.style.phobos_naming_convention)
{
    import std.bitmanip : bitfields;

    int key;    /// key: numeric tag id, the corresponding string is bcf_hdr_t::id[BCF_DT_ID][$key].key
    int type;   /// type: one of BCF_BT_* types

    /// Scalar value storage: integer value or float value
    union V1
    {
        long i;     /// integer value
        float f;    /// float value
    }

    V1 v1;          /// only set if $len==1; for easier access
    ubyte* vptr;    /// pointer to data array in bcf1_t->shared.s, excluding the size+type and tag id bytes
    uint vptr_len;  /// length of the vptr block or, when set, of the vptr_mod block, excluding offset

    // vptr_off:  vptr offset, i.e., the size of the INFO key plus size+type bytes
    // vptr_free: indicates that vptr-vptr_off must be freed; set only when
    //            modified and the new data block is bigger than the original
    mixin(bitfields!(
        uint, "vptr_off", 31,
        uint, "vptr_free", 1));
    int len;        /// vector length, 1 for scalars
}

// Bit flags for bcf_dec_t.shared_dirty.
enum BCF1_DIRTY_ID = 1;     /// ID was edited
enum BCF1_DIRTY_ALS = 2;    /// Allele(s) was edited
enum BCF1_DIRTY_FLT = 4;    /// FILTER was edited
enum BCF1_DIRTY_INF = 8;    /// INFO was edited

/// Variable-length data from a VCF record
struct bcf_dec_t // @suppress(dscanner.style.phobos_naming_convention)
{
    // m_*: allocated size (high-water mark); do not change
    int m_fmt;          /// allocated size (high-water mark); do not change
    int m_info;         /// ditto
    int m_id;           /// ditto
    int m_als;          /// ditto
    int m_allele;       /// ditto
    int m_flt;          /// ditto
    int n_flt;          /// Number of FILTER fields
    int* flt;           /// FILTER keys in the dictionary
    char* id;           /// ID
    char* als;          /// REF+ALT block (\0-separated)
    char** allele;      /// allele[0] is the REF (allele[] pointers to the als block); all null terminated
    bcf_info_t* info;   /// INFO
    bcf_fmt_t* fmt;     /// FORMAT and individual sample
    bcf_variant_t* var; /// $var and $var_type set only when set_variant_types called
    int n_var;          /// variant number(???)
    int var_type;       /// variant type (TODO: make enum)
    int shared_dirty;   /// if set, shared.s must be recreated on BCF output (TODO: make enum)
    int indiv_dirty;    /// if set, indiv.s must be recreated on BCF output (TODO: make enum)
}

// Bit flags reported in bcf1_t.errcode.
// NOTE(review): descriptions other than the first two are inferred from the
// constant names only — confirm against htslib.
enum BCF_ERR_CTG_UNDEF = 1;     /// BCF error: undefined contig
enum BCF_ERR_TAG_UNDEF = 2;     /// BCF error: undefined tag
enum BCF_ERR_NCOLS = 4;         /// BCF error: presumably wrong number of columns
enum BCF_ERR_LIMITS = 8;        /// BCF error: presumably a limit was exceeded
enum BCF_ERR_CHAR = 16;         /// BCF error: presumably an invalid character
enum BCF_ERR_CTG_INVALID = 32;  /// BCF error: presumably an invalid contig
enum BCF_ERR_TAG_INVALID = 64;  /// BCF error: presumably an invalid tag

/**
    The bcf1_t structure corresponds to one VCF/BCF line. Reading from VCF file
    is slower because the string is first to be parsed, packed into BCF line
    (done in vcf_parse), then unpacked into internal bcf1_t structure. If it
    is known in advance that some of the fields will not be required (notably
    the sample columns), parsing of these can be skipped by setting max_unpack
    appropriately.
    Similarly, it is fast to output a BCF line because the columns (kept in
    shared.s, indiv.s, etc.) are written directly by bcf_write, whereas a VCF
    line must be formatted in vcf_format.
*/
struct bcf1_t
{
    import std.bitmanip : bitfields;

    hts_pos_t pos; /// POS
    hts_pos_t rlen; /// length of REF
    int rid; /// CHROM
    float qual; /// QUAL

    // Packed counters: 16+16 bits followed by 8+24 bits.
    // NOTE(review): presumably the number of INFO fields, alleles, FORMAT
    // fields and samples respectively — confirm against htslib.
    mixin(bitfields!(
        uint, "n_info", 16,
        uint, "n_allele", 16,
        uint, "n_fmt", 8,
        uint, "n_sample", 24));

    // `shared_` carries a trailing underscore because `shared` is a D keyword;
    // the surrounding documentation refers to it as shared.s.
    kstring_t shared_; /// packed columns written directly by bcf_write (see struct docs)
    kstring_t indiv; /// packed columns written directly by bcf_write (see struct docs)
    bcf_dec_t d; /// lazy evaluation: $d is not generated by bcf_read(), but by explicitly calling bcf_unpack()
    int max_unpack; /// Set to BCF_UN_STR, BCF_UN_FLT, or BCF_UN_INFO to boost performance of vcf_parse when some of the fields won't be needed
    int unpacked; /// remember what has been unpacked to allow calling bcf_unpack() repeatedly without redoing the work
    int[3] unpack_size; /// the original block size of ID, REF+ALT and FILTER
    int errcode; /// one of BCF_ERR_* codes
}

/*******
 * API *
 *******/

/***********************************************************************
 * BCF and VCF I/O
 *
 * A note about naming conventions: htslib internally represents VCF
 * records as bcf1_t data structures, therefore most functions are
 * prefixed with bcf_. There are a few exceptions where the functions must
 * be aware of both BCF and VCF worlds, such as bcf_parse vs vcf_parse. In
 * these cases, functions prefixed with bcf_ are more general and work
 * with both BCF and VCF.
 *
 ***********************************************************************/

/** These macros are defined only for consistency with other parts of htslib */
alias bcf_init1 = bcf_init;
alias bcf_read1 = bcf_read;
alias vcf_read1 = vcf_read;
alias bcf_write1 = bcf_write;
alias vcf_write1 = vcf_write;
alias bcf_destroy1 = bcf_destroy;
alias bcf_empty1 = bcf_empty;
alias vcf_parse1 = vcf_parse;
alias bcf_clear1 = bcf_clear;
alias vcf_format1 = vcf_format;

/**
 * bcf_hdr_init() - create an empty BCF header.
317 * @param mode "r" or "w" 318 * 319 * When opened for writing, the mandatory fileFormat and 320 * FILTER=PASS lines are added automatically. 321 * 322 * The bcf_hdr_t struct returned by a successful call should be freed 323 * via bcf_hdr_destroy() when it is no longer needed. 324 */ 325 bcf_hdr_t* bcf_hdr_init(const(char)* mode); 326 327 /** Destroy a BCF header struct */ 328 void bcf_hdr_destroy(bcf_hdr_t* h); 329 330 /** Allocate and initialize a bcf1_t object. 331 * 332 * The bcf1_t struct returned by a successful call should be freed 333 * via bcf_destroy() when it is no longer needed. 334 */ 335 bcf1_t* bcf_init(); 336 337 /** Deallocate a bcf1_t object */ 338 void bcf_destroy(bcf1_t* v); 339 340 /** 341 * Same as bcf_destroy() but frees only the memory allocated by bcf1_t, 342 * not the bcf1_t object itself. 343 */ 344 void bcf_empty(bcf1_t* v); 345 346 /** 347 * Make the bcf1_t object ready for next read. Intended mostly for 348 * internal use, the user should rarely need to call this function 349 * directly. 350 */ 351 void bcf_clear(bcf1_t* v); 352 353 /** bcf_open and vcf_open mode: please see hts_open() in hts.h */ 354 alias vcfFile = htsFile; 355 alias bcf_open = hts_open; 356 alias vcf_open = hts_open; 357 alias bcf_close = hts_close; 358 alias vcf_close = hts_close; 359 360 /// Read a VCF or BCF header 361 /** @param fp The file to read the header from 362 @return Pointer to a populated header structure on success; 363 NULL on failure 364 365 The bcf_hdr_t struct returned by a successful call should be freed 366 via bcf_hdr_destroy() when it is no longer needed. 367 */ 368 bcf_hdr_t* bcf_hdr_read(htsFile* fp); 369 370 /** 371 * bcf_hdr_set_samples() - for more efficient VCF parsing when only one/few samples are needed 372 * @param samples samples to include or exclude from file or as a comma-separated string. 373 * LIST|FILE .. select samples in list/file 374 * ^LIST|FILE .. exclude samples from list/file 375 * - .. 
include all samples 376 * NULL .. exclude all samples 377 * @param is_file @p samples is a file (1) or a comma-separated list (0) 378 * 379 * The bottleneck of VCF reading is parsing of genotype fields. If the 380 * reader knows in advance that only subset of samples is needed (possibly 381 * no samples at all), the performance of bcf_read() can be significantly 382 * improved by calling bcf_hdr_set_samples after bcf_hdr_read(). 383 * The function bcf_read() will subset the VCF/BCF records automatically 384 * with the notable exception when reading records via bcf_itr_next(). 385 * In this case, bcf_subset_format() must be called explicitly, because 386 * bcf_readrec() does not see the header. 387 * 388 * Returns 0 on success, -1 on error or a positive integer if the list 389 * contains samples not present in the VCF header. In such a case, the 390 * return value is the index of the offending sample. 391 */ 392 int bcf_hdr_set_samples(bcf_hdr_t* hdr, const(char)* samples, int is_file); 393 394 int bcf_subset_format(const(bcf_hdr_t)* hdr, bcf1_t* rec); 395 396 /// Write a VCF or BCF header 397 /** @param fp Output file 398 @param h The header to write 399 @return 0 on success; -1 on failure 400 */ 401 int bcf_hdr_write(htsFile* fp, bcf_hdr_t* h); 402 403 /** 404 * Parse VCF line contained in kstring and populate the bcf1_t struct 405 * The line must not end with \n or \r characters. 406 */ 407 int vcf_parse(kstring_t* s, const(bcf_hdr_t)* h, bcf1_t* v); 408 409 /** 410 * Complete the file opening mode, according to its extension. 411 * @param mode Preallocated mode string to be completed. 412 * @param fn File name to be opened. 413 * @param format Format string (vcf|bcf|vcf.gz) 414 * @return 0 on success; -1 on failure 415 */ 416 int vcf_open_mode(char* mode, const(char)* fn, const(char)* format); 417 418 /** The opposite of vcf_parse. 
It should rarely be called directly, see vcf_write */ 419 int vcf_format(const(bcf_hdr_t)* h, const(bcf1_t)* v, kstring_t* s); 420 421 /// Read next VCF or BCF record 422 /** @param fp The file to read the record from 423 @param h The header for the vcf/bcf file 424 @param v The bcf1_t structure to populate 425 @return 0 on success; -1 on end of file; < -1 on critical error 426 427 On errors which are not critical for reading, such as missing header 428 definitions in vcf files, zero will be returned but v->errcode will have been 429 set to one of BCF_ERR* codes and must be checked before calling bcf_write(). 430 */ 431 int bcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v); 432 433 /** 434 * bcf_unpack() - unpack/decode a BCF record (fills the bcf1_t::d field) 435 * 436 * Note that bcf_unpack() must be called even when reading VCF. It is safe 437 * to call the function repeatedly, it will not unpack the same field 438 * twice. 439 */ 440 enum BCF_UN_STR = 1; // up to ALT inclusive 441 enum BCF_UN_FLT = 2; // up to FILTER 442 enum BCF_UN_INFO = 4; // up to INFO 443 enum BCF_UN_SHR = BCF_UN_STR | BCF_UN_FLT | BCF_UN_INFO; // all shared information 444 enum BCF_UN_FMT = 8; // unpack format and each sample 445 enum BCF_UN_IND = BCF_UN_FMT; // a synonym of BCF_UN_FMT 446 enum BCF_UN_ALL = BCF_UN_SHR | BCF_UN_FMT; // everything 447 int bcf_unpack(bcf1_t* b, int which); 448 449 /* 450 * bcf_dup() - create a copy of BCF record. 451 * 452 * Note that bcf_unpack() must be called on the returned copy as if it was 453 * obtained from bcf_read(). Also note that bcf_dup() calls bcf_sync1(src) 454 * internally to reflect any changes made by bcf_update_* functions. 455 * 456 * The bcf1_t struct returned by a successful call should be freed 457 * via bcf_destroy() when it is no longer needed. 458 */ 459 bcf1_t* bcf_dup(bcf1_t* src); 460 461 bcf1_t* bcf_copy(bcf1_t* dst, bcf1_t* src); 462 463 /// Write one VCF or BCF record. The type is determined at the open() call. 
464 /** @param fp The file to write to 465 @param h The header for the vcf/bcf file 466 @param v The bcf1_t structure to write 467 @return 0 on success; -1 on error 468 */ 469 int bcf_write(htsFile* fp, bcf_hdr_t* h, bcf1_t* v); 470 471 /** 472 * The following functions work only with VCFs and should rarely be called 473 * directly. Usually one wants to use their bcf_* alternatives, which work 474 * transparently with both VCFs and BCFs. 475 */ 476 /// Read a VCF format header 477 /** @param fp The file to read the header from 478 @return Pointer to a populated header structure on success; 479 NULL on failure 480 481 Use bcf_hdr_read() instead. 482 483 The bcf_hdr_t struct returned by a successful call should be freed 484 via bcf_hdr_destroy() when it is no longer needed. 485 */ 486 bcf_hdr_t* vcf_hdr_read(htsFile* fp); 487 488 /// Write a VCF format header 489 /** @param fp Output file 490 @param h The header to write 491 @return 0 on success; -1 on failure 492 493 Use bcf_hdr_write() instead 494 */ 495 int vcf_hdr_write(htsFile* fp, const(bcf_hdr_t)* h); 496 497 /// Read a record from a VCF file 498 /** @param fp The file to read the record from 499 @param h The header for the vcf file 500 @param v The bcf1_t structure to populate 501 @return 0 on success; -1 on end of file; < -1 on error 502 503 Use bcf_read() instead 504 */ 505 int vcf_read(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v); 506 507 /// Write a record to a VCF file 508 /** @param fp The file to write to 509 @param h The header for the vcf file 510 @param v The bcf1_t structure to write 511 @return 0 on success; -1 on error 512 513 Use bcf_write() instead 514 */ 515 int vcf_write(htsFile* fp, const(bcf_hdr_t)* h, bcf1_t* v); 516 517 /** Helper function for the bcf_itr_next() macro; internal use, ignore it */ 518 int bcf_readrec( 519 BGZF* fp, 520 void* null_, 521 void* v, 522 int* tid, 523 hts_pos_t* beg, 524 hts_pos_t* end); 525 526 /// Write a line to a VCF file 527 /** @param line Line to write 528 
@param fp File to write it to 529 @return 0 on success; -1 on failure 530 531 @note No checks are done on the line being added, apart from 532 ensuring that it ends with a newline. This function 533 should therefore be used with care. 534 */ 535 int vcf_write_line(htsFile* fp, kstring_t* line); 536 537 /************************************************************************** 538 * Header querying and manipulation routines 539 **************************************************************************/ 540 541 /** Create a new header using the supplied template 542 * 543 * The bcf_hdr_t struct returned by a successful call should be freed 544 * via bcf_hdr_destroy() when it is no longer needed. 545 * @return NULL on failure, header otherwise 546 */ 547 bcf_hdr_t* bcf_hdr_dup(const(bcf_hdr_t)* hdr); 548 549 /** 550 * Copy header lines from src to dst if not already present in dst. See also bcf_translate(). 551 * Returns 0 on success or sets a bit on error: 552 * 1 .. conflicting definitions of tag length 553 * // todo 554 */ 555 deprecated("Please use bcf_hdr_merge instead") 556 int bcf_hdr_combine(bcf_hdr_t* dst, const(bcf_hdr_t)* src); 557 558 /** 559 * bcf_hdr_merge() - copy header lines from src to dst, see also bcf_translate() 560 * @param dst: the destination header to be merged into, NULL on the first pass 561 * @param src: the source header 562 * @return NULL on failure, header otherwise 563 * 564 * Notes: 565 * - use as: 566 * bcf_hdr_t *dst = NULL; 567 * for (i=0; i<nsrc; i++) dst = bcf_hdr_merge(dst,src[i]); 568 * 569 * - bcf_hdr_merge() replaces bcf_hdr_combine() which had a problem when 570 * combining multiple BCF headers. The current bcf_hdr_combine() 571 * does not have this problem, but became slow when used for many files. 572 */ 573 bcf_hdr_t* bcf_hdr_merge(bcf_hdr_t* dst, const(bcf_hdr_t)* src); 574 575 /** 576 * bcf_hdr_add_sample() - add a new sample. 
577 * @param sample: sample name to be added 578 * 579 * Note: 580 * After all samples have been added, the internal header structure must be updated 581 * by calling bcf_hdr_sync(). This is normally done automatically by the first bcf_hdr_write() 582 * or bcf_write() call. Otherwise, the caller must force the update by calling bcf_hdr_sync() 583 * explicitly. 584 */ 585 int bcf_hdr_add_sample(bcf_hdr_t* hdr, const(char)* sample); 586 587 /** Read VCF header from a file and update the header */ 588 int bcf_hdr_set(bcf_hdr_t* hdr, const(char)* fname); 589 590 /// Appends formatted header text to _str_. 591 /** If _is_bcf_ is zero, `IDX` fields are discarded. 592 * @return 0 if successful, or negative if an error occurred 593 * @since 1.4 594 */ 595 int bcf_hdr_format(const(bcf_hdr_t)* hdr, int is_bcf, kstring_t* str); 596 597 /** Returns formatted header (newly allocated string) and its length, 598 * excluding the terminating \0. If is_bcf parameter is unset, IDX 599 * fields are discarded. 600 * @deprecated Use bcf_hdr_format() instead as it can handle huge headers. 601 */ 602 deprecated("use bcf_hdr_format() instead") 603 char* bcf_hdr_fmt_text(const(bcf_hdr_t)* hdr, int is_bcf, int* len); 604 605 /** Append new VCF header line, returns 0 on success */ 606 int bcf_hdr_append(bcf_hdr_t* h, const(char)* line); 607 608 int bcf_hdr_printf(bcf_hdr_t* h, const(char)* format, ...); 609 610 /** VCF version, e.g. VCFv4.2 */ 611 const(char)* bcf_hdr_get_version(const(bcf_hdr_t)* hdr); 612 613 /// Set version in bcf header 614 /** 615 @param hdr BCF header struct 616 @param version Version to set, e.g. 
"VCFv4.3" 617 @return 0 on success; < 0 on error 618 */ 619 int bcf_hdr_set_version(bcf_hdr_t* hdr, const(char)* version_); 620 621 /** 622 * bcf_hdr_remove() - remove VCF header tag 623 * @param type: one of BCF_HL_* 624 * @param key: tag name or NULL to remove all tags of the given type 625 */ 626 void bcf_hdr_remove(bcf_hdr_t* h, int type, const(char)* key); 627 628 /** 629 * bcf_hdr_subset() - creates a new copy of the header removing unwanted samples 630 * @param n: number of samples to keep 631 * @param samples: names of the samples to keep 632 * @param imap: mapping from index in @samples to the sample index in the original file 633 * @return NULL on failure, header otherwise 634 * 635 * Sample names not present in h0 are ignored. The number of unmatched samples can be checked 636 * by comparing n and bcf_hdr_nsamples(out_hdr). 637 * This function can be used to reorder samples. 638 * See also bcf_subset() which subsets individual records. 639 * The bcf_hdr_t struct returned by a successful call should be freed 640 * via bcf_hdr_destroy() when it is no longer needed. 641 */ 642 /// NOTE: char *const* samples really exmplifies what I hate about C pointers 643 /// My interpretation of this is it is equivalent to char **samples, but that the outer pointer is const 644 /// which in D would be const(char *)*samples. I don't know what it implies about constancy of *samples or samples. 645 bcf_hdr_t* bcf_hdr_subset( 646 const(bcf_hdr_t)* h0, 647 int n, 648 const(char*)* samples, 649 int* imap); 650 651 /** Creates a list of sequence names. 
It is up to the caller to free the list (but not the sequence names) */
const(char*)* bcf_hdr_seqnames(const(bcf_hdr_t)* h, int* nseqs);

/** Get number of samples
 *
 * Reads the sample-dictionary slot of the header's `n` array
 * (`hdr.n[BCF_DT_SAMPLE]`).
 *
 * @param hdr Header to query; it is only read, so a const header is
 *            accepted (a mutable `bcf_hdr_t*` converts implicitly). Must not
 *            be null — no check is performed.
 * @return the value of `hdr.n[BCF_DT_SAMPLE]`
 */
pragma(inline, true)
int bcf_hdr_nsamples(const(bcf_hdr_t)* hdr)
{
    // Explicit `int` return keeps the result type identical to the previous
    // `auto` version even though the header is now read through const.
    return hdr.n[BCF_DT_SAMPLE];
}

/** The following functions are for internal use and should rarely be called directly */
int bcf_hdr_parse(bcf_hdr_t* hdr, char* htxt);

/// Synchronize internal header structures
/** @param h Header
    @return 0 on success, -1 on failure

    This function updates the id, sample and contig arrays in the
    bcf_hdr_t structure so that they point to the same locations as
    the id, sample and contig dictionaries.
*/
int bcf_hdr_sync(bcf_hdr_t* h);

/**
 * bcf_hdr_parse_line() - parse a single line of VCF textual header
 * @param h BCF header struct
 * @param line One or more lines of header text
 * @param len Filled out with length data parsed from 'line'.
 * @return bcf_hrec_t* on success;
 *         NULL on error or on end of header text.
 *         NB: to distinguish error from end-of-header, check *len:
 *           *len == 0 indicates @p line did not start with "##"
 *           *len == -1 indicates failure, likely due to out of memory
 *           *len > 0 indicates a malformed header line
 *
 * If *len > 0 on exit, it will contain the full length of the line
 * including any trailing newline (this includes cases where NULL was
 * returned due to a malformed line). Callers can use this to skip to
 * the next header line.
689 */ 690 bcf_hrec_t* bcf_hdr_parse_line( 691 const(bcf_hdr_t)* h, 692 const(char)* line, 693 int* len); 694 /// Convert a bcf header record to string form 695 /** 696 * @param hrec Header record 697 * @param str Destination kstring 698 * @return 0 on success; < 0 on error 699 */ 700 int bcf_hrec_format(const(bcf_hrec_t)* hrec, kstring_t* str); 701 702 int bcf_hdr_add_hrec(bcf_hdr_t* hdr, bcf_hrec_t* hrec); 703 704 /** 705 * bcf_hdr_get_hrec() - get header line info 706 * @param type: one of the BCF_HL_* types: FLT,INFO,FMT,CTG,STR,GEN 707 * @param key: the header key for generic lines (e.g. "fileformat"), any field 708 * for structured lines, typically "ID". 709 * @param value: the value which pairs with key. Can be be NULL for BCF_HL_GEN 710 * @param str_class: the class of BCF_HL_STR line (e.g. "ALT" or "SAMPLE"), otherwise NULL 711 */ 712 bcf_hrec_t* bcf_hdr_get_hrec( 713 const(bcf_hdr_t)* hdr, 714 int type, 715 const(char)* key, 716 const(char)* value, 717 const(char)* str_class); 718 719 /// Duplicate a header record 720 /** @param hrec Header record to copy 721 @return A new header record on success; NULL on failure 722 723 The bcf_hrec_t struct returned by a successful call should be freed 724 via bcf_hrec_destroy() when it is no longer needed. 
725 */ 726 bcf_hrec_t* bcf_hrec_dup(bcf_hrec_t* hrec); 727 728 /// Add a new header record key 729 /** @param hrec Header record 730 @param str Key name 731 @param len Length of @p str 732 @return 0 on success; -1 on failure 733 */ 734 int bcf_hrec_add_key(bcf_hrec_t* hrec, const(char)* str, size_t len); 735 736 /// Set a header record value 737 /** @param hrec Header record 738 @param i Index of value 739 @param str Value to set 740 @param len Length of @p str 741 @param is_quoted Value should be quoted 742 @return 0 on success; -1 on failure 743 */ 744 int bcf_hrec_set_val( 745 bcf_hrec_t* hrec, 746 int i, 747 const(char)* str, 748 size_t len, 749 int is_quoted); 750 751 /// Lookup header record by key 752 int bcf_hrec_find_key(bcf_hrec_t* hrec, const(char)* key); 753 754 /// Add an IDX header record 755 /** @param hrec Header record 756 @param idx IDX value to add 757 @return 0 on success; -1 on failure 758 */ 759 int hrec_add_idx(bcf_hrec_t* hrec, int idx); 760 761 /// Free up a header record and associated structures 762 /** @param hrec Header record 763 */ 764 void bcf_hrec_destroy(bcf_hrec_t* hrec); 765 766 /************************************************************************** 767 * Individual record querying and manipulation routines 768 **************************************************************************/ 769 770 /** See the description of bcf_hdr_subset() */ 771 int bcf_subset(const(bcf_hdr_t)* h, bcf1_t* v, int n, int* imap); 772 773 /** 774 * bcf_translate() - translate tags ids to be consistent with different header. This function 775 * is useful when lines from multiple VCF need to be combined. 
776 * @dst_hdr: the destination header, to be used in bcf_write(), see also bcf_hdr_combine() 777 * @src_hdr: the source header, used in bcf_read() 778 * @src_line: line obtained by bcf_read() 779 */ 780 int bcf_translate( 781 const(bcf_hdr_t)* dst_hdr, 782 bcf_hdr_t* src_hdr, 783 bcf1_t* src_line); 784 785 /** 786 * bcf_get_variant_type[s]() - returns one of VCF_REF, VCF_SNP, etc 787 */ 788 int bcf_get_variant_types(bcf1_t* rec); 789 790 int bcf_get_variant_type(bcf1_t* rec, int ith_allele); 791 792 int bcf_is_snp(bcf1_t* v); 793 794 /** 795 * bcf_update_filter() - sets the FILTER column 796 * @flt_ids: The filter IDs to set, numeric IDs returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") 797 * @n: Number of filters. If n==0, all filters are removed 798 */ 799 int bcf_update_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int* flt_ids, int n); 800 /** 801 * bcf_add_filter() - adds to the FILTER column 802 * @flt_id: filter ID to add, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") 803 * 804 * If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed. 805 */ 806 int bcf_add_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, int flt_id); 807 /** 808 * bcf_remove_filter() - removes from the FILTER column 809 * @flt_id: filter ID to remove, numeric ID returned by bcf_hdr_id2int(hdr, BCF_DT_ID, "PASS") 810 * @pass: when set to 1 and no filters are present, set to PASS 811 */ 812 int bcf_remove_filter( 813 const(bcf_hdr_t)* hdr, 814 bcf1_t* line, 815 int flt_id, 816 int pass); 817 /** 818 * Returns 1 if present, 0 if absent, or -1 if filter does not exist. "PASS" and "." can be used interchangeably. 
819 */ 820 int bcf_has_filter(const(bcf_hdr_t)* hdr, bcf1_t* line, char* filter); 821 /** 822 * bcf_update_alleles() and bcf_update_alleles_str() - update REF and ALT column 823 * @alleles: Array of alleles 824 * @nals: Number of alleles 825 * @alleles_string: Comma-separated alleles, starting with the REF allele 826 */ 827 int bcf_update_alleles( 828 const(bcf_hdr_t)* hdr, 829 bcf1_t* line, 830 const(char*)* alleles, 831 int nals); 832 833 int bcf_update_alleles_str( 834 const(bcf_hdr_t)* hdr, 835 bcf1_t* line, 836 const(char)* alleles_string); 837 838 /** 839 * bcf_update_id() - sets new ID string 840 * bcf_add_id() - adds to the ID string checking for duplicates 841 */ 842 int bcf_update_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id); 843 844 int bcf_add_id(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* id); 845 846 /** 847 * bcf_update_info_*() - functions for updating INFO fields 848 * @param hdr: the BCF header 849 * @param line: VCF line to be edited 850 * @param key: the INFO tag to be updated 851 * @param values: pointer to the array of values. Pass NULL to remove the tag. 852 * @param n: number of values in the array. When set to 0, the INFO tag is removed 853 * @return 0 on success or negative value on error. 854 * 855 * The @p string in bcf_update_info_flag() is optional, 856 * @p n indicates whether the flag is set or removed. 857 * 858 * Note that updating an END info tag will cause line->rlen to be 859 * updated as a side-effect (removing the tag will set it to the 860 * string length of the REF allele). If line->pos is being changed as 861 * well, it is important that this is done before calling 862 * bcf_update_info_int32() to update the END tag, otherwise rlen will be 863 * set incorrectly. If the new END value is less than or equal to 864 * line->pos, a warning will be printed and line->rlen will be set to 865 * the length of the REF allele. 
*/
pragma(inline, true) {  // TODO: rewrite as template
    /// Forwards to bcf_update_info() with type BCF_HT_INT
    int bcf_update_info_int32(
        const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key, const(void)* values, int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_INT);
    }

    /// Forwards to bcf_update_info() with type BCF_HT_REAL
    int bcf_update_info_float(
        const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key, const(void)* values, int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_REAL);
    }

    /// Forwards to bcf_update_info() with type BCF_HT_FLAG
    int bcf_update_info_flag(
        const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key, const(void)* values, int n)
    {
        return bcf_update_info(hdr, line, key, values, n, BCF_HT_FLAG);
    }

    /// Forwards to bcf_update_info() with type BCF_HT_STR; the value count is always 1
    int bcf_update_info_string(
        const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key, const(void)* values)
    {
        enum int singleValue = 1;
        return bcf_update_info(hdr, line, key, values, singleValue, BCF_HT_STR);
    }
}

/// Generic INFO updater used by the typed wrappers; `type` is one of BCF_HT_*
int bcf_update_info(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* key,
    const(void)* values,
    int n,
    int type);

/// Set or update 64-bit integer INFO values
/**
 *  @param hdr:       the BCF header
 *  @param line:      VCF line to be edited
 *  @param key:       the INFO tag to be updated
 *  @param values:    pointer to the array of values. Pass NULL to remove the tag.
 *  @param n:         number of values in the array. When set to 0, the INFO tag is removed
 *  @return 0 on success or negative value on error.
 *
 *  This function takes an int64_t values array as input.  The data
 *  actually stored will be shrunk to the minimum size that can
 *  accept all of the values.
 *
 *  INFO values outside of the range BCF_MIN_BT_INT32 to BCF_MAX_BT_INT32
 *  can only be written to VCF files.
 */
pragma(inline, true)
int bcf_update_info_int64(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* key,
    const(long)* values,
    int n)
{
    // Same entry point as the 32-bit wrappers, tagged BCF_HT_LONG.
    return bcf_update_info(hdr, line, key, values, n, BCF_HT_LONG);
}

/**
 *  bcf_update_format_*() - functions for updating FORMAT fields
 *  @values:    pointer to the array of values, the same number of elements
 *              is expected for each sample. Missing values must be padded
 *              with bcf_*_missing or bcf_*_vector_end values.
 *  @n:         number of values in the array. If n==0, existing tag is removed.
 *
 *  The function bcf_update_format_string() is a higher-level (slower) variant of
 *  bcf_update_format_char(). The former accepts array of \0-terminated strings
 *  whereas the latter requires that the strings are collapsed into a single array
 *  of fixed-length strings. In case of strings with variable length, shorter strings
 *  can be \0-padded. Note that the collapsed strings passed to bcf_update_format_char()
 *  are not \0-terminated.
 *
 *  Returns 0 on success or negative value on error.
*/

}// closes the enclosing @nogc nothrow block

pragma(inline, true) {
    /// Forwards to bcf_update_format() with type BCF_HT_INT
    auto bcf_update_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(int) *values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_INT); }

    /// Forwards to bcf_update_format() with type BCF_HT_REAL
    auto bcf_update_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(float) *values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_REAL); }

    /// Forwards to bcf_update_format() with type BCF_HT_STR
    // NOTE(review): the doc above describes a single collapsed character array,
    // which suggests `const(char)*` rather than `const(char)**` -- confirm
    // before changing the signature.
    auto bcf_update_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, const(char) **values, int n)
        { return bcf_update_format(hdr, line, key, values, n, BCF_HT_STR); }

    /// Updates the "GT" FORMAT tag; forwards to bcf_update_format() with type BCF_HT_INT
    // The key is passed as a string literal: D string literals are
    // NUL-terminated and convert implicitly to const(char)*, so no
    // toStringz() GC allocation is needed on every call.
    // NOTE(review): values are forwarded as BCF_HT_INT, so `gts` presumably
    // points at 32-bit integers rather than `char*` -- confirm before
    // changing the signature.
    auto bcf_update_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) **gts, int n)
        { return bcf_update_format(hdr, line, "GT", gts, n, BCF_HT_INT); }
}

@nogc nothrow {

/// Higher-level string variant, see the bcf_update_format_*() description
int bcf_update_format_string (
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* key,
    const(char*)* values,
    int n);

/// Generic FORMAT updater used by the typed wrappers; `type` is one of BCF_HT_*
int bcf_update_format(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* key,
    const(void)* values,
    int n,
    int type);

/// Macros for setting genotypes correctly, for use with bcf_update_genotypes only; idx corresponds
/// to VCF's GT (1-based index to ALT or 0 for the reference allele) and val is the opposite, obtained
/// from bcf_get_genotypes() below.
// TODO: is int appropriate?
pragma(inline, true) {
    /// Encode allele index `idx` as (idx+1)<<1 with the low (phase) bit set
    auto bcf_gt_phased(int idx)     { return ((idx + 1) << 1) | 1; }
    /// Encode allele index `idx` as (idx+1)<<1 with the low (phase) bit clear
    auto bcf_gt_unphased(int idx)   { return (idx + 1) << 1; }
    /// 1 if val>>1 is zero (no allele encoded), else 0
    auto bcf_gt_is_missing(int val) { return (val >> 1) ? 0 : 1; }
    /// The low (phase) bit of the encoded value
    auto bcf_gt_is_phased(int idx)  { return idx & 1; }
    /// Decode the allele index: (val>>1) - 1
    auto bcf_gt_allele(int val)     { return (val >> 1) - 1; }
}
/// ditto
enum int bcf_gt_missing = 0;

/** Conversion between alleles indexes to Number=G genotype index (assuming diploid, all 0-based) */
pragma(inline, true) {
    /// Index of the unordered pair (a, b): max*(max+1)/2 + min
    auto bcf_alleles2gt(int a, int b)
    {
        return a > b ? (a * (a + 1) / 2 + b) : (b * (b + 1) / 2 + a);
    }

    /// Inverse of bcf_alleles2gt(): writes the two allele indexes to *a and *b (a <= b)
    void bcf_gt2alleles(int igt, int *a, int *b)
    {
        int k = 0, dk = 1; // @suppress(dscanner.useless-initializer)
        // k walks 0, 2, 5, 9, ... (the index of pair (n, n)) until it reaches igt.
        while ( k<igt ) { dk++; k += dk; }
        *b = dk - 1;
        *a = igt - k + *b;
    }
}

/**
 * bcf_get_fmt() - returns pointer to FORMAT's field data
 * @header: for access to BCF_DT_ID dictionary
 * @line:   VCF line obtained from vcf_parse1
 * @fmt:    one of GT,PL,...
 *
 * Returns bcf_fmt_t* if the call succeeded, or returns NULL when the field
 * is not available.
 */
bcf_fmt_t* bcf_get_fmt(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key);

/// INFO counterpart of bcf_get_fmt()
bcf_info_t* bcf_get_info(const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* key);

/**
 * bcf_get_*_id() - returns pointer to FORMAT/INFO field data given the header index instead of the string ID
 * @line: VCF line obtained from vcf_parse1
 * @id:  The header index for the tag, obtained from bcf_hdr_id2int()
 *
 * Returns bcf_fmt_t* / bcf_info_t*. These functions do not check if the index is valid
 * as their goal is to avoid the header lookup.
*/
bcf_fmt_t* bcf_get_fmt_id(bcf1_t* line, const int id);
/// ditto
bcf_info_t* bcf_get_info_id(bcf1_t* line, const int id);

/**
 *  bcf_get_info_*() - get INFO values, integers or floats
 *  @param hdr:    BCF header
 *  @param line:   BCF record
 *  @param tag:    INFO tag to retrieve
 *  @param dst:    *dst is pointer to a memory location, can point to NULL
 *  @param ndst:   pointer to the size of allocated memory
 *  @return  >=0 on success
 *          -1 .. no such INFO tag defined in the header
 *          -2 .. clash between types defined in the header and encountered in the VCF record
 *          -3 .. tag is not present in the VCF record
 *          -4 .. the operation could not be completed (e.g. out of memory)
 *
 *  Returns negative value on error or the number of values (including
 *  missing values) put in *dst on success. bcf_get_info_string() returns
 *  on success the number of characters stored excluding the nul-
 *  terminating byte. bcf_get_info_flag() does not store anything in *dst
 *  but returns 1 if the flag is set or 0 if not.
 *
 *  *dst will be reallocated if it is not big enough (i.e. *ndst is too
 *  small) or NULL on entry.  The new size will be stored in *ndst.
 */
pragma(inline, true) {
    /// Forwards to bcf_get_info_values() with type BCF_HT_INT
    int bcf_get_info_int32(
        const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* tag, void** dst, int* ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_INT);
    }

    /// Forwards to bcf_get_info_values() with type BCF_HT_REAL
    int bcf_get_info_float(
        const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* tag, void** dst, int* ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_REAL);
    }

    /// Forwards to bcf_get_info_values() with type BCF_HT_STR
    int bcf_get_info_string(
        const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* tag, void** dst, int* ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_STR);
    }

    /// Forwards to bcf_get_info_values() with type BCF_HT_FLAG
    int bcf_get_info_flag(
        const(bcf_hdr_t)* hdr, bcf1_t* line, const(char)* tag, void** dst, int* ndst)
    {
        return bcf_get_info_values(hdr, line, tag, dst, ndst, BCF_HT_FLAG);
    }
}

/// Generic INFO getter used by the typed wrappers; `type` is one of BCF_HT_*
int bcf_get_info_values(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* tag,
    void** dst,
    int* ndst,
    int type);

/// Put integer INFO values into an int64_t array
/**
 *  @param hdr:    BCF header
 *  @param line:   BCF record
 *  @param tag:    INFO tag to retrieve
 *  @param dst:    *dst is pointer to a memory location, can point to NULL
 *  @param ndst:   pointer to the size of allocated memory
 *  @return  >=0 on success
 *          -1 .. no such INFO tag defined in the header
 *          -2 .. clash between types defined in the header and encountered in the VCF record
 *          -3 .. tag is not present in the VCF record
 *          -4 .. the operation could not be completed (e.g. out of memory)
 *
 *  Returns negative value on error or the number of values (including
 *  missing values) put in *dst on success.
 *
 *  *dst will be reallocated if it is not big enough (i.e. *ndst is too
 *  small) or NULL on entry.  The new size will be stored in *ndst.
 */
pragma(inline, true)
int bcf_get_info_int64(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* tag,
    long** dst,
    int* ndst)
{
    // The generic getter takes an untyped destination pointer.
    void** untypedDst = cast(void**) dst;
    return bcf_get_info_values(hdr, line, tag, untypedDst, ndst, BCF_HT_LONG);
}

/**
 *  bcf_get_format_*() - same as bcf_get_info*() above
 *
 *  The function bcf_get_format_string() is a higher-level (slower) variant of bcf_get_format_char().
 *  see the description of bcf_update_format_string() and bcf_update_format_char() above.
 *  Unlike other bcf_get_format__*() functions, bcf_get_format_string() allocates two arrays:
 *  a single block of \0-terminated strings collapsed into a single array and an array of pointers
 *  to these strings. Both arrays must be cleaned by the user.
 *
 *  Returns negative value on error or the number of written values on success.
 *
 *  Use the returned number of written values for accessing valid entries of dst, as ndst is only a
 *  watermark that can be higher than the returned value, i.e. the end of dst can contain carry-over
 *  values from previous calls to bcf_get_format_*() on lines with more values per sample.
 *
 *  Example:
 *      int ndst = 0; char **dst = NULL;
 *      if ( bcf_get_format_string(hdr, line, "XX", &dst, &ndst) > 0 )
 *          for (i=0; i<bcf_hdr_nsamples(hdr); i++) printf("%s\n", dst[i]);
 *      free(dst[0]); free(dst);
 *
 *  Example:
 *      int i, j, ngt, nsmpl = bcf_hdr_nsamples(hdr);
 *      int32_t *gt_arr = NULL, ngt_arr = 0;
 *
 *      ngt = bcf_get_genotypes(hdr, line, &gt_arr, &ngt_arr);
 *      if ( ngt<=0 ) return; // GT not present
 *
 *      int max_ploidy = ngt/nsmpl;
 *      for (i=0; i<nsmpl; i++)
 *      {
 *        int32_t *ptr = gt_arr + i*max_ploidy;
 *        for (j=0; j<max_ploidy; j++)
 *        {
 *           // if true, the sample has smaller ploidy
 *           if ( ptr[j]==bcf_int32_vector_end ) break;
 *
 *           // missing allele
 *           if ( bcf_gt_is_missing(ptr[j]) ) continue;
 *
 *           // the VCF 0-based allele index
 *           int allele_index = bcf_gt_allele(ptr[j]);
 *
 *           // is phased?
 *           int is_phased = bcf_gt_is_phased(ptr[j]);
 *
 *           // .. do something ..
*        }
 *      }
 *      free(gt_arr);
 *
 */

}// closes the @nogc nothrow block opened before bcf_update_format_string()

pragma(inline, true) {
    /// Forwards to bcf_get_format_values() with type BCF_HT_INT
    auto bcf_get_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_INT); }

    /// Forwards to bcf_get_format_values() with type BCF_HT_REAL
    auto bcf_get_format_float(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_REAL); }

    /// Forwards to bcf_get_format_values() with type BCF_HT_STR
    auto bcf_get_format_char(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *tag, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, tag, dst, ndst, BCF_HT_STR); }

    /// Fetches the "GT" FORMAT tag; forwards to bcf_get_format_values() with type BCF_HT_INT
    // The tag is passed as a string literal: D string literals are
    // NUL-terminated and convert implicitly to const(char)*, so no
    // toStringz() GC allocation is needed on every call.
    auto bcf_get_genotypes(const(bcf_hdr_t) *hdr, bcf1_t *line, void **dst, int *ndst)
        { return bcf_get_format_values(hdr, line, "GT", dst, ndst, BCF_HT_INT); }
}

@nogc nothrow {

/// Higher-level string variant, see the bcf_get_format_*() description
int bcf_get_format_string(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* tag,
    char*** dst,
    int* ndst);

/// Generic FORMAT getter used by the typed wrappers; `type` is one of BCF_HT_*
int bcf_get_format_values(
    const(bcf_hdr_t)* hdr,
    bcf1_t* line,
    const(char)* tag,
    void** dst,
    int* ndst,
    int type);

/**************************************************************************
 *  Helper functions
 **************************************************************************/

/**
 *  bcf_hdr_id2int() - Translates string into numeric ID
 *  bcf_hdr_int2id() - Translates numeric ID into string
 *  @type:     one of BCF_DT_ID,
BCF_DT_CTG, BCF_DT_SAMPLE
 *  @id:       tag name, such as: PL, DP, GT, etc.
 *
 *  Returns -1 if string is not in dictionary, otherwise numeric ID which identifies
 *  fields in BCF records.
 */
int bcf_hdr_id2int(const(bcf_hdr_t)* hdr, int type, const(char)* id);

/// ditto
pragma(inline, true)
auto bcf_hdr_int2id(const(bcf_hdr_t)* hdr, int type, int int_id)
{
    // Direct dictionary read: no range check is performed on int_id.
    return hdr.id[type][int_id].key;
}

/**
 *  bcf_hdr_name2id() - Translates sequence names (chromosomes) into numeric ID
 *  bcf_hdr_id2name() - Translates numeric ID to sequence name
 */
pragma(inline, true)
int bcf_hdr_name2id(const(bcf_hdr_t)* hdr, const(char)* id)
{
    return bcf_hdr_id2int(hdr, BCF_DT_CTG, id);
}

/// ditto
pragma(inline, true)
const(char)* bcf_hdr_id2name(const(bcf_hdr_t)* hdr, int rid)
{
    // Reads the contig dictionary directly; rid is not range-checked.
    return hdr.id[BCF_DT_CTG][rid].key;
}

/// Contig name of a record: the contig dictionary key at index rec.rid (not range-checked)
pragma(inline, true)
const(char)* bcf_seqname(const(bcf_hdr_t)* hdr, bcf1_t* rec)
{
    return hdr.id[BCF_DT_CTG][rec.rid].key;
}

/** Return CONTIG name, or "(unknown)"

    Like bcf_seqname(), but this function will never return NULL.  If
    the contig name cannot be found (either because @p hdr was not
    supplied or rec->rid was out of range) it returns the string
    "(unknown)".
*/
const(char)* bcf_seqname_safe(const(bcf_hdr_t)* hdr, const(bcf1_t)* rec);

/**
 *  bcf_hdr_id2*() - Macros for accessing bcf_idinfo_t
 *  @type:      one of BCF_HL_FLT, BCF_HL_INFO, BCF_HL_FMT
 *  @int_id:    return value of bcf_hdr_id2int, must be >=0
 *
 *  The returned values are:
 *     bcf_hdr_id2length   ..  whether the number of values is fixed or variable, one of BCF_VL_*
 *     bcf_hdr_id2number   ..  the number of values, 0xfffff for variable length fields
 *     bcf_hdr_id2type     ..  the field type, one of BCF_HT_*
 *     bcf_hdr_id2coltype  ..
the column type, one of BCF_HL_*
 *
 *  Notes: Prior to using the macros, the presence of the info should be
 *  tested with bcf_hdr_idinfo_exists().
 */
// TODO: for dict_type and col_type use ENUMs
pragma(inline, true) {
    /// Bits 8..11 of the packed info word for column `type`
    auto bcf_hdr_id2length(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        return hdr.id[BCF_DT_ID][int_id].val.info[type] >> 8 & 0xf;
    }

    /// Bits 12 and above of the packed info word for column `type`
    auto bcf_hdr_id2number(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        return hdr.id[BCF_DT_ID][int_id].val.info[type] >> 12;
    }

    /// Bits 4..7 of the packed info word for column `type`
    uint bcf_hdr_id2type(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        return cast(uint)(hdr.id[BCF_DT_ID][int_id].val.info[type] >> 4 & 0xf);
    }

    /// Bits 0..3 of the packed info word for column `type`
    uint bcf_hdr_id2coltype(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        return cast(uint)(hdr.id[BCF_DT_ID][int_id].val.info[type] & 0xf);
    }

    /// 0 when int_id is negative or the column-type nibble is 0xf, otherwise 1
    auto bcf_hdr_idinfo_exists(const(bcf_hdr_t)* hdr, int type, int int_id)
    {
        // A negative id must be rejected before it is used as an index.
        if (int_id < 0)
            return 0;
        return bcf_hdr_id2coltype(hdr, type, int_id) == 0xf ? 0 : 1;
    }

    /// Header record for an entry: contig entries use slot 0, all others use slot col_type
    auto bcf_hdr_id2hrc(const(bcf_hdr_t)* hdr, int dict_type, int col_type, int int_id)
    {
        if (dict_type == BCF_DT_CTG)
            return hdr.id[BCF_DT_CTG][int_id].val.hrec[0];
        return hdr.id[BCF_DT_ID][int_id].val.hrec[col_type];
    }
}
/// Convert BCF FORMAT data to string form
/**
 * @param s    kstring to write into
 * @param n    number of items in @p data
 * @param type type of items in @p data
 * @param data BCF format data
 * @return  0 on success
 *         -1 if out of memory
 */
int bcf_fmt_array(kstring_t* s, int n, int type, void* data);

/// NOTE(review): undocumented here; presumably formats a size-prefixed typed
/// array starting at ptr and returns the position after it -- confirm.
ubyte* bcf_fmt_sized_array(kstring_t* s, ubyte* ptr);

/// Encode a variable-length char array in BCF format
/**
 * @param s    kstring to write into
 * @param l    length of input
 * @param a    input data to encode
 * @return 0 on success; < 0 on error
 */
int bcf_enc_vchar(kstring_t* s, int l, const(char)* a);

/// Encode a variable-length integer array in BCF format
/**
 * @param s      kstring to write into
 * @param n      total number of items in @p a (<= 0 to encode BCF_BT_NULL)
 * @param a      input data to encode
 * @param wsize  vector length (<= 0 is equivalent to @p n)
 * @return 0 on success; < 0 on error
 * @note @p n should be an exact multiple of @p wsize
 */
int bcf_enc_vint(kstring_t* s, int n, int* a, int wsize);

/// Encode a variable-length float array in BCF format
/**
 * @param s      kstring to write into
 * @param n      total number of items in @p a (<= 0 to encode BCF_BT_NULL)
 * @param a      input data to encode
 * @return 0 on success; < 0 on error
 */
int bcf_enc_vfloat(kstring_t* s, int n, float* a);

/**************************************************************************
 *  BCF index
 *
 *  Note that these functions work with BCFs only.
See synced_bcf_reader.h
 *  which provides (amongst other things) an API to work transparently with
 *  both indexed BCFs and VCFs.
 **************************************************************************/

alias bcf_itr_destroy = hts_itr_destroy;

} // closes the @nogc nothrow block opened before bcf_get_format_string()

pragma(inline, true) {
    /// Generate an iterator for an integer-based range query
    auto bcf_itr_queryi(const(hts_idx_t)* idx, int tid, int beg, int end)
    {
        return hts_itr_query(idx, tid, beg, end, &bcf_readrec);
    }

    /// Generate an iterator for a string-based range query
    auto bcf_itr_querys(const(hts_idx_t)* idx, const(bcf_hdr_t)* hdr, const(char)* s)
    {
        // The header doubles as the opaque lookup context for name -> id translation.
        return hts_itr_querys(
            idx,
            s,
            cast(hts_name2id_f) &bcf_hdr_name2id,
            cast(void *) hdr,
            &hts_itr_query,
            &bcf_readrec);
    }

    /// Iterate through the range
    /// r should (probably) point to your VCF (BCF) row structure
    /// TODO: attempt to define parameter r as bcf1_t *, which is what I think it should be
    ///
    /// Returns the hts_itr_next() result for bgzf files; otherwise logs an
    /// error, sets errno to EINVAL and returns -2.
    int bcf_itr_next(htsFile* htsfp, hts_itr_t* itr, void* r)
    {
        if (!htsfp.is_bgzf)
        {
            hts_log_error(__FUNCTION__,"Only bgzf compressed files can be used with iterators");
            errno = EINVAL;
            return -2;
        }

        return hts_itr_next(htsfp.fp.bgzf, itr, r, null);
    }

@nogc nothrow:

    /// Load a BCF index
    /** @param fn   BCF file name
        @return The index, or NULL if an error occurred.
        @note This only works for BCF files. Consider synced_bcf_reader instead
        which works for both BCF and VCF.
    */
    auto bcf_index_load(const(char)* fn)
    {
        return hts_idx_load(fn, HTS_FMT_CSI);
    }

    /// Get a list (char **) of sequence names from the index -- free only the array, not the values
    auto bcf_index_seqnames(const(hts_idx_t)* idx, const(bcf_hdr_t)* hdr, int* nptr)
    {
        return hts_idx_seqnames(
            idx,
            nptr,
            cast(hts_id2name_f) &bcf_hdr_id2name,
            cast(void *) hdr);
    }
}

/// Load a BCF index from a given index file name
/**  @param fn     Input BAM/BCF/etc filename
     @param fnidx  The input index filename
     @return The index, or NULL if an error occurred.
     @note This only works for BCF files. Consider synced_bcf_reader instead
     which works for both BCF and VCF.
*/
hts_idx_t* bcf_index_load2(const(char)* fn, const(char)* fnidx);

/// Load a BCF index from a given index file name
/**  @param fn     Input BAM/BCF/etc filename
     @param fnidx  The input index filename
     @param flags  Flags to alter behaviour (see description)
     @return The index, or NULL if an error occurred.
     @note This only works for BCF files. Consider synced_bcf_reader instead
     which works for both BCF and VCF.

     The @p flags parameter can be set to a combination of the following
     values:

        HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
        HTS_IDX_SILENT_FAIL   Fail silently if the index is not present

     Equivalent to hts_idx_load3(fn, fnidx, HTS_FMT_CSI, flags);
*/
hts_idx_t* bcf_index_load3(const(char)* fn, const(char)* fnidx, int flags);

/**
 *  bcf_index_build() - Generate and save an index file
 *  @fn:         Input VCF(compressed)/BCF filename
 *  @min_shift:  log2(width of the smallest bin), e.g. a value of 14
 *  imposes a 16k base lower limit on the width of index bins.
 *  Positive to generate CSI, or 0 to generate TBI.
However, a small
 *  value of min_shift would create a large index, which would lead to
 *  reduced performance when using the index. A recommended value is 14.
 *  For BCF files, only the CSI index can be generated.
 *
 *  Returns 0 if successful, or negative if an error occurred.
 *
 *  List of error codes:
 *      -1 .. indexing failed
 *      -2 .. opening @fn failed
 *      -3 .. format not indexable
 *      -4 .. failed to create and/or save the index
 */
int bcf_index_build(const(char)* fn, int min_shift);

/**
 *  bcf_index_build2() - Generate and save an index to a specific file
 *  @fn:         Input VCF/BCF filename
 *  @fnidx:      Output filename, or NULL to add .csi/.tbi to @fn
 *  @min_shift:  Positive to generate CSI, or 0 to generate TBI
 *               (see bcf_index_build() for guidance on choosing a value)
 *
 *  Returns 0 if successful, or negative if an error occurred.
 *
 *  List of error codes:
 *      -1 .. indexing failed
 *      -2 .. opening @fn failed
 *      -3 .. format not indexable
 *      -4 .. failed to create and/or save the index
 */
int bcf_index_build2(const(char)* fn, const(char)* fnidx, int min_shift);

/**
 *  bcf_index_build3() - Generate and save an index to a specific file
 *  @fn:         Input VCF/BCF filename
 *  @fnidx:      Output filename, or NULL to add .csi/.tbi to @fn
 *  @min_shift:  Positive to generate CSI, or 0 to generate TBI
 *  @n_threads:  Number of VCF/BCF decoder threads
 *
 *  Returns 0 if successful, or negative if an error occurred.
 *
 *  List of error codes:
 *      -1 .. indexing failed
 *      -2 .. opening @fn failed
 *      -3 .. format not indexable
 *      -4 .. failed to create and/or save the index
 */
int bcf_index_build3(
    const(char)* fn,
    const(char)* fnidx,
    int min_shift,
    int n_threads);

/// Initialise fp->idx for the current format type, for VCF and BCF files.
/** @param fp        File handle for the data file being written.
    @param h         BCF header structure (needed for BAI and CSI).
    @param min_shift CSI bin size (CSI default is 14).
    @param fnidx     Filename to write index to.  This pointer must remain valid
                     until after bcf_idx_save is called.
    @return          0 on success, <0 on failure.
    @note This must be called after the header has been written, but before
          any other data.
*/
int bcf_idx_init(htsFile* fp, bcf_hdr_t* h, int min_shift, const(char)* fnidx);

/// Writes the index initialised with bcf_idx_init to disk.
/** @param fp        File handle for the data file being written.
    @return          0 on success, <0 on failure.
*/
int bcf_idx_save(htsFile* fp);

/*******************
 * Typed value I/O *
 *******************/

/**
    Note that in contrast with BCFv2.1 specification, HTSlib implementation
    allows missing values in vectors. For integer types, the values 0x80,
    0x8000, 0x80000000 are interpreted as missing values and 0x81, 0x8001,
    0x80000001 as end-of-vector indicators.  Similarly for floats, the value of
    0x7F800001 is interpreted as a missing value and 0x7F800002 as an
    end-of-vector indicator.
    Note that the end-of-vector byte is not part of the vector.

    This trial BCF version (v2.2) is compatible with the VCF specification and
    enables to handle correctly vectors with different ploidy in presence of
    missing values.
*/
enum bcf_int8_vector_end  = -127;                           /* INT8_MIN  + 1 */
enum bcf_int16_vector_end = -32_767;                        /* INT16_MIN + 1 */
enum bcf_int32_vector_end = -2_147_483_647;                 /* INT32_MIN + 1 */
enum bcf_int64_vector_end = -9_223_372_036_854_775_807L;    /* INT64_MIN + 1 */
enum bcf_str_vector_end   = 0;
enum bcf_int8_missing     = -128;                           /* INT8_MIN  */
enum bcf_int16_missing    = -32_767 - 1;                    /* INT16_MIN */
enum bcf_int32_missing    = -2_147_483_647 - 1;             /* INT32_MIN */
enum bcf_int64_missing    = -9_223_372_036_854_775_807L - 1L;  /* INT64_MIN */
enum bcf_str_missing      = 0x07;

// Limits on BCF values stored in given types.  Max values are the same
// as for the underlying type.  Min values are slightly different as
// the last 8 values for each type were reserved by BCFv2.2.
enum BCF_MAX_BT_INT8  = 0x7f;            /* INT8_MAX  */
enum BCF_MAX_BT_INT16 = 0x7fff;          /* INT16_MAX */
enum BCF_MAX_BT_INT32 = 0x7fffffff;      /* INT32_MAX */
enum BCF_MIN_BT_INT8  = -120;            /* INT8_MIN  + 8 */
enum BCF_MIN_BT_INT16 = -32_760;         /* INT16_MIN + 8 */
enum BCF_MIN_BT_INT32 = -2_147_483_640;  /* INT32_MIN + 8 */

extern __gshared uint bcf_float_vector_end;
extern __gshared uint bcf_float_missing;
version(LDC) pragma(inline, true):
version(GNU) pragma(inline, true):
/// Store the 32-bit pattern `value` into *ptr, reinterpreted as a float through a union
void bcf_float_set(float *ptr, uint32_t value)
{
    union U { uint32_t i; float f; }
    U u;
    u.i = value;
    *ptr = u.f;
}

/// float vector macros
///
/// `x` is taken by reference so the caller's variable is actually overwritten;
/// with a by-value parameter the store would only hit a local copy.
void bcf_float_set_vector_end(ref float x) { bcf_float_set(&x, bcf_float_vector_end); }
/// ditto
void bcf_float_set_missing(ref float x) { bcf_float_set(&x, bcf_float_missing); }

/// Returns 1 if the bit pattern of `f` equals bcf_float_missing, else 0
pragma(inline, true)
int bcf_float_is_missing(float f)
{
    union U { uint32_t i; float f; }
    U u;
    u.f = f;
    return u.i==bcf_float_missing ? 1 : 0;
}
/// Returns 1 if the bit pattern of `f` equals bcf_float_vector_end, else 0
pragma(inline, true)
int bcf_float_is_vector_end(float f)
{
    union U { uint32_t i; float f; }
    U u;
    u.f = f;
    return u.i==bcf_float_vector_end ? 1 : 0;
}

/// (Undocumented) Format GT field
/**
 *  Appends the genotype of sample `isample` to `str`.  Alleles are read until
 *  fmt.n values have been consumed or a vector-end marker is found; each is
 *  written as "." when val>>1 is zero, otherwise as (val>>1)-1.  From the
 *  second allele on, a separator is written first: '|' when the low bit of the
 *  value is set, '/' otherwise.  A sample with no alleles, or a field of type
 *  BCF_BT_NULL, is written as ".".
 *
 *  Returns 0 on success, -1 if any write to `str` failed, or -2 if fmt.type is
 *  not one of the handled types.
 */
pragma(inline, true)
int bcf_format_gt(bcf_fmt_t *fmt, int isample, kstring_t *str)
{
    uint32_t e = 0;
    void branch(T)()    // gets a closure over e (was #define macro)
    if (is(T == int8_t) || is(T == int16_t) || is(T == int32_t))
    {
        static if (is(T == int8_t))
            auto vector_end = bcf_int8_vector_end;
        else static if (is(T == int16_t))
            auto vector_end = bcf_int16_vector_end;
        else
            auto vector_end = bcf_int32_vector_end;

        T *ptr = cast(T*) (fmt.p + (isample * fmt.size));
        // Declared outside the loop: the count is needed after the loop ends.
        int i;
        for (i = 0; i < fmt.n && ptr[i] != vector_end; i++)
        {
            if ( i ) e |= kputc("/|"[ptr[i]&1], str) < 0;
            if ( !(ptr[i]>>1) ) e |= kputc('.', str) < 0;
            else e |= kputw((ptr[i]>>1) - 1, str) < 0;
        }
        if (i == 0) e |= kputc('.', str) < 0;
    }
    switch (fmt.type) {
        case BCF_BT_INT8:  branch!int8_t(); break;
        case BCF_BT_INT16: branch!int16_t(); break;
        case BCF_BT_INT32: branch!int32_t(); break;
        case BCF_BT_NULL:  e |= kputc('.', str) < 0; break;
        // NOTE(review): bcf_itr_next() calls hts_log_error(__FUNCTION__, msg);
        // confirm this printf-style call matches the binding's signature.
        default: hts_log_error("Unexpected type %d", fmt.type); return -2;
    }

    return e == 0 ? 0 : -1;
}


/// Encode a size/type descriptor byte sequence
/**
 *  Sizes below 15 are packed as `size<<4|type` in one byte.  Larger sizes
 *  write `15<<4|type` followed by the size as a single typed integer: one
 *  byte below 128, two bytes below 32768, four bytes otherwise.  Multi-byte
 *  values are copied from memory in host byte order.
 *
 *  Returns 0 on success, -1 if any write to `s` failed.
 */
pragma(inline, true)
int bcf_enc_size(kstring_t *s, int size, int type)
{
    uint32_t e = 0;
    if (size >= 15) {
        e |= kputc(15<<4|type, s) < 0;
        if (size >= 128) {
            if (size >= 32_768) {
                int32_t x = size;
                e |= kputc(1<<4|BCF_BT_INT32, s) < 0;
                e |= kputsn(cast(char*)&x, 4, s) < 0;
            } else {
                // size is in [128, 32767] here; D needs an explicit narrowing cast.
                int16_t x = cast(int16_t) size;
                e |= kputc(1<<4|BCF_BT_INT16, s) < 0;
                e |= kputsn(cast(char*)&x, 2, s) < 0;
            }
        } else {
            e |= kputc(1<<4|BCF_BT_INT8, s) < 0;
            e |= kputc(size, s) < 0;
        }
    } else e |= kputc(size<<4|type, s) < 0;
    return e == 0 ? 0 : -1;
}


/// Smallest BCF integer type code (BCF_BT_INT8/16/32) whose BCF_MIN/MAX range contains x
pragma(inline, true)
int bcf_enc_inttype(long x)
{
    if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) return BCF_BT_INT8;
    if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) return BCF_BT_INT16;
    return BCF_BT_INT32;
}

/// Encode a single integer as a typed value of the smallest fitting width
/**
 *  The 32-bit vector-end and missing markers are written as their 8-bit
 *  counterparts.  Returns 0 on success, -1 if any write to `s` failed.
 */
pragma(inline, true)
int bcf_enc_int1(kstring_t *s, int32_t x)
{
    uint32_t e = 0;
    if (x == bcf_int32_vector_end) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(bcf_int8_vector_end, s) < 0;
    } else if (x == bcf_int32_missing) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(bcf_int8_missing, s) < 0;
    } else if (x <= BCF_MAX_BT_INT8 && x >= BCF_MIN_BT_INT8) {
        e |= bcf_enc_size(s, 1, BCF_BT_INT8);
        e |= kputc(x, s) < 0;
    } else if (x <= BCF_MAX_BT_INT16 && x >= BCF_MIN_BT_INT16) {
        // Range-checked above; D needs an explicit narrowing cast.
        int16_t z = cast(int16_t) x;
        e |= bcf_enc_size(s, 1, BCF_BT_INT16);
        e |= kputsn(cast(char*)&z, 2, s) < 0;
    } else {
        int32_t z = x;
        e |= bcf_enc_size(s, 1, BCF_BT_INT32);
        e |= kputsn(cast(char*)&z, 4, s) < 0;
    }
    return e == 0 ? 0 : -1;
}
/// Return the value of a single typed integer.
/** @param      p    Pointer to input data block.
@param      type One of the BCF_BT_INT* type codes
    @param[out] q    Location to store an updated value for p
    @return The integer value, or zero if @p type is not valid.

If @p type is not one of BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or
BCF_BT_INT64, zero will be returned and @p *q will not be updated.
Otherwise, the integer value will be returned and @p *q will be set
to the memory location immediately following the integer value.

Cautious callers can detect invalid type codes by checking that *q has
actually been updated.
*/
pragma(inline, true)
int64_t bcf_dec_int1(const(ubyte) *p, int type, ubyte **q)
{
    if (type == BCF_BT_INT8) {
        *q = cast(ubyte*)p + 1;
        return le_to_i8(p);
    } else if (type == BCF_BT_INT16) {
        *q = cast(ubyte*)p + 2;
        return le_to_i16(p);
    } else if (type == BCF_BT_INT32) {
        *q = cast(ubyte*)p + 4;
        return le_to_i32(p);
    } else if (type == BCF_BT_INT64) {
        // A 64-bit value occupies 8 bytes; advancing by 4 would leave *q
        // pointing into the middle of the integer just decoded.
        *q = cast(ubyte*)p + 8;
        return le_to_i64(p);
    } else { // Invalid type.
        return 0;
    }
}

/// Return the value of a single typed integer from a byte stream.
/** @param      p    Pointer to input data block.
    @param[out] q    Location to store an updated value for p
    @return The integer value, or zero if the type code was not valid.

Reads a one-byte type code from @p p, and uses it to decode an integer
value from the following bytes in @p p.

If the type is not one of the integer codes accepted by bcf_dec_int1()
(BCF_BT_INT8, BCF_BT_INT16, BCF_BT_INT32 or BCF_BT_INT64), zero will be
returned and @p *q will be unchanged.  Otherwise, the integer value will
be returned and @p *q will be set to the memory location immediately following
the integer value.

Cautious callers can detect invalid type codes by checking that *q has
actually been updated.
*/
pragma(inline, true)
long bcf_dec_typed_int1 (const(ubyte)* p, ubyte** q)
{
    // The low nibble of the descriptor byte is the type code.
    return bcf_dec_int1(p + 1, *p&0xf, q);
}

/// Decode a size/type descriptor
/** @param      p    Pointer to the descriptor byte.
    @param[out] q    Set to the location following the descriptor
    @param[out] type Set to the low nibble of the descriptor byte
    @return The decoded size.

If the high nibble of the descriptor is not 15 it is the size itself.
Otherwise the size follows as a typed integer, which is decoded with
bcf_dec_typed_int1() and truncated to int.
*/
pragma(inline, true)
int bcf_dec_size (const(ubyte)* p, ubyte** q, int* type)
{
    *type = *p & 0xf;
    if (*p>>4 != 15) {
        *q = cast(ubyte*)p + 1;
        return *p>>4;
    }
    // bcf_dec_typed_int1() returns long; D needs an explicit narrowing cast.
    return cast(int) bcf_dec_typed_int1(p + 1, q);
}