1 /** Module provides VCF Reader/Writer 2 3 VCF version 4.2 (including BCF) reader and writer, including 4 a model with convenience functions for the header (metadata) 5 and individual VCF/BCF records (rows). 6 7 Specifications: https://samtools.github.io/hts-specs/VCFv4.2.pdf 8 9 */ 10 module dhtslib.vcf.record; 11 12 import core.stdc.string; 13 import core.vararg; 14 15 import std.conv: to, ConvException; 16 import std.format: format; 17 import std.range; 18 import std.string: fromStringz, toStringz; 19 import std.utf : toUTFz; 20 import std.algorithm : map; 21 import std.array : array; 22 import std.traits; 23 import std.meta : staticIndexOf; 24 25 import dhtslib.vcf; 26 import dhtslib.coordinates; 27 import dhtslib.memory; 28 import htslib.hts_log; 29 import htslib.kstring; 30 import htslib.vcf; 31 32 alias BCFRecord = VCFRecord; 33 34 /// Struct to aid in conversion of VCF info data 35 /// into D types 36 struct InfoField 37 { 38 /// String identifier of info field 39 string key; 40 /// BCF TYPE indicator 41 BcfRecordType type; 42 /// number of data elements 43 int len; 44 /// reference of info field data 45 ubyte[] data; 46 47 /// VCFRecord refct 48 Bcf1 line; 49 50 /// ctor from VCFRecord 51 this(string key, bcf_info_t * info, Bcf1 line) 52 { 53 this.line = line; 54 this(key, info); 55 } 56 57 /// string, info field ctor 58 this(string key, bcf_info_t * info) 59 { 60 /// get type 61 this.type = cast(BcfRecordType)(info.type & 0b1111); 62 /// get len 63 this.len = info.len; 64 /// copy data 65 this.data = info.vptr[0.. info.vptr_len]; 66 /// double check our lengths 67 debug{ 68 final switch(this.type){ 69 case BcfRecordType.Int8: 70 assert(data.length == this.len * byte.sizeof); 71 break; 72 case BcfRecordType.Int16: 73 assert(data.length == this.len * short.sizeof); 74 break; 75 case BcfRecordType.Int32: 76 assert(data.length == this.len * int.sizeof); 77 break; 78 case BcfRecordType.Int64: 79 assert(data.length == this.len * long.sizeof); 80 break; 81 case BcfRecordType.Float: 82 assert(data.length == this.len * float.sizeof); 83 break; 84 case BcfRecordType.Char: 85 assert(data.length == this.len * char.sizeof); 86 break; 87 case BcfRecordType.Null: 88 assert(data.length == 0); 89 } 90 } 91 } 92 93 /// convert to a D type array if int, float, or bool type array 94 T[] to(T: T[])() 95 if(isIntegral!T || is(T == float) || isBoolean!T) 96 { 97 static if(isIntegral!T){ 98 /// if we select the correct type just slice and return 99 if(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)) 100 return (cast(T *)this.data.ptr)[0 .. T.sizeof * this.len]; 101 /// if we select type that is too small log error and return 102 if(RecordTypeSizes[this.type] > T.sizeof) 103 { 104 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof)); 105 return []; 106 } 107 T[] ret; 108 /// if we select type that is bigger slice, convert and return 109 switch(this.type){ 110 case BcfRecordType.Int8: 111 ret = ((cast(byte *)this.data.ptr)[0 .. this.len]).to!(T[]); 112 break; 113 case BcfRecordType.Int16: 114 ret = ((cast(short *)this.data.ptr)[0 .. short.sizeof * this.len]).to!(T[]); 115 break; 116 case BcfRecordType.Int32: 117 ret = ((cast(int *)this.data.ptr)[0 .. int.sizeof * this.len]).to!(T[]); 118 break; 119 case BcfRecordType.Int64: 120 ret = ((cast(long *)this.data.ptr)[0 .. long.sizeof * this.len]).to!(T[]); 121 break; 122 default: 123 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(this.type, T.stringof)); 124 return []; 125 } 126 return ret; 127 }else{ 128 /// if we select type the wrong type error and return 129 if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType))) 130 { 131 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof)); 132 return []; 133 } 134 return (cast(T *)this.data.ptr)[0 .. T.sizeof * this.len]; 135 } 136 } 137 138 /// convert to a D type if int, float, or bool type 139 T to(T)() 140 if(isIntegral!T || is(T == float) || isBoolean!T) 141 { 142 /// if bool return true 143 static if(isBoolean!T) 144 return true; 145 else{ 146 /// if info field has > 1 element error and return 147 if(this.len != 1){ 148 hts_log_error(__FUNCTION__, "This info field has a length of %d not 1".format(this.len)); 149 return T.init; 150 } 151 static if(isIntegral!T){ 152 /// if we select type that is too small log error and return 153 if(RecordTypeSizes[this.type] > T.sizeof) 154 { 155 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof)); 156 return T.init; 157 } 158 /// just gonna use the array impl and grab index 0 159 return this.to!(T[])[0]; 160 }else{ 161 /// if we select type the wrong type error and return 162 if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType))) 163 { 164 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof)); 165 return T.init; 166 } 167 /// just gonna use the array impl and grab index 0 168 return this.to!(T[])[0]; 169 } 170 } 171 } 172 173 /// convert to a string 174 T to(T)() 175 if(isSomeString!T) 176 { 177 /// if we select type the wrong type error and return 178 if(!(this.type == BcfRecordType.Char)) 179 { 180 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof)); 181 return T.init; 182 } 183 /// otherwise slice and dup 184 return cast(T)(cast(char *)this.data.ptr)[0 .. this.len].dup; 185 } 186 } 187 188 /// Struct to aid in conversion of VCF format data 189 /// into D types 190 struct FormatField 191 { 192 /// String identifier of info field 193 string key; 194 /// BCF TYPE indicator 195 BcfRecordType type; 196 /// number of samples 197 int nSamples; 198 /// number of data elements per sample 199 int n; 200 /// number of bytes per sample 201 int size; 202 /// reference of info field data 203 ubyte[] data; 204 205 /// VCFRecord refct 206 Bcf1 line; 207 208 /// ctor from VCFRecord 209 this(string key, bcf_fmt_t * fmt, Bcf1 line) 210 { 211 this.line = line; 212 this(key, fmt); 213 } 214 215 /// string and format ctor 216 this(string key, bcf_fmt_t * fmt) 217 { 218 /// set all our data 219 this.key = key; 220 this.type = cast(BcfRecordType)(fmt.type & 0b1111); 221 this.n = fmt.n; 222 this.nSamples = fmt.p_len / fmt.size; 223 this.data = fmt.p[0 .. fmt.p_len].dup; 224 this.size = fmt.size; 225 /// double check our work 226 debug{ 227 final switch(this.type){ 228 case BcfRecordType.Int8: 229 assert(data.length == this.nSamples * this.n * byte.sizeof); 230 break; 231 case BcfRecordType.Int16: 232 assert(data.length == this.nSamples * this.n * short.sizeof); 233 break; 234 case BcfRecordType.Int32: 235 assert(data.length == this.nSamples * this.n * int.sizeof); 236 break; 237 case BcfRecordType.Int64: 238 assert(data.length == this.nSamples * this.n * long.sizeof); 239 break; 240 case BcfRecordType.Float: 241 assert(data.length == this.nSamples * this.n * float.sizeof); 242 break; 243 case BcfRecordType.Char: 244 assert(data.length == this.nSamples * this.n * char.sizeof); 245 break; 246 case BcfRecordType.Null: 247 assert(data.length == 0); 248 } 249 } 250 } 251 252 /// convert to a D type array if int, float, bool, or string type array 253 /// very similar to InfoField.to 254 /// This returns chunks as we separated FORMAT values by sample 255 auto to(T)() 256 if(isIntegral!T || is(T == float) || isBoolean!T || isSomeString!T) 257 { 258 T[] ret; 259 static if(isIntegral!T){ 260 if(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)){ 261 ret = (cast(T *)this.data.ptr)[0 .. this.n * this.nSamples * T.sizeof]; 262 return ret.chunks(this.n); 263 } 264 if(RecordTypeSizes[this.type] > T.sizeof) 265 { 266 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof)); 267 return ret.chunks(this.n); 268 } 269 switch(this.type){ 270 case BcfRecordType.Int8: 271 ret = ((cast(byte *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]); 272 break; 273 case BcfRecordType.Int16: 274 ret = ((cast(short *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]); 275 break; 276 case BcfRecordType.Int32: 277 ret = ((cast(int *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]); 278 break; 279 case BcfRecordType.Int64: 280 ret = ((cast(long *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]); 281 break; 282 default: 283 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(this.type, T.stringof)); 284 return ret.chunks(this.n); 285 } 286 }else static if(isSomeString!T){ 287 if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType))) 288 { 289 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof)); 290 return ret.chunks(this.n); 291 } 292 auto ptr = this.data.ptr; 293 for(auto i=0; i < nSamples; i++) 294 { 295 ret ~= (cast(char *)ptr)[i*size .. (i+1) * size].idup; 296 } 297 }else{ 298 if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType))) 299 { 300 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof)); 301 return ret.chunks(this.n); 302 } 303 ret = (cast(T *)this.data.ptr)[0 .. this.n * this.nSamples * T.sizeof]; 304 } 305 return ret.chunks(this.n); 306 307 } 308 } 309 310 311 312 /** Wrapper around bcf1_t 313 314 Because it uses bcf1_t internally, it must conform to the BCF2 part 315 of the VCFv4.2 specs, rather than the loosey-goosey VCF specs. i.e., 316 INFO, CONTIG, FILTER records must exist in the header. 317 318 TODO: Does this need to be kept in a consistent state? 319 Ideally, VCFWriter would reject invalid ones, but we are informed 320 that it is invalid (e.g. if contig not found) while building this 321 struct; bcf_write1 will actually segfault, unfortunately. I'd like 322 to avoid expensive validate() calls for every record before writing 323 if possible, which means keeping this consistent. However, not 324 sure what to do if error occurs when using the setters herein? 325 326 2019-01-23 struct->class to mirror SAMRecord -- faster if reference type? 327 328 2019-01-23 WIP: getters for chrom, pos, id, ref, alt are complete (untested) 329 330 After parsing a BCF or VCF line, bcf1_t must be UnpackLeveled. (not applicable when building bcf1_t from scratch) 331 Depending on information needed, this can be done to various levels with performance tradeoff. 332 Unpacking symbols: 333 UNPACK.ALL: all 334 UNPACK.SHR: all shared information (UNPACK.ALT|UNPACK.FILT|UNPACK.INFO) 335 336 UnpackLevel.ALT 337 / UnpackLevel.FILT 338 | / UnpackLevel.INFO 339 | | / ____________________________ UnpackLevel.FMT 340 V V V / | | | 341 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 ... 342 343 */ 344 struct VCFRecord 345 { 346 Bcf1 line; /// htslib structured record TODO: change to 'b' for better internal consistency? (vcf.h/c actually use line quite a bit in fn params) 347 348 VCFHeader vcfheader; /// corresponding header (required); 349 350 /** VCFRecord 351 352 Construct a bcf/vcf record, backed by bcf1_t, from: an existing bcf1_t, parameters, or a VCF line. 353 354 Internal backing by bcf1_t means it must conform to the BCF2 rules -- i.e., header must contain 355 appropriate INFO, CONTIG, and FILTER lines. 356 357 Protip: specifying alternate MAX_UNPACK can speed it tremendously 358 as it will not UnpackLevel all fields, only up to those requested (see htslib.vcf) 359 For example, UnpackLevel.ALT is up to ALT inclusive, and UnpackLevel.ALT is up to FILTER 360 */ 361 this(T)(T h, bcf1_t *b, UnpackLevel MAX_UNPACK = UnpackLevel.None) 362 if(is(T == VCFHeader) || is(T == bcf_hdr_t *)) 363 { 364 static if (is(T == VCFHeader)) this.vcfheader = h; 365 else static if (is(T == bcf_hdr_t *)) this.vcfheader = VCFHeader(Bcf_hdr_t(h)); // double free() bug if we don't own bcf_hdr_t h 366 else static if (is(T == bcf_hdr_t)) assert(0); // ferret out situations that will lead to free() crashes 367 else assert(0); 368 369 this.line = Bcf1(b); 370 371 // Now it must be UNPACKed 372 // Protip: specifying alternate MAX_UNPACK can speed it tremendously 373 immutable int ret = bcf_unpack(this.line, MAX_UNPACK); // unsure what to do c̄ return value // @suppress(dscanner.suspicious.unused_variable) 374 } 375 /// ditto 376 this(SS)(VCFHeader vcfhdr, string chrom, int pos, string id, string _ref, string alt, float qual, SS filter, ) 377 if (isSomeString!SS || is(SS == string[])) 378 { 379 this.line = Bcf1(bcf_init1()); 380 this.vcfheader = vcfhdr; 381 382 this.chrom = chrom; 383 this.pos = pos; 384 this.id = id; 385 386 this.setAlleles(_ref, alt); 387 388 this.qual = qual; 389 this.filter = filter; 390 } 391 /// ditto 392 this(VCFHeader vcfhdr, string line, UnpackLevel MAX_UNPACK = UnpackLevel.None) 393 { 394 this.vcfheader = vcfhdr; 395 396 kstring_t kline; 397 auto dupline = line.dup; // slower, but safer than a cast // @suppress(dscanner.suspicious.unmodified) 398 399 kline.l = dupline.length; 400 kline.m = dupline.length; 401 kline.s = dupline.ptr; 402 403 this.line = Bcf1(bcf_init1()); 404 this.line.max_unpack = MAX_UNPACK; 405 406 auto ret = vcf_parse(&kline, this.vcfheader.hdr, this.line); 407 if (ret < 0) { 408 hts_log_error(__FUNCTION__, "vcf_parse returned < 0 -- code error or malformed VCF line"); 409 } else { 410 ret = bcf_unpack(this.line, MAX_UNPACK); // unsure what to do c̄ return value 411 } 412 } 413 414 /// ensure that vcf variable length data is unpacked to at least desired level 415 pragma(inline, true) 416 void unpack(UnpackLevel unpackLevel) 417 { 418 if (this.line.max_unpack < unpackLevel) bcf_unpack(this.line, unpackLevel); 419 } 420 421 //----- FIXED FIELDS -----// 422 423 /* CHROM */ 424 /// Get chromosome (CHROM) 425 @property 426 string chrom() 427 { 428 return fromStringz(bcf_hdr_id2name(this.vcfheader.hdr, this.line.rid)).idup; 429 } 430 /// Set chromosome (CHROM) 431 @property 432 void chrom(const(char)[] c) 433 { 434 auto rid = bcf_hdr_name2id(this.vcfheader.hdr, toStringz(c)); 435 if (rid == -1) { 436 hts_log_error(__FUNCTION__, format("contig not found: %s", c)); 437 throw new Exception("contig not found"); 438 } 439 else line.rid = rid; 440 } 441 442 443 /* POS */ 444 /** Get position (POS, column 2) 445 * 446 * NB: internally BCF is uzing 0 based coordinates; we only show +1 when printing a VCF line with toString (which calls vcf_format) 447 */ 448 @property 449 ZB pos() 450 out(coord) { assert(coord >= 0); } 451 do 452 { 453 return ZB(this.line.pos); 454 } 455 /// Set position (POS, column 2) 456 @property 457 void pos(ZB p) 458 in { assert(p >= 0); } 459 do 460 { 461 // TODO: should we check for pos >= 0 && pos < contig length? Could really hamper performance. 462 // TODO: if writing out the file with invalid POS values crashes htslib, will consider it 463 this.line.pos = p.pos; 464 } 465 466 467 /* ID */ 468 /// Get ID string 469 @property string id() 470 { 471 this.unpack(UnpackLevel.AltAllele); 472 return fromStringz(this.line.d.id).idup; 473 } 474 /// Sets new ID string; comma-separated list allowed but no dup checking performed 475 @property int id(const(char)[] id) 476 { 477 // bcf_update_id expects null pointer instead of empty string to mean "no value" 478 if (id == "") return bcf_update_id(this.vcfheader.hdr, this.line, null); 479 else return bcf_update_id(this.vcfheader.hdr, this.line, toStringz(id)); 480 } 481 /// Append an ID (column 3) to the record. 482 /// 483 /// NOTE: htslib performs duplicate checking 484 int addID(const(char)[] id) 485 { 486 if(id == "") return 0; 487 return bcf_add_id(this.vcfheader.hdr, this.line, toStringz(id)); 488 } 489 490 491 /* Alleles (REF, ALT) 492 493 Internally, alleles are stored as a \0 separated array: 494 [C \0 T \0 T T \0.. \0] 495 ^ ^ ^ ^ 496 | | L L ALT_1 = "TT" 497 | L ALT_0 498 L REF 499 500 TODO: Getters and setters poorly or inconsistently (?) named at this time or there are too many overloads? 501 TODO: need non-overwriting setter for ref and alt alleles 502 TODO: some of these may be inefficent; since they may be used in hot inner loops, pls optimize 503 */ 504 /// REF allele length 505 @property long refLen() 506 { 507 version(DigitalMars) pragma(inline); 508 version(LDC) pragma(inline, true); 509 version(GNU) pragma(inline, true); 510 return this.line.rlen; 511 } 512 513 /// Coordinate range of the reference allele 514 @property ZBHO coordinates() 515 { 516 return ZBHO(this.pos, this.pos + this.refLen); 517 } 518 519 /// All alleles getter (array) 520 @property string[] allelesAsArray() 521 { 522 this.unpack(UnpackLevel.AltAllele); 523 string[] ret; 524 ret.length = this.line.n_allele; // n=0, no reference; n=1, ref but no alt 525 foreach(int i; 0 .. this.line.n_allele) // ref allele is index 0 526 { 527 ret[i] = fromStringz(this.line.d.allele[i]).idup; 528 } 529 return ret; 530 } 531 /// Reference allele getter 532 @property string refAllele() 533 { 534 this.unpack(UnpackLevel.AltAllele); 535 if (this.line.n_allele < 1) return ""; // a valid record could have no ref (or alt) alleles 536 else return fromStringz(this.line.d.als).idup; 537 } 538 // NB WIP: there could be zero, or multiple alt alleles 539 /+@property string altAlleles() 540 { 541 if (!this.line.unpacked) bcf_unpack(this.line, UnpackLevel.ALT); 542 return fromStringz(this.line.d.als) 543 }+/ 544 /// Alternate alleles getter version 1: ["A", "ACTG", ...] 545 @property string[] altAllelesAsArray() 546 { 547 this.unpack(UnpackLevel.AltAllele); 548 549 string[] ret; 550 if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt 551 return this.allelesAsArray[1 .. $]; // trim off REF 552 } 553 /// Alternate alleles getter version 2: "A,ACTG,..." 554 @property string altAllelesAsString() 555 { 556 this.unpack(UnpackLevel.AltAllele); 557 558 string ret; 559 if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt 560 561 char[] tmp; 562 tmp.length = this.line.d.m_allele; // max allocated size in htslib 563 564 char *a = this.line.d.allele[1]; // ref allele is index 0 565 const char *last = this.line.d.allele[this.line.n_allele - 1]; // pointer to start of last allele 566 size_t i = 0; 567 568 assert( this.line.d.allele[1] == (this.line.d.als + this.line.rlen + 1) ); // ensue data structure is as we think (+1: past ref's term \0) 569 570 while (i < this.line.d.m_allele) // safety stop at max allocated size 571 { 572 if (*a) tmp[i] = *a; 573 else { // '\0' 574 if (a < last) tmp[i] = ','; 575 else break; 576 } 577 i++; 578 a++; 579 } 580 tmp.length = i; 581 582 return tmp.idup; 583 } 584 /// Set alleles; comma-separated list 585 @property void alleles(string a) 586 { 587 this.unpack(UnpackLevel.AltAllele); 588 if (a == "") { 589 this.line.n_allele = 0; 590 if (this.line.d.m_allele) this.line.d.als[0] = '\0'; // if storage allocated, zero out the REF 591 } 592 else 593 bcf_update_alleles_str(this.vcfheader.hdr, this.line, toStringz(a)); 594 } 595 /// Set alleles; array 596 @property void alleles(string[] a) 597 { 598 this.unpack(UnpackLevel.AltAllele); 599 if (a.length == 0) { 600 this.line.n_allele = 0; 601 if (this.line.d.m_allele) this.line.d.als[0] = '\0'; // if storage allocated, zero out the REF 602 } 603 else { 604 const(char)*[64] allelesp; // will fail if a locus has more than 63 ALT alleles :-O 605 foreach(i; 0 .. a.length ) { 606 // In order to zero out all alleles, pass a zero-length allele to the string version, 607 // or a zero-length array to this version. Zero length string member of nonempty allele array is an error. 608 if (a[i].length == 0) { 609 hts_log_error(__FUNCTION__, "Zero-length allele in nonempty allele array. Setting to '.'"); 610 allelesp[i] = ".".ptr; 611 } 612 else allelesp[i] = toStringz(a[i]); 613 } 614 bcf_update_alleles(this.vcfheader.hdr, this.line, &allelesp[0], cast(int)a.length); 615 } 616 } 617 618 /// Set REF allele only 619 void setRefAllele(string r) 620 { 621 // first, get REF 622 this.unpack(UnpackLevel.AltAllele); 623 // a valid record could have no ref (or alt) alleles 624 auto alleles = [toUTFz!(char *)(r)]; 625 if (this.line.n_allele < 2) // if none or 1 (=REF only), just add the REF we receieved 626 bcf_update_alleles(this.vcfheader.hdr, this.line, alleles.ptr, 1); 627 else { 628 // length of REF allele is allele[1] - allele[0], (minus one more for \0) 629 // TODO ** TODO ** : there is a property line.refLen already 630 const auto reflen = (this.line.d.allele[1] - this.line.d.allele[0]) - 1; 631 alleles ~= this.altAllelesAsArray.map!(toUTFz!(char *)).array; 632 bcf_update_alleles(this.vcfheader.hdr, this.line, alleles.ptr, cast(int)alleles.length); 633 } 634 } 635 /// Set alleles; alt can be comma separated 636 void setAlleles(string _ref, string alt) 637 { 638 immutable string alleles = _ref ~ "," ~ alt ~ "\0"; 639 bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr); 640 } 641 /// Set alleles; min. 2 alleles (ref, alt1); unlimited alts may be specified 642 void setAlleles(string _ref, string alt, ...) 643 { 644 string alleles = _ref ~ "," ~ alt; 645 646 for (int i = 0; i < _arguments.length; i++) 647 { 648 alleles ~= "," ~ va_arg!string(_argptr); 649 } 650 651 alleles ~= "\0"; 652 653 bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr); 654 } 655 656 657 /* Quality (QUAL) */ 658 /// Get variant quality (QUAL, column 6) 659 @property float qual() 660 out(result) { assert(result >= 0); } 661 do 662 { 663 return this.line.qual; 664 } 665 /// Set variant quality (QUAL, column 6) 666 @property void qual(float q) 667 in { assert(q >= 0); } 668 do { this.line.qual = q; } 669 670 671 /* FILTER */ 672 /// Get FILTER column (nothing in htslib sadly) 673 @property string filter() 674 { 675 const(char)[] ret; 676 677 this.unpack(UnpackLevel.Filter); 678 679 if (this.line.d.n_flt) { 680 for(int i; i< this.line.d.n_flt; i++) { 681 if (i) ret ~= ";"; 682 ret ~= fromStringz(this.vcfheader.hdr.id[BCF_DT_ID][ this.line.d.flt[0] ].key); 683 } 684 } else { 685 ret = "."; 686 } 687 688 return ret.idup; 689 } 690 /// Remove all entries in FILTER 691 void removeAllFilters() 692 { 693 // always returns zero 694 bcf_update_filter(this.vcfheader.hdr, this.line, null, 0); 695 } 696 /// Set the FILTER column to f 697 @property void filter(string f) 698 { 699 this.filter([f]); 700 } 701 /// Set the FILTER column to f0,f1,f2... 702 /// TODO: determine definitiely whether "." is replaced with "PASS" 703 @property void filter(string[] fs) 704 { 705 int[] fids; 706 foreach(f; fs) { 707 const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f)); 708 if (fid == -1){ 709 hts_log_warning(__FUNCTION__, format("ignoring filter not found in header: %s", f.empty() ? "(empty)" : f)); 710 continue; 711 } 712 else fids ~= fid; 713 } 714 if (fids.length > 0) 715 bcf_update_filter(this.vcfheader.hdr, this.line, fids.ptr, cast(int)fids.length); 716 else 717 hts_log_warning(__FUNCTION__, "No FILTER update was performed due to empty list"); 718 } 719 /// Add a filter; from htslib: 720 /// "If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed." 721 int addFilter(string f) 722 { 723 724 if(f == "") hts_log_warning(__FUNCTION__, "filter was empty"); 725 const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f)); 726 if (fid == -1){ 727 hts_log_warning(__FUNCTION__, format("ignoring filter not found in header: %s", f.empty() ? "(empty)" : f)); 728 return -1; 729 } 730 731 return bcf_add_filter(this.vcfheader.hdr, this.line, fid); 732 } 733 /// Remove a filter by name 734 int removeFilter(string f) 735 { 736 const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f)); 737 if (fid == -1){ 738 hts_log_warning(__FUNCTION__, format("filter not found in header (ignoring): %s", f) ); 739 return -1; 740 } 741 return removeFilter(fid); 742 } 743 /// Remove a filter by numeric id 744 int removeFilter(int fid) 745 { 746 return bcf_remove_filter(this.vcfheader.hdr, this.line, fid, 0); 747 } 748 /// Determine whether FILTER is present. log warning if filter does not exist. "PASS" and "." can be used interchangeably. 749 bool hasFilter(string filter) 750 { 751 char[] f = filter.dup ~ '\0'; 752 //const auto id = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, f.ptr); 753 const auto ret = bcf_has_filter(this.vcfheader.hdr, this.line, f.ptr); 754 755 if (ret > 0) return true; 756 else if (ret == 0) return false; 757 else { 758 hts_log_warning(__FUNCTION__, format("FILTER %s does not exist in the header", filter)); 759 return false; 760 } 761 } 762 763 764 /** Update INFO (pan-sample info; column 8) 765 * 766 * Add a tag:value to the INFO column 767 * NOTE: tag must already exist in the header 768 * 769 * Templated on data type, calls one of bcf_update_info_{int32,float,string,flag} 770 * Both singletons and arrays are supported. 771 */ 772 void addInfo(T)(string tag, T data) 773 if(isSomeString!T || ((isIntegral!T || isFloatingPoint!T || isBoolean!T) && !isArray!T)) 774 { 775 int ret = -1; 776 777 static if(isIntegral!T) { 778 auto integer = cast(int) data; 779 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1); 780 } 781 782 else static if(isFloatingPoint!T) { 783 auto flt = cast(float) data; // simply passing "2.0" (or whatever) => data is a double 784 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1); 785 } 786 787 else static if(isSomeString!T) 788 ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data)); 789 790 else static if(isBoolean!T) { 791 immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 792 ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set); 793 } 794 795 if (ret == -1) 796 hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data)); 797 } 798 /// ditto 799 void addInfo(T)(string tag, T data) 800 if(isIntegral!(ElementType!T) || isFloatingPoint!(ElementType!T)) // otherwise string::immutable(char)[] will match the other template 801 { 802 int ret = -1; 803 804 static if(isIntegral!(ElementType!T)) { 805 auto integer = data.to!(int[]); 806 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), integer.ptr, cast(int)data.length); 807 } 808 809 else static if(isFloatingPoint!(ElementType!T)) { 810 auto flt = data.to!(float[]); // simply passing "2.0" (or whatever) => data is a double 811 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), flt.ptr, cast(int)data.length); 812 } 813 814 if (ret == -1) 815 hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data)); 816 } 817 818 /** Update FORMAT (sample info; column 9+) 819 * 820 * Templated on data type, calls one of bc_update_format_{int32,float,string,flag} 821 */ 822 void addFormat(T)(string tag, T data) 823 if(!isArray!T || isSomeString!T) 824 { 825 int ret = -1; 826 827 static if(isIntegral!T) { 828 auto integer = data.to!int; 829 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1); 830 } 831 832 else static if(isFloatingPoint!T) { 833 auto flt = data.to!float; // simply passing "2.0" (or whatever) => data is a double 834 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1); 835 } 836 837 else static if(isSomeString!T){ 838 auto strs = [data].map!(toUTFz!(char*)).array; 839 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), strs.ptr, 1); 840 } 841 842 else static if(isBoolean!T) { 843 immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 844 ret = bcf_update_format_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set); 845 } 846 847 if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data)); 848 } 849 /// ditto 850 void addFormat(T)(string tag, T[] data) 851 if((!isArray!T || isSomeString!T) && !is(T == immutable(char))) 852 { 853 int ret = -1; 854 855 static if(isIntegral!T) { 856 auto integer = data.to!(int[]); 857 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), 858 integer.ptr, cast(int)data.length); 859 } 860 861 else static if(isFloatingPoint!T) { 862 auto flt = data.to!(float[]); // simply passing "2.0" (or whatever) => data is a double 863 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), 864 flt.ptr, cast(int)data.length); 865 } 866 867 else static if(isSomeString!T){ 868 auto strs = data.map!(toUTFz!(char*)).array; 869 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), strs.ptr, cast(int)strs.length); 870 } 871 872 if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data)); 873 874 } 875 876 /// remove an info section 877 void removeInfo(string tag) 878 { 879 bcf_update_info(this.vcfheader.hdr,this.line, toStringz(tag),null,0,HeaderTypes.None); 880 } 881 882 /// remove a format section 883 void removeFormat(string tag) 884 { 885 bcf_update_format(this.vcfheader.hdr,this.line, toStringz(tag),null,0,HeaderTypes.None); 886 } 887 888 889 /// add INFO or FORMAT key:value pairs to a record 890 /// add a single datapoint OR vector of values, OR, values to each sample (if lineType == FORMAT) 891 void add(HeaderRecordType lineType, T)(const(char)[] tag, T data) 892 if((lineType == HeaderRecordType.Info || lineType == HeaderRecordType.Format) && 893 (isIntegral!T || isIntegral!(ElementType!T) || 894 isFloatingPoint!T || isFloatingPoint!(ElementType!T) || 895 isSomeString!T || isSomeString!(ElementType!T) || 896 isBoolean!T || isBoolean!(ElementType!T))) 897 { 898 int ret = -1; 899 int len; 900 901 static if (!isDynamicArray!T) { 902 len = 1; 903 static if (isIntegral!T) auto d = data.to!int; 904 else static if (isFloatingPoint!T) auto d = data.to!float; 905 else static if (isSomeString!T) auto d = toStringz(data); 906 else static if (isBoolean!T) immutable int d = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 907 else static assert(0); 908 909 const auto ptr = &d; 910 } 911 else static if (isDynamicArray!T) { 912 assert(data.length < int.max); 913 len = cast(int) data.length; 914 static if (isIntegral!(ElementType!T)) auto d = data.to!(int[]); 915 else static if (isFloatingPoint!(ElementType!T)) auto d = data.to!(float[]); 916 else static if (isSomeString!(ElementType!T)) { 917 char[] d; 918 foreach(s; data) { 919 d ~= s ~ "\0"; 920 } 921 if(d.length == 0) d ~= "\0"; // TODO replace with asserts on data length earlier 922 } 923 else static if (isBoolean!(ElementType!T)) { 924 int[] d; 925 foreach(b; data) { 926 d ~= (b ? 1 : 0); 927 } 928 } 929 930 const auto ptr = d.ptr; 931 } 932 else static assert(0); 933 934 static if (lineType == HeaderRecordType.Info) { 935 static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int)) 936 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 937 else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float)) 938 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 939 else static if (isSomeString!T || isSomeString!(ElementType!T)) 940 ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr); 941 else static if (is(T == bool) || is(ElementType!T == bool)) 942 ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 943 else static assert(0, "Type not recognized for INFO tag"); 944 } 945 else static if (lineType == HeaderRecordType.Format) { 946 static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int)) 947 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 948 else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float)) 949 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 950 else static if (isSomeString!T || isSomeString!(ElementType!T)) 951 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 952 else static assert(0, "Type not recognized for FORMAT tag"); 953 } 954 else static assert(0); 955 956 if (ret == -1) 957 hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data)); 958 } 959 960 /// returns a hashmap of info data by field 961 InfoField[string] getInfos() 962 { 963 /// UnpackLevel 964 this.unpack(UnpackLevel.Info); 965 966 /// copy some data 967 InfoField[string] infoMap; 968 bcf_info_t[] infos = this.line.d.info[0..this.line.n_info].dup; 969 970 foreach (bcf_info_t info; infos) 971 { 972 /// if variable data is null then value is set for deletion 973 /// skip 974 if(!info.vptr) continue; 975 auto key = fromStringz(this.vcfheader.hdr.id[HeaderDictTypes.Id][info.key].key).idup; 976 infoMap[key] = InfoField(key, &info, this.line); 977 } 978 return infoMap; 979 } 980 981 /// returns a hashmap of format data by field 982 FormatField[string] getFormats() 983 { 984 this.unpack(UnpackLevel.Format); 985 FormatField[string] fmtMap; 986 bcf_fmt_t[] fmts = this.line.d.fmt[0..this.line.n_fmt].dup; 987 foreach (bcf_fmt_t fmt; fmts) 988 { 989 /// if variable data is null then value is set for deletion 990 /// skip 991 if(!fmt.p) continue; 992 auto key = fromStringz(this.vcfheader.hdr.id[HeaderDictTypes.Id][fmt.id].key).idup; 993 fmtMap[key] = FormatField(key, &fmt, this.line); 994 } 995 return fmtMap; 996 } 997 998 /// Return a string representation of the VCFRecord (i.e. as would appear in .vcf) 999 /// 1000 /// As a bonus, there is a kstring_t memory leak 1001 string toString() const 1002 { 1003 kstring_t s; 1004 1005 const int ret = vcf_format(this.vcfheader.hdr, this.line, &s); 1006 if (ret) 1007 { 1008 hts_log_error(__FUNCTION__, 1009 format("vcf_format returned nonzero (%d) (likely EINVAL, invalid bcf1_t struct?)", ret)); 1010 return "[VCFRecord vcf_format parse_error]"; 1011 } 1012 1013 return cast(string) s.s[0 .. s.l]; 1014 } 1015 } 1016 1017 1018 1019 1020 1021 /// 1022 debug(dhtslib_unittest) 1023 unittest 1024 { 1025 import std.exception: assertThrown; 1026 import std.stdio: writeln, writefln; 1027 import dhtslib.vcf.writer; 1028 1029 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 1030 1031 1032 auto vw = VCFWriter("/dev/null"); 1033 1034 vw.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">"); 1035 vw.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">"); 1036 // ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency"> 1037 vw.vcfhdr.addHeaderLine!(HeaderRecordType.Info)("AF", HeaderLengths.OnePerAltAllele, HeaderTypes.Integer, "Number of Samples With Data"); 1038 vw.addHeaderLineRaw("##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"); // @suppress(dscanner.style.long_line) 1039 vw.addHeaderLineRaw("##FILTER=<ID=q10,Description=\"Quality below 10\">"); 1040 HeaderRecord hrec; 1041 hrec.setHeaderRecordType(HeaderRecordType.Format); 1042 hrec.setID("AF"); 1043 hrec.setLength(HeaderLengths.OnePerAltAllele); 1044 hrec.setValueType(HeaderTypes.Float); 1045 hrec.setDescription("Allele Freq"); 1046 vw.vcfhdr.addHeaderRecord(hrec); 1047 1048 assert(vw.vcfhdr.getHeaderRecord(HeaderRecordType.Format, "ID","AF")["ID"] == "AF"); 1049 vw.addHeaderLineRaw("##FORMAT=<ID=CH,Number=3,Type=String,Description=\"test\">"); 1050 1051 // Exercise header 1052 assert(vw.vcfhdr.nsamples == 0); 1053 vw.addSample("NA12878"); 1054 assert(vw.vcfhdr.nsamples == 1); 1055 1056 vw.writeHeader(); 1057 1058 auto r = new VCFRecord(vw.vcfhdr, bcf_init1()); 1059 1060 r.chrom = "20"; 1061 assert(r.chrom == "20"); 1062 assertThrown(r.chrom = "chr20"); // headerline chromosome is "20" not "chr20" 1063 1064 r.pos = Coordinate!(Basis.zero)(999_999); 1065 assert(r.pos == Coordinate!(Basis.zero)(999_999)); 1066 r.pos = Coordinate!(Basis.zero)(62_435_964 + 1); // Exceeds contig length 1067 1068 // Test ID field 1069 // note that in an empty freshly initialized bcf1_t, the ID field is "" rather than "." 1070 // Will consider adding initialization code 1071 //writefln("ID: %s", r.id); 1072 r.id = ""; 1073 assert(r.id == "."); 1074 r.id = "."; 1075 assert(r.id == "."); 1076 r.id = "rs001"; 1077 assert(r.id == "rs001"); 1078 r.addID("rs999"); 1079 assert(r.id == "rs001;rs999"); 1080 r.addID("rs001"); // test duplicate checking 1081 assert(r.id == "rs001;rs999"); 1082 1083 // Test REF/ALT allele setters and getters 1084 // many overloads to test for good coverage 1085 // Test matrix: Set {zero, one, two, three} alleles * {set by string, set by array} = 8 test cases 1086 // * also, there is setAlleles(ref, alt) and setAlleles(ref, alt1, ...) 1087 // * TODO: will also need to retreive alt alleles as array and string 1088 1089 string[] alleles_array; 1090 1091 // Zero alleles 1092 r.alleles(""); 1093 const auto zs = r.allelesAsArray; 1094 const auto zsr = r.refAllele; 1095 assert(zs.length == 0); 1096 assert(zsr == ""); 1097 1098 r.alleles(alleles_array); 1099 const auto za = r.allelesAsArray; 1100 const auto zar = r.refAllele; 1101 assert(za.length == 0); 1102 assert(zar == ""); 1103 1104 // One allele 1105 r.alleles("C"); 1106 const auto os = r.allelesAsArray; 1107 const auto osr = r.refAllele; 1108 assert(os.length == 1 && os[0] == "C"); 1109 assert(osr == "C"); 1110 1111 r.alleles(["C"]); 1112 const auto oa = r.allelesAsArray; 1113 const auto oar = r.refAllele; 1114 assert(oa.length == 1 && oa[0] == "C"); 1115 assert(oar == "C"); 1116 1117 // Two alleles 1118 r.alleles("C,T"); 1119 const auto ts = r.allelesAsArray; 1120 const auto tsr= r.refAllele; 1121 assert(ts == ["C", "T"]); 1122 assert(tsr== "C"); 1123 1124 r.alleles(["C", "T"]); 1125 const auto ta = r.allelesAsArray; 1126 const auto tar= r.refAllele; 1127 assert(ta == ["C", "T"]); 1128 assert(tar== "C"); 1129 1130 const taaa = r.altAllelesAsArray; 1131 const taas = r.altAllelesAsString; 1132 assert(taaa == ["T"]); 1133 assert(taas == "T"); 1134 1135 // alternate setter for >= 2 alleles 1136 r.setAlleles("A", "G"); 1137 assert(r.allelesAsArray == ["A", "G"]); 1138 1139 // Three alleles 1140 r.alleles("A,C,T"); 1141 assert(r.allelesAsArray == ["A", "C", "T"]); 1142 assert(r.refAllele == "A"); 1143 assert(r.altAllelesAsString == "C,T"); 1144 1145 // alternate the alleles for testing purposes 1146 r.alleles(["G", "A", "C"]); 1147 assert(r.allelesAsArray == ["G", "A", "C"]); 1148 assert(r.refAllele == "G"); 1149 assert(r.altAllelesAsString == "A,C"); 1150 1151 r.setAlleles("A", "C", "T"); 1152 assert(r.allelesAsArray == ["A", "C", "T"]); 1153 assert(r.refAllele == "A"); 1154 assert(r.altAllelesAsString == "C,T"); 1155 1156 r.setRefAllele("G"); 1157 assert(r.allelesAsArray == ["G", "C", "T"]); 1158 assert(r.refAllele == "G"); 1159 assert(r.altAllelesAsString == "C,T"); 1160 1161 r.setRefAllele("A"); 1162 assert(r.allelesAsArray == ["A", "C", "T"]); 1163 assert(r.refAllele == "A"); 1164 assert(r.altAllelesAsString == "C,T"); 1165 1166 1167 1168 // Test QUAL 1169 r.qual = 1.0; 1170 assert(r.qual == 1.0); 1171 // now test setting qual without UnpackLeveling 1172 // TODO: see https://forum.dlang.org/post/hebouvswxlslqhovzaia@forum.dlang.org, once resolved (or once factory function written), 1173 // add template value param UnpackLevel.ALT 1174 auto rr = new VCFRecord(vw.vcfhdr, "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0.017\n"); // @suppress(dscanner.style.long_line) 1175 rr.qual = 3.0; 1176 assert(rr.qual == 3.0); 1177 1178 1179 // Test FILTER 1180 assert(r.hasFilter("PASS")); 1181 assert(r.hasFilter(".")); 1182 // add q10 filter (is in header) 1183 r.filter = "q10"; 1184 assert(r.filter == "q10"); 1185 assert(!r.hasFilter("PASS")); // if has another filter, no longer PASS (unless added explicitly) 1186 assert(!r.hasFilter(".")); // if has another filter, no longer has . (which is PASS, or no filter [spec conflict?]) 1187 1188 // add q30 filteR (not in header) 1189 r.filter = "q30"; 1190 assert(r.filter == "q10"); // i.e., unchanged 1191 1192 assert(r.hasFilter("q10")); 1193 assert(!r.hasFilter("q30")); 1194 1195 r.removeAllFilters(); 1196 assert(r.hasFilter(".")); 1197 r.addFilter("q10"); 1198 assert(r.hasFilter("q10")); 1199 r.removeFilter("q10"); 1200 assert(r.hasFilter("PASS")); 1201 assert(r.toString == "20\t62435966\trs001;rs999\tA\tC,T\t1\t.\t.\n"); 1202 1203 r.addFormat("AF",[0.1, 0.5]); 1204 assert(r.toString == "20\t62435966\trs001;rs999\tA\tC,T\t1\t.\t.\tAF\t0.1,0.5\n"); 1205 1206 rr.addFormat("AF",[0.1]); 1207 writeln(rr.toString); 1208 assert(rr.toString == "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0\tAF\t0.1\n"); 1209 1210 1211 assert(rr.getFormats["AF"].to!float[0][0] == 0.1f); 1212 1213 rr.addInfo("AF",0.5); 1214 assert(rr.getInfos["AF"].to!float == 0.5f); 1215 1216 rr.removeInfo("AF"); 1217 1218 rr.removeFormat("AF"); 1219 1220 rr.addFormat("CH", "test"); 1221 1222 1223 writeln(rr.toString); 1224 writeln(rr.getFormats["CH"].to!string); 1225 assert(rr.getFormats["CH"].to!string[0][0] == "test"); 1226 1227 assert(rr.toString == "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11\tCH\ttest\n"); 1228 1229 assert(rr.coordinates == ZBHO(17329,17330)); 1230 vw.writeRecord(*r); 1231 vw.writeRecord(vw.vcfhdr.hdr, r.line); 1232 1233 1234 // Finally, print the records: 1235 writefln("VCF records via toString:\n%s%s", (*r), (*rr)); 1236 1237 writeln("\ndhtslib.vcf: all tests passed\n"); 1238 } 1239 1240 debug(dhtslib_unittest) unittest 1241 { 1242 import dhtslib.util; 1243 import std.stdio; 1244 import htslib.hts_log; 1245 import std.algorithm : map, count; 1246 import std.array : array; 1247 import std.path : buildPath, dirName; 1248 import std.math : approxEqual, isNaN; 1249 import dhtslib.tabix; 1250 import dhtslib.vcf; 1251 hts_set_log_level(htsLogLevel.HTS_LOG_INFO); 1252 hts_log_info(__FUNCTION__, "Testing VCFReader"); 1253 hts_log_info(__FUNCTION__, "Loading test file"); 1254 auto fn = buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","tabix","vcf_file.vcf.gz"); 1255 auto tbx = TabixIndexedFile(fn); 1256 auto reg = getIntervalFromString("1:3000151-3062916"); 1257 auto vcf = VCFReader(tbx, reg.contig, reg.interval); 1258 assert(!vcf.empty); 1259 1260 VCFRecord rec = vcf.front; 1261 assert(rec.getInfos["AN"].to!int == 4); 1262 rec.addInfo("AN", rec.getInfos["AN"].to!int + 1); 1263 assert(rec.getInfos["AN"].to!int == 5); 1264 assert(rec.getInfos["AN"].to!byte == 5); 1265 assert(rec.getInfos["AN"].to!short == 5); 1266 assert(rec.getInfos["AN"].to!long == 5); 1267 1268 assert(rec.getInfos["AN"].to!uint == 5); 1269 assert(rec.getInfos["AN"].to!ubyte == 5); 1270 assert(rec.getInfos["AN"].to!ushort == 5); 1271 assert(rec.getInfos["AN"].to!ulong == 5); 1272 1273 assert(rec.getInfos["AN"].to!float.isNaN); 1274 1275 rec.addInfo("AN", 128); 1276 assert(rec.getInfos["AN"].to!int == 128); 1277 assert(rec.getInfos["AN"].to!byte == 0); 1278 assert(rec.getInfos["AN"].to!short == 128); 1279 assert(rec.getInfos["AN"].to!long == 128); 1280 1281 rec.addInfo("AN", 32768); 1282 assert(rec.getInfos["AN"].to!int == 32768); 1283 assert(rec.getInfos["AN"].to!byte == 0); 1284 assert(rec.getInfos["AN"].to!short == 0); 1285 assert(rec.getInfos["AN"].to!long == 32768); 1286 1287 assert(rec.getInfos["AN"].to!long == 32768); 1288 1289 // rec.addInfo("AN", 2147483648); 1290 // assert(rec.getInfos["AN"].to!int == 0); 1291 // assert(rec.getInfos["AN"].to!byte == 0); 1292 // assert(rec.getInfos["AN"].to!short == 0); 1293 // assert(rec.getInfos["AN"].to!long == 2147483648); 1294 1295 vcf.popFront; 1296 rec = vcf.front; 1297 assert(rec.refAllele == "GTTT"); 1298 assert(rec.getInfos["INDEL"].to!bool == true); 1299 1300 assert(rec.getInfos["DP4"].to!(int[]) == [1, 2, 3, 4]); 1301 assert(rec.getInfos["DP4"].to!(byte[]) == [1, 2, 3, 4]); 1302 assert(rec.getInfos["DP4"].to!(short[]) == [1, 2, 3, 4]); 1303 assert(rec.getInfos["DP4"].to!(long[]) == [1, 2, 3, 4]); 1304 1305 assert(rec.getInfos["DP4"].to!(uint[]) == [1, 2, 3, 4]); 1306 assert(rec.getInfos["DP4"].to!(ubyte[]) == [1, 2, 3, 4]); 1307 assert(rec.getInfos["DP4"].to!(ushort[]) == [1, 2, 3, 4]); 1308 assert(rec.getInfos["DP4"].to!(ulong[]) == [1, 2, 3, 4]); 1309 1310 assert(rec.getInfos["DP4"].to!(float[]) == []); 1311 rec.addInfo("DP4", [2, 2, 3, 4]); 1312 assert(rec.getInfos["DP4"].to!(int[]) == [2, 2, 3, 4]); 1313 1314 assert(rec.getFormats["GQ"].to!int.array == [[409], [409]]); 1315 1316 rec.addFormat("GQ", [32768,32768]); 1317 1318 assert(rec.getFormats["GQ"].to!byte.array == []); 1319 assert(rec.getFormats["GQ"].to!short.array == []); 1320 assert(rec.getFormats["GQ"].to!int[0][0] == 32768); 1321 1322 1323 assert(rec.getInfos["STR"].to!string == "test"); 1324 1325 assert(rec.getInfos["DP4"].to!string == ""); 1326 1327 assert(rec.getFormats["GT"].to!int.array == [[2, 4], [2, 4]]); 1328 1329 1330 vcf.popFront; 1331 rec = vcf.front; 1332 1333 auto fmts = rec.getFormats; 1334 auto sam = vcf.vcfhdr.getSampleId("A"); 1335 assert(fmts["GT"].to!int[sam] == [2, 4]); 1336 sam = vcf.vcfhdr.getSampleId("B"); 1337 assert(fmts["GT"].to!int[sam] == [6, -127]); 1338 }