1 /** 2 3 VCFv4.2 (including BCF) reader and writer. 4 5 Specifications: https://samtools.github.io/hts-specs/VCFv4.2.pdf 6 7 */ 8 module dhtslib.vcf; 9 10 import core.stdc.string; 11 import core.vararg; 12 13 import std.conv: to, ConvException; 14 import std.datetime; 15 import std.format: format; 16 import std.range: ElementType; 17 import std.string: fromStringz, toStringz; 18 import std.traits: isArray, isDynamicArray, isBoolean, isIntegral, isFloatingPoint, isNumeric, isSomeString; 19 import std.traits: Unqual; 20 21 import dhtslib.htslib.hts_log; 22 import dhtslib.htslib.kstring; 23 import dhtslib.htslib.vcf; 24 25 alias BCFRecord = VCFRecord; 26 alias BCFWriter = VCFWriter; 27 28 /** Wrapper around bcf_hdr_t 29 30 Q: Why do we have VCFHeader, but not SAMHeader? 31 A: Most (all)? of the SAM (bam1_t) manipulation functions do not require a ptr to the header, 32 whereas almost all of the VCF (bcf1_t) manipulation functions do. Therefore, we track bcf_hdr_t* 33 inside each VCFRecord; this is wrapped by VCFHeader for future convenience (for example, 34 now we have @property nsamples; may move the tag reader and writer functions here?) 35 36 In order to avoid double free()'ing an instance bcf_hdr_t, 37 this wrapper will be the authoritative holder of of bcf_hdr_t ptrs, 38 it shall be passed around by reference, and copies are disabled. 39 */ 40 struct VCFHeader 41 { 42 /// Pointer to htslib BCF/VCF header struct; will be freed from VCFHeader dtor 43 bcf_hdr_t *hdr; 44 45 // Copies have to be disabled to avoid double free() 46 @disable this(this); 47 48 ~this() 49 { 50 // Deallocate header 51 if (this.hdr != null) bcf_hdr_destroy(this.hdr); 52 } 53 54 invariant 55 { 56 assert(this.hdr != null); 57 } 58 59 /// List of contigs in the header 60 @property string[] sequences() 61 { 62 import core.stdc.stdlib : free; 63 int nseqs; 64 65 /** Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names) */ 66 //const(char) **bcf_hdr_seqnames(const(bcf_hdr_t) *h, int *nseqs); 67 const(char) **ary = bcf_hdr_seqnames(this.hdr, &nseqs); 68 if (!nseqs) return []; 69 70 string[] ret; 71 ret.reserve(nseqs); 72 73 for(int i; i < nseqs; i++) { 74 ret ~= fromStringz(ary[i]).idup; 75 } 76 77 free(ary); 78 return ret; 79 } 80 81 /// Number of samples in the header 82 pragma(inline, true) 83 @property int nsamples() { return bcf_hdr_nsamples(this.hdr); } 84 85 } 86 87 /** Wrapper around bcf1_t 88 89 Because it uses bcf1_t internally, it must conform to the BCF2 part 90 of the VCFv4.2 specs, rather than the loosey-goosey VCF specs. i.e., 91 INFO, CONTIG, FILTER records must exist in the header. 92 93 TODO: Does this need to be kept in a consistent state? 94 Ideally, VCFWriter would reject invalid ones, but we are informed 95 that it is invalid (e.g. if contig not found) while building this 96 struct; bcf_write1 will actually segfault, unfortunately. I'd like 97 to avoid expensive validate() calls for every record before writing 98 if possible, which means keeping this consistent. However, not 99 sure what to do if error occurs when using the setters herein? 100 101 2019-01-23 struct->class to mirror SAMRecord -- faster if reference type? 102 103 2019-01-23 WIP: getters for chrom, pos, id, ref, alt are complete (untested) 104 105 After parsing a BCF or VCF line, bcf1_t must be unpacked. (not applicable when building bcf1_t from scratch) 106 Depending on information needed, this can be done to various levels with performance tradeoff. 107 Unpacking symbols: 108 BCF_UN_ALL: all 109 BCF_UN_SHR: all shared information (BCF_UN_STR|BCF_UN_FLT|BCF_UN_INFO) 110 111 BCF_UN_STR 112 / BCF_UN_FLT 113 | / BCF_UN_INFO 114 | | / ____________________________ BCF_UN_FMT 115 V V V / | | | 116 #CHROM POS ID REF ALT QUAL FILTER INFO FORMAT NA00001 NA00002 NA00003 ... 117 118 */ 119 class VCFRecord 120 { 121 bcf1_t* line; /// htslib structured record TODO: change to 'b' for better internal consistency? (vcf.h/c actually use line quite a bit in fn params) 122 123 VCFHeader *vcfheader; /// corresponding header (required); 124 /// is ptr to avoid copying struct containing ptr to bcf_hdr_t (leads to double free()) 125 126 /** VCFRecord 127 128 Construct a bcf/vcf record, backed by bcf1_t, from: an existing bcf1_t, parameters, or a VCF line. 129 130 Internal backing by bcf1_t means it must conform to the BCF2 rules -- i.e., header must contain 131 appropriate INFO, CONTIG, and FILTER lines. 132 133 Protip: specifying alternate MAX_UNPACK can speed it tremendously 134 as it will not unpack all fields, only up to those requested (see htslib.vcf) 135 For example, BCF_UN_STR is up to ALT inclusive, and BCF_UN_STR is up to FILTER 136 */ 137 this(T)(T *h, bcf1_t *b, int MAX_UNPACK = BCF_UN_ALL) 138 if(is(T == VCFHeader) || is(T == bcf_hdr_t)) 139 { 140 static if (is(T == VCFHeader)) this.vcfheader = h; 141 //else static if (is(T == bcf_hdr_t)) this.vcfheader = new VCFHeader(h); // double free() bug if we don't own bcf_hdr_t h 142 else static if (is(T == bcf_hdr_t)) assert(0); // ferret out situations that will lead to free() crashes 143 else assert(0); 144 145 this.line = b; 146 147 // Now it must be unpacked 148 // Protip: specifying alternate MAX_UNPACK can speed it tremendously 149 immutable int ret = bcf_unpack(this.line, MAX_UNPACK); // unsure what to do c̄ return value // @suppress(dscanner.suspicious.unused_variable) 150 } 151 /// ditto 152 this(SS)(VCFHeader *vcfhdr, string chrom, int pos, string id, string _ref, string alt, float qual, SS filter, ) 153 if (isSomeString!SS || is(SS == string[])) 154 { 155 this.line = bcf_init1(); 156 this.vcfheader = vcfhdr; 157 158 this.chrom = chrom; 159 this.pos = pos; 160 this.id = id; 161 162 this.setAlleles(_ref, alt); 163 164 this.qual = qual; 165 this.filter = filter; 166 } 167 /// ditto 168 this(VCFHeader *vcfhdr, string line, int MAX_UNPACK = BCF_UN_ALL) 169 { 170 this.vcfheader = vcfhdr; 171 172 kstring_t kline; 173 auto dupline = line.dup; // slower, but safer than a cast // @suppress(dscanner.suspicious.unmodified) 174 175 kline.l = dupline.length; 176 kline.m = dupline.length; 177 kline.s = dupline.ptr; 178 179 this.line = bcf_init1(); 180 this.line.max_unpack = MAX_UNPACK; 181 182 auto ret = vcf_parse(&kline, this.vcfheader.hdr, this.line); 183 if (ret < 0) { 184 hts_log_error(__FUNCTION__, "vcf_parse returned < 0 -- code error or malformed VCF line"); 185 } else { 186 ret = bcf_unpack(this.line, MAX_UNPACK); // unsure what to do c̄ return value 187 } 188 } 189 190 /// disable copying to prevent double-free (which should not come up except when writeln'ing) 191 //@disable this(this); // commented out as class has no postblit ctor (relevant when VCFRecord was struct) 192 /// dtor 193 ~this() 194 { 195 if (this.line) bcf_destroy1(this.line); 196 } 197 198 //////// FIXED FIELDS //////// 199 200 /* CHROM */ 201 /// Get chromosome (CHROM) 202 @property 203 string chrom() 204 { 205 if (!this.line.unpacked) 206 bcf_unpack(this.line, BCF_UN_STR); 207 208 return fromStringz(bcf_hdr_id2name(this.vcfheader.hdr, this.line.rid)).idup; 209 } 210 /// Set chromosome (CHROM) 211 @property 212 void chrom(const(char)[] c) 213 { 214 auto rid = bcf_hdr_name2id(this.vcfheader.hdr, toStringz(c)); 215 if (rid == -1) { 216 hts_log_error(__FUNCTION__, format("contig not found: %s", c)); 217 throw new Exception("contig not found"); 218 } 219 else line.rid = rid; 220 } 221 222 223 /* POS */ 224 /// Get position (POS) 225 // NB: internally BCF is uzing 0 based coordinates; we only show +1 when printing a VCF line with toString (which calls vcf_format) 226 @property 227 int pos() 228 out(coord) { assert(coord >= 0); } 229 do 230 { 231 if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR); 232 return this.line.pos; 233 } 234 /// Set position (POS) 235 @property 236 void pos(int p) 237 in { assert(p >= 0); } 238 do 239 { 240 // TODO: should we check for pos >= 0 && pos < contig length? Could really hamper performance. 241 // TODO: if writing out the file with invalid POS values crashes htslib, will consider it 242 this.line.pos = p; 243 } 244 245 246 /* ID */ 247 /// Get ID string 248 @property string id() 249 { 250 if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR); 251 return fromStringz(this.line.d.id).idup; 252 } 253 /// Sets new ID string; comma-separated list allowed but no dup checking performed 254 @property int id(const(char)[] id) 255 { 256 // bcf_update_id expects null pointer instead of empty string to mean "no value" 257 if (id == "") return bcf_update_id(this.vcfheader.hdr, this.line, null); 258 else return bcf_update_id(this.vcfheader.hdr, this.line, toStringz(id)); 259 } 260 /// Append an ID (htslib performs duplicate checking) 261 int addID(const(char)[] id) 262 { 263 if(id == "") return 0; 264 return bcf_add_id(this.vcfheader.hdr, this.line, toStringz(id)); 265 } 266 267 268 /* Alleles (REF, ALT) 269 270 Internally, alleles are stored as a \0 separated array: 271 [C \0 T \0 T T \0.. \0] 272 ^ ^ ^ ^ 273 | | L L ALT_1 = "TT" 274 | L ALT_0 275 L REF 276 277 TODO: Getters and setters poorly or inconsistently (?) named at this time or there are too many overloads? 278 TODO: need non-overwriting setter for ref and alt alleles 279 TODO: some of these may be inefficent; since they may be used in hot inner loops, pls optimize 280 */ 281 /// REF allele length 282 @property int refLen() 283 { 284 version(DigitalMars) pragma(inline); 285 version(LDC) pragma(inline, true); 286 version(GNU) pragma(inline, true); 287 return this.line.rlen; 288 } 289 /// All alleles getter (array) 290 @property string[] allelesAsArray() 291 { 292 string[] ret; 293 ret.length = this.line.n_allele; // n=0, no reference; n=1, ref but no alt 294 foreach(int i; 0 .. this.line.n_allele) // ref allele is index 0 295 { 296 ret[i] = fromStringz(this.line.d.allele[i]).idup; 297 } 298 return ret; 299 } 300 /// Reference allele getter 301 @property string refAllele() 302 { 303 if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR); 304 if (this.line.n_allele < 1) return ""; // a valid record could have no ref (or alt) alleles 305 else return fromStringz(this.line.d.als).idup; 306 } 307 // NB WIP: there could be zero, or multiple alt alleles 308 /+@property string altAlleles() 309 { 310 if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR); 311 return fromStringz(this.line.d.als) 312 }+/ 313 /// Alternate alleles getter version 1: ["A", "ACTG", ...] 314 @property string[] altAllelesAsArray() 315 { 316 if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR); 317 318 string[] ret; 319 if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt 320 return this.allelesAsArray[1 .. $]; // trim off REF 321 } 322 /// Alternate alleles getter version 2: "A,ACTG,..." 323 @property string altAllelesAsString() 324 { 325 if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR); 326 327 string ret; 328 if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt 329 330 char[] tmp; 331 tmp.length = this.line.d.m_allele; // max allocated size in htslib 332 333 char *a = this.line.d.allele[1]; // ref allele is index 0 334 const char *last = this.line.d.allele[this.line.n_allele - 1]; // pointer to start of last allele 335 size_t i = 0; 336 337 assert( this.line.d.allele[1] == (this.line.d.als + this.line.rlen + 1) ); // ensue data structure is as we think (+1: past ref's term \0) 338 339 while (i < this.line.d.m_allele) // safety stop at max allocated size 340 { 341 if (*a) tmp[i] = *a; 342 else { // '\0' 343 if (a < last) tmp[i] = ','; 344 else break; 345 } 346 i++; 347 a++; 348 } 349 tmp.length = i; 350 351 return tmp.idup; 352 } 353 /// Set alleles; comma-separated list 354 @property void alleles(string a) 355 { 356 if (a == "") { 357 this.line.n_allele = 0; 358 if (this.line.d.m_allele) this.line.d.als[0] = '\0'; // if storage allocated, zero out the REF 359 } 360 else 361 bcf_update_alleles_str(this.vcfheader.hdr, this.line, toStringz(a)); 362 } 363 /// Set alleles; array 364 @property void alleles(string[] a) 365 { 366 if (a.length == 0) { 367 this.line.n_allele = 0; 368 if (this.line.d.m_allele) this.line.d.als[0] = '\0'; // if storage allocated, zero out the REF 369 } 370 else { 371 const(char)*[64] allelesp; // will fail if a locus has more than 63 ALT alleles :-O 372 foreach(i; 0 .. a.length ) { 373 // In order to zero out all alleles, pass a zero-length allele to the string version, 374 // or a zero-length array to this version. Zero length string member of nonempty allele array is an error. 375 if (a[i].length == 0) { 376 hts_log_error(__FUNCTION__, "Zero-length allele in nonempty allele array. Setting to '.'"); 377 allelesp[i] = ".".ptr; 378 } 379 else allelesp[i] = toStringz(a[i]); 380 } 381 bcf_update_alleles(this.vcfheader.hdr, this.line, &allelesp[0], cast(int)a.length); 382 } 383 } 384 /// Set REF allele only 385 /// param r is \0-term Cstring 386 /// TODO: UNTESTED 387 void setRefAllele(const(char)* r) 388 { 389 // first, get REF 390 if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR); 391 // a valid record could have no ref (or alt) alleles 392 if (this.line.n_allele < 2) // if none or 1 (=REF only), just add the REF we receieved 393 bcf_update_alleles(this.vcfheader.hdr, this.line, &r, 1); 394 else { 395 // length of REF allele is allele[1] - allele[0], (minus one more for \0) 396 // TODO ** TODO ** : there is a property line.refLen already 397 const auto reflen = (this.line.d.allele[1] - this.line.d.allele[0]) - 1; 398 if (strlen(r) <= reflen) { 399 memcpy(this.line.d.allele[0], r, reflen + 1); // +1 -> copy a trailing \0 in case original REF was longer than new 400 // TODO: do we need to call _sync ? 401 } else { 402 // slower way: replace allele[0] with r, but keep rest of pointers poitning at existing allele block, 403 // then call bcf_update_alleles; this will make complete new copy of this.line.d.allele, so forgive the casts 404 this.line.d.allele[0] = cast(char*) r; 405 bcf_update_alleles(this.vcfheader.hdr, this.line, 406 cast( const(char)** ) this.line.d.allele, this.line.n_allele); 407 } 408 } 409 } 410 /// Set alleles; alt can be comma separated 411 void setAlleles(string _ref, string alt) 412 { 413 immutable string alleles = _ref ~ "," ~ alt ~ "\0"; 414 bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr); 415 } 416 /// Set alleles; min. 2 alleles (ref, alt1); unlimited alts may be specified 417 void setAlleles(string _ref, string alt, ...) 418 { 419 string alleles = _ref ~ "," ~ alt; 420 421 for (int i = 0; i < _arguments.length; i++) 422 { 423 alleles ~= "," ~ va_arg!string(_argptr); 424 } 425 426 alleles ~= "\0"; 427 428 bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr); 429 } 430 431 432 /* Quality (QUAL) */ 433 /// Get variant quality (QUAL) 434 @property float qual() 435 out(result) { assert(result >= 0); } 436 do 437 { 438 if (this.line.max_unpack < BCF_UN_FLT) bcf_unpack(this.line, BCF_UN_FLT); 439 return this.line.qual; 440 } 441 /// Set variant quality (QUAL) 442 @property void qual(float q) 443 in { assert(q >= 0); } 444 do { this.line.qual = q; } 445 446 447 /* FILTER */ 448 /// Get FILTER column (nothing in htslib sadly) 449 @property string filter() 450 { 451 const(char)[] ret; 452 453 if (this.line.max_unpack < BCF_UN_FLT) bcf_unpack(this.line, BCF_UN_FLT); 454 455 if (this.line.d.n_flt) { 456 for(int i; i< this.line.d.n_flt; i++) { 457 if (i) ret ~= ";"; 458 ret ~= fromStringz(this.vcfheader.hdr.id[BCF_DT_ID][ this.line.d.flt[0] ].key); 459 } 460 } else { 461 ret = "."; 462 } 463 464 return ret.idup; 465 } 466 /// Remove all entries in FILTER 467 void removeAllFilters() 468 { 469 // always returns zero 470 bcf_update_filter(this.vcfheader.hdr, this.line, null, 0); 471 } 472 /// Set the FILTER column to f 473 @property void filter(string f) 474 { 475 this.filter([f]); 476 } 477 /// Set the FILTER column to f0,f1,f2... 478 /// TODO: determine definitiely whether "." is replaced with "PASS" 479 @property void filter(string[] fs) 480 { 481 int[] fids; 482 foreach(f; fs) { 483 if(f == "") continue; 484 const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f)); 485 if (fid == -1) hts_log_warning(__FUNCTION__, format("filter not found in header (ignoring): %s", f) ); 486 else fids ~= fid; 487 } 488 if (fids.length > 0) 489 bcf_update_filter(this.vcfheader.hdr, this.line, fids.ptr, cast(int)fids.length); 490 else 491 hts_log_warning(__FUNCTION__, "No FILTER update was performed due to empty list"); 492 } 493 /// Add a filter; from htslib: 494 /// "If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed." 495 int addFilter(string f) 496 { 497 return bcf_add_filter(this.vcfheader.hdr, this.line, 498 bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f))); 499 } 500 /// Remove a filter by name 501 int removeFilter(string f) 502 { 503 const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f)); 504 return removeFilter(fid); 505 } 506 /// Remove a filter by numeric id 507 int removeFilter(int fid) 508 { 509 return bcf_remove_filter(this.vcfheader.hdr, this.line, fid, 0); 510 } 511 /// Determine whether FILTER is present. log warning if filter does not exist. "PASS" and "." can be used interchangeably. 512 bool hasFilter(string filter) 513 { 514 char[] f = filter.dup ~ '\0'; 515 //const auto id = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, f.ptr); 516 const auto ret = bcf_has_filter(this.vcfheader.hdr, this.line, f.ptr); 517 518 if (ret > 0) return true; 519 else if (ret == 0) return false; 520 else { 521 hts_log_warning(__FUNCTION__, format("FILTER %s does not exist in the header", filter)); 522 return false; 523 } 524 } 525 526 527 /* INFO */ 528 /+ 529 int tmpi = 1; 530 bcf_update_info_int32(this.vcfhdr.hdr, line, toStringz("NS"c), &tmpi, 1); 531 +/ 532 /// Add a tag:value to the INFO column -- tag must already exist in the header 533 void addInfo(T)(string tag, T data) 534 { 535 int ret = -1; 536 537 static if(isIntegral!T) { 538 auto integer = cast(int) data; 539 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1); 540 } 541 542 else static if(isFloatingPoint!T) { 543 auto flt = cast(float) data; // simply passing "2.0" (or whatever) => data is a double 544 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1); 545 } 546 547 else static if(isSomeString!T) 548 ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data)); 549 550 else static if(isBoolean!T) { 551 immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 552 ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set); 553 } 554 555 if (ret == -1) 556 hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data)); 557 } 558 /// ditto 559 /// This handles a vector of values for the tag 560 void addInfo(T)(string tag, T[] data) 561 if(!is(T==immutable(char))) // otherwise string::immutable(char)[] will match the other template 562 { 563 int ret = -1; 564 565 static if(isIntegral!T) { 566 auto integer = cast(int) data; 567 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, data.length); 568 } 569 570 else static if(isFloatingPoint!T) { 571 auto flt = cast(float) data; // simply passing "2.0" (or whatever) => data is a double 572 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, data.length); 573 } 574 575 if (ret == -1) 576 hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data)); 577 } 578 579 /* FORMAT (sample info) */ 580 /// bc_update_format_{int32,flat,string,flag} 581 void addFormat(T)(string tag, T data) 582 if(!isArray!T) 583 { 584 int ret = -1; 585 586 static if(isIntegral!T) { 587 auto integer = cast(int) data; 588 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1); 589 } 590 591 else static if(isFloatingPoint!T) { 592 auto flt = cast(float) data; // simply passing "2.0" (or whatever) => data is a double 593 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1); 594 } 595 596 else static if(isSomeString!T) 597 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data)); 598 599 else static if(isBoolean!T) { 600 immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 601 ret = bcf_update_format_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set); 602 } 603 604 if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data)); 605 } 606 /// auto bcf_update_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, int *values, int n) // @suppress(dscanner.style.undocumented_declaration) 607 void addFormat(T)(string tag, T[] data) 608 { 609 int ret = -1; 610 611 static if(isIntegral!T) { 612 auto integer = cast(int[]) data; 613 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), 614 integer.ptr, cast(int)data.length); 615 } 616 617 else static if(isFloatingPoint!T) { 618 auto flt = cast(float[]) data; // simply passing "2.0" (or whatever) => data is a double 619 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), 620 flt.ptr, cast(int)data.length); 621 } 622 623 if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data)); 624 625 } 626 627 /// add INFO or FORMAT key:value pairs to a record 628 /// add a single datapoint OR vector of values, OR, values to each sample (if tagType == FORMAT) 629 void add(string tagType, T)(const(char)[] tag, T data) 630 if((tagType == "INFO" || tagType == "FORMAT") && 631 (isIntegral!T || isIntegral!(ElementType!T) || 632 isFloatingPoint!T || isFloatingPoint!(ElementType!T) || 633 isSomeString!T || isSomeString!(ElementType!T) || 634 isBoolean!T || isBoolean!(ElementType!T))) 635 { 636 int ret = -1; 637 int len; 638 639 static if (!isDynamicArray!T) { 640 len = 1; 641 static if (isIntegral!T) auto d = data.to!int; 642 else static if (isFloatingPoint!T) auto d = data.to!float; 643 else static if (isSomeString!T) auto d = toStringz(data); 644 else static if (isBoolean!T) immutable int d = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 645 else static assert(0); 646 647 const auto ptr = &d; 648 } 649 else static if (isDynamicArray!T) { 650 assert(data.length < int.max); 651 len = cast(int) data.length; 652 static if (isIntegral!(ElementType!T)) auto d = data.to!(int[]); 653 else static if (isFloatingPoint!(ElementType!T)) auto d = data.to!(float[]); 654 else static if (isSomeString!(ElementType!T)) { 655 char[] d; 656 foreach(s; data) { 657 d ~= s ~ "\0"; 658 } 659 if(d.length == 0) d ~= "\0"; // TODO replace with asserts on data length earlier 660 } 661 else static if (isBoolean!(ElementType!T)) { 662 int[] d; 663 foreach(b; data) { 664 d ~= (b ? 1 : 0); 665 } 666 } 667 668 const auto ptr = d.ptr; 669 } 670 else static assert(0); 671 672 static if (tagType == "INFO") { 673 static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int)) 674 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 675 else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float)) 676 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 677 else static if (isSomeString!T || isSomeString!(ElementType!T)) 678 ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr); 679 else static if (is(T == bool) || is(ElementType!T == bool)) 680 ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 681 else static assert(0, "Type not recognized for INFO tag"); 682 } 683 else static if (tagType == "FORMAT") { 684 static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int)) 685 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 686 else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float)) 687 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 688 else static if (isSomeString!T || isSomeString!(ElementType!T)) 689 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len); 690 else static assert(0, "Type not recognized for FORMAT tag"); 691 } 692 else static assert(0); 693 694 if (ret == -1) 695 hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data)); 696 } 697 698 override string toString() const 699 { 700 kstring_t s; 701 702 const int ret = vcf_format(this.vcfheader.hdr, this.line, &s); 703 if (ret) 704 { 705 hts_log_error(__FUNCTION__, 706 format("vcf_format returned nonzero (%d) (likely EINVAL, invalid bcf1_t struct?)", ret)); 707 return "[VCFRecord vcf_format parse_error]"; 708 } 709 710 return cast(string) s.s[0 .. s.l]; 711 } 712 } 713 714 /** Basic support for writing VCF, BCF files */ 715 struct VCFWriter 716 { 717 // htslib data structures 718 vcfFile *fp; /// htsFile 719 //bcf_hdr_t *hdr; /// header 720 VCFHeader *vcfhdr; /// header wrapper -- no copies 721 bcf1_t*[] rows; /// individual records 722 723 @disable this(); 724 /// open file or network resources for writing 725 /// setup incl header allocation 726 /// if mode==w: 727 /// bcf_hdr_init automatically sets version string (##fileformat=VCFv4.2) 728 /// bcf_hdr_init automatically adds the PASS filter 729 this(string fn) 730 { 731 if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor"); 732 this.fp = vcf_open(toStringz(fn), toStringz("w"c)); 733 if (!this.fp) throw new Exception("Could not hts_open file"); 734 735 this.vcfhdr = new VCFHeader( bcf_hdr_init("w\0"c.ptr)); 736 addFiledate(); 737 bcf_hdr_sync(this.vcfhdr.hdr); 738 739 /+ 740 hts_log_trace(__FUNCTION__, "Displaying header at construction"); 741 // int bcf_hdr_format(const(bcf_hdr_t) *hdr, int is_bcf, kstring_t *str); 742 kstring_t ks; 743 bcf_hdr_format(this.vcfhdr.hdr, 0, &ks); 744 char[] hdr; 745 hdr.length = ks.l; 746 import core.stdc.string: memcpy; 747 memcpy(hdr.ptr, ks.s, ks.l); 748 import std.stdio: writeln; 749 writeln(hdr); 750 +/ 751 } 752 /// setup and copy a header from another BCF/VCF as template 753 this(T)(string fn, T other) 754 if(is(T == VCFHeader*) || is(T == bcf_hdr_t*)) 755 { 756 if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor"); 757 this.fp = vcf_open(toStringz(fn), toStringz("w"c)); 758 if (!this.fp) throw new Exception("Could not hts_open file"); 759 760 static if(is(T == VCFHeader*)) { this.vcfhdr = new VCFHeader( bcf_hdr_dup(other.hdr) ); } 761 else static if(is(T == bcf_hdr_t*)) { this.vcfhdr = new VCFHeader( bcf_hdr_dup(other) ); } 762 } 763 /// dtor 764 ~this() 765 { 766 const ret = vcf_close(this.fp); 767 if (ret != 0) hts_log_error(__FUNCTION__, "couldn't close VCF after writing"); 768 769 // Deallocate header 770 //bcf_hdr_destroy(this.hdr); 771 // 2018-09-15: Do not deallocate header; will be free'd by VCFHeader dtor 772 } 773 invariant 774 { 775 assert(this.vcfhdr != null); 776 } 777 778 VCFHeader* getHeader() 779 { 780 return this.vcfhdr; 781 } 782 783 // TODO 784 /// copy header lines from a template without overwiting existing lines 785 void copyHeaderLines(bcf_hdr_t *other) 786 { 787 assert(this.vcfhdr != null); 788 assert(0); 789 // bcf_hdr_t *bcf_hdr_merge(bcf_hdr_t *dst, const(bcf_hdr_t) *src); 790 } 791 792 /// Add sample to this VCF 793 /// * int bcf_hdr_add_sample(bcf_hdr_t *hdr, const(char) *sample); 794 int addSample(string name) 795 in { assert(name != ""); } 796 do 797 { 798 assert(this.vcfhdr != null); 799 800 bcf_hdr_add_sample(this.vcfhdr.hdr, toStringz(name)); 801 802 // AARRRRGGGHHHH 803 // https://github.com/samtools/htslib/issues/767 804 bcf_hdr_sync(this.vcfhdr.hdr); 805 806 return 0; 807 } 808 809 /// Add a new header line 810 int addHeaderLineKV(string key, string value) 811 { 812 // TODO check that key is not INFO, FILTER, FORMAT (or contig?) 813 string line = format("##%s=%s", key, value); 814 815 return bcf_hdr_append(this.vcfhdr.hdr, toStringz(line)); 816 } 817 /// Add a new header line -- must be formatted ##key=value 818 int addHeaderLineRaw(string line) 819 { 820 assert(this.vcfhdr != null); 821 // int bcf_hdr_append(bcf_hdr_t *h, const(char) *line); 822 const auto ret = bcf_hdr_append(this.vcfhdr.hdr, toStringz(line)); 823 bcf_hdr_sync(this.vcfhdr.hdr); 824 return ret; 825 } 826 /// Add a filedate= headerline, which is not called out specifically in the spec, 827 /// but appears in the spec's example files. We could consider allowing a param here. 828 int addFiledate() 829 { 830 return addHeaderLineKV("filedate", (cast(Date) Clock.currTime()).toISOString ); 831 } 832 833 /** Add INFO (§1.2.2) or FORMAT (§1.2.4) tag 834 835 The INFO tag describes row-specific keys used in the INFO column; 836 The FORMAT tag describes sample-specific keys used in the last, and optional, genotype column. 837 838 Template parameter: string; must be INFO or FORMAT 839 840 The first four parameters are required; NUMBER and TYPE have specific allowable values. 841 source and version are optional, but recommended (for INFO only). 842 843 * id: ID tag 844 * number: NUMBER tag; here a string because it can also take special values {A,R,G,.} (see §1.2.2) 845 * type: Integer, Float, Flag, Character, and String 846 * description: Text description; will be double quoted 847 * source: Annotation source (eg dbSNP) 848 * version: Annotation version (eg 142) 849 */ 850 void addTag(string tagType, T)( string id, 851 T number, 852 string type, 853 string description, 854 string source="", 855 string _version="") 856 if((tagType == "INFO" || tagType == "FORMAT") && (isIntegral!T || isSomeString!T)) 857 { 858 string line; // we'll suffix with \0, don't worry 859 860 // check ID 861 if (id == "") { 862 hts_log_error(__FUNCTION__, "no ID"); 863 return; 864 } 865 866 // check Number is either a special code {A,R,G,.} or an integer 867 static if(isSomeString!T) { 868 if (number != "A" && 869 number != "R" && 870 number != "G" && 871 number != ".") { 872 // not a special ; check if integer 873 try { 874 number.to!int; // don't need to store result, will use format/%s 875 } 876 catch (ConvException e) { 877 hts_log_error(__FUNCTION__, "Number not A/R/G/. nor an integer"); 878 return; 879 } 880 } 881 } 882 883 // check Type 884 if (type != "Integer" && 885 type != "Float" && 886 type != "Flag" && 887 type != "Character" && 888 type != "String") { 889 hts_log_error(__FUNCTION__, "unrecognized type"); 890 return; 891 } 892 893 // check Description 894 if (description == "") hts_log_error(__FUNCTION__, "no description"); 895 896 // check Source and Version 897 if (source == "" && _version != "") hts_log_error(__FUNCTION__, "version wo source"); 898 899 // Format params 900 if (source != "" && _version != "") 901 line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\",Source=\"%s\",Version=\"%s\">\0", 902 tagType, id, number, type, description, source, _version); 903 else if (source != "" && _version == "") 904 line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\",Source=\"%s\">\0", 905 tagType, id, number, type, description, source); 906 else 907 line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\">\0", 908 tagType, id, number, type, description); 909 910 bcf_hdr_append(this.vcfhdr.hdr, line.ptr); 911 } 912 913 /** Add FILTER tag (§1.2.3) */ 914 void addTag(string tagType)(string id, string description) 915 if(tagType == "FILTER") 916 { 917 // check ID 918 if (id == "") { 919 hts_log_error(__FUNCTION__, "no ID"); 920 return; 921 } 922 // check Description 923 if (description == "") hts_log_error(__FUNCTION__, "no description"); 924 925 string line = format("##FILTER=<ID=%s,Description=\"%s\">\0", id, description); 926 bcf_hdr_append(this.vcfhdr.hdr, line.ptr); 927 } 928 929 /** Add FILTER tag (§1.2.3) */ 930 deprecated void addFilterTag(string id, string description) 931 { 932 string filter = format("##FILTER=<ID=%s,Description=\"%s\">\0", id, description); 933 bcf_hdr_append(this.vcfhdr.hdr, filter.ptr); 934 } 935 936 /** Add contig definition (§1.2.7) to header meta-info 937 938 other: "url=...,md5=...,etc." 939 */ 940 auto addTag(string tagType)(const(char)[] id, const int length = 0, string other = "") 941 if(tagType == "contig" || tagType == "CONTIG") 942 { 943 const(char)[] contig = "##contig=<ID=" ~ id ~ 944 (length > 0 ? ",length=" ~ length.to!string : "") ~ 945 (other != "" ? "," ~ other : "") ~ 946 ">\0"; 947 948 return bcf_hdr_append(this.vcfhdr.hdr, contig.ptr); 949 } 950 951 /** 952 Add a record 953 954 alleles: comma-separated string, including ref allele 955 qual: a float, but accepts any numeric 956 */ 957 int addRecord(S, N)(S contig, int pos, S id, S alleles, N qual, S[] filters) 958 if ( (isSomeString!S || is(S == char*) ) && isNumeric!N) 959 { 960 bcf1_t *line = new bcf1_t; 961 962 line.rid = bcf_hdr_name2id(this.vcfhdr.hdr, toStringz(contig)); 963 if (line.rid == -1) hts_log_error(__FUNCTION__, "contig not found"); 964 965 line.pos = pos; 966 967 bcf_update_id(this.vcfhdr.hdr, line, (id == "" ? null : toStringz(id)) ); // TODO: could support >1 id with array as with the filters 968 969 bcf_update_alleles_str(this.vcfhdr.hdr, line, toStringz(alleles)); 970 971 line.qual = qual; 972 973 // Update filter(s); if/else blocks for speed 974 if(filters.length == 0) 975 { 976 int pass = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz("PASS"c)); 977 bcf_update_filter(this.vcfhdr.hdr, line, &pass, 1); 978 } 979 else if(filters.length == 1) 980 { 981 int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(filters[0])); 982 if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", filters[0]) ); 983 bcf_update_filter(this.vcfhdr.hdr, line, &fid, 1); 984 } 985 else // TODO: factor out the check for -1 into a safe_update_filter or something 986 { 987 int[] filter_ids; 988 foreach(f; filters) { 989 const int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(f)); 990 if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", f) ); 991 else filter_ids ~= fid; 992 } 993 bcf_update_filter(this.vcfhdr.hdr, line, filter_ids.ptr, cast(int)filter_ids.length ); 994 } 995 996 // Add a record 997 /+ 998 // .. FORMAT 999 bcf_hdr_append(hdr, "##FORMAT=<ID=TF,Number=1,Type=Float,Description=\"Test Float\">"); 1000 float[4] test; 1001 bcf_float_set_missing(test[0]); 1002 test[1] = 47.11f; 1003 bcf_float_set_vector_end(test[2]); 1004 writeln("pre update format float"); 1005 bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("TF"), &test[0], 4); 1006 +/ 1007 int tmpi = 1; 1008 bcf_update_info_int32(this.vcfhdr.hdr, line, toStringz("NS"c), &tmpi, 1); 1009 1010 // Add the actual sample 1011 int[4] dp = [ 9000, 1, 2, 3]; 1012 bcf_update_format(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1, BCF_HT_INT); 1013 //int dp = 9000; 1014 //bcf_update_format_int32(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1); 1015 //auto f = new float; 1016 //*f = 1.0; 1017 //bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("XF"c), f, 1); 1018 1019 this.rows ~= line; 1020 1021 return 0; 1022 } 1023 1024 /// as expected 1025 int writeHeader() 1026 { 1027 return bcf_hdr_write(this.fp, this.vcfhdr.hdr); 1028 } 1029 /// as expected 1030 int writeRecord(ref VCFRecord r) 1031 { 1032 const ret = bcf_write(this.fp, this.vcfhdr.hdr, r.line); 1033 if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error"); 1034 return ret; 1035 } 1036 /// as expected 1037 int writeRecord(bcf_hdr_t *hdr, bcf1_t *rec) 1038 { 1039 hts_log_warning(__FUNCTION__, "pre call"); 1040 const ret = bcf_write(this.fp, hdr, rec); 1041 hts_log_warning(__FUNCTION__, "post call"); 1042 if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error"); 1043 return ret; 1044 } 1045 } 1046 1047 /** Basic support for reading VCF, BCF files */ 1048 struct VCFReader 1049 { 1050 // htslib data structures 1051 vcfFile *fp; /// htsFile 1052 //bcf_hdr_t *hdr; /// header 1053 VCFHeader *vcfhdr; /// header wrapper -- no copies 1054 bcf1_t* b; /// record for use in iterator, will be recycled 1055 1056 int MAX_UNPACK; /// see dhtslib.htslib.vcf 1057 1058 private static int refct; 1059 1060 this(this) 1061 { 1062 this.refct++; 1063 } 1064 @disable this(); 1065 /// read existing VCF file 1066 /// MAX_UNPACK: setting alternate value could speed reading 1067 this(string fn, int MAX_UNPACK = BCF_UN_ALL) 1068 { 1069 import dhtslib.htslib.hts : hts_set_threads; 1070 1071 if (fn == "") throw new Exception("Empty filename passed to VCFReader constructor"); 1072 this.fp = vcf_open(toStringz(fn), "r"c.ptr); 1073 if (!this.fp) throw new Exception("Could not hts_open file"); 1074 1075 hts_set_threads(this.fp, 1); // extra decoding thread 1076 1077 this.vcfhdr = new VCFHeader( bcf_hdr_read(this.fp)); 1078 1079 this.b = bcf_init1(); 1080 this.b.max_unpack = MAX_UNPACK; 1081 this.MAX_UNPACK = MAX_UNPACK; 1082 } 1083 /// dtor 1084 ~this() 1085 { 1086 this.refct--; 1087 1088 // block file close and bcf1_t free() with reference counting 1089 // to allow VCFReader to implement Range interface 1090 if(!this.refct) { 1091 const ret = vcf_close(this.fp); 1092 if (ret != 0) hts_log_error(__FUNCTION__, "couldn't close VCF after reading"); 1093 1094 // Deallocate header 1095 //bcf_hdr_destroy(this.hdr); 1096 // 2018-09-15: Do not deallocate header; will be free'd by VCFHeader dtor 1097 1098 bcf_destroy1(this.b); 1099 } 1100 } 1101 invariant 1102 { 1103 assert(this.vcfhdr != null); 1104 } 1105 1106 VCFHeader* getHeader() 1107 { 1108 return this.vcfhdr; 1109 } 1110 1111 /** VCF version, e.g. VCFv4.2 */ 1112 @property string vcfVersion() { return cast(string) fromStringz( bcf_hdr_get_version(this.vcfhdr.hdr) ).idup; } 1113 1114 /++ 1115 bcf_hrec_t *bcf_hdr_get_hrec(const(bcf_hdr_t) *hdr, int type, const(char) *key, const(char) *value, 1116 const(char) *str_class); 1117 +/ 1118 // TODO: check key for "", fail if empty 1119 // TODO: check value, str_class for "" and replace with NULL 1120 // TODO: Memory leak. We are never freeing the bcf_hrec_t, but, it escapes as pointer inside key/value string 1121 // TODO: handle structured and general lines 1122 string[string] getTagByKV(string tagType, T)(string key, string value, string str_class) 1123 if((tagType == "FILTER" || tagType == "INFO" || tagType == "FORMAT" && tagType == "contig") && 1124 (isIntegral!T || isSomeString!T)) 1125 { 1126 // hlt : header line type 1127 static if (tagType == "FILTER") const int hlt = BCF_HL_FLT; 1128 else static if (tagType == "INFO") const int hlt = BCF_HL_INFO; // @suppress(dscanner.suspicious.label_var_same_name) 1129 else static if (tagType == "FORMAT") const int hlt= BCF_HL_FMT; // @suppress(dscanner.suspicious.label_var_same_name) 1130 else static if (tagType == "contig") const int hlt= BCF_HL_CTG; // @suppress(dscanner.suspicious.label_var_same_name) 1131 else assert(0); 1132 1133 bcf_hrec_t *hrec = bcf_hdr_get_hrec(this.vcfhdr.hdr, hlt, toStringz(key), 1134 toStringz(value), 1135 toStringz(str_class)); 1136 1137 const int nkeys = hrec.nkeys; 1138 string[string] kv; 1139 1140 foreach(int i; 0 .. nkeys) { 1141 string k = cast(string) fromStringz(hrec.keys[i]); 1142 string v = cast(string) fromStringz(hrec.vals[i]); 1143 kv[k] = v; 1144 } 1145 1146 return kv; 1147 } 1148 1149 /// InputRange interface: iterate over all records 1150 @property bool empty() 1151 { 1152 // int bcf_read(htsFile *fp, const(bcf_hdr_t) *h, bcf1_t *v); 1153 // documentation claims returns -1 on critical errors, 0 otherwise 1154 // however it looks like -1 is EOF and -2 is critical errors? 1155 immutable success = bcf_read(this.fp, this.vcfhdr.hdr, this.b); 1156 if (success >= 0) return false; 1157 else if (success == -1) { 1158 // EOF 1159 // see htslib my comments https://github.com/samtools/htslib/issues/246 1160 // and commit 9845bc9a947350d0f34e6ce69e79ab81b6339bd2 1161 return true; 1162 } 1163 else { 1164 hts_log_error(__FUNCTION__, "*** CRITICAL ERROR bcf_read < -1"); 1165 // TODO: check b->errcode 1166 return true; 1167 } 1168 } 1169 /// ditto 1170 void popFront() 1171 { 1172 // noop? 1173 // free this.b ? 1174 //bam_destroy1(this.b); 1175 1176 // TODO: clear (not destroy) the bcf1_t for reuse? 1177 } 1178 /// ditto 1179 VCFRecord front() 1180 { 1181 // note that VCFRecord's constructor will be responsible for 1182 // * unpacking and 1183 // * destroying 1184 // its copy 1185 return new VCFRecord(this.vcfhdr, bcf_dup(this.b), this.MAX_UNPACK); 1186 } 1187 } 1188 1189 /// 1190 debug(dhtslib_unittest) 1191 unittest 1192 { 1193 import std.exception: assertThrown; 1194 import std.stdio: writeln, writefln; 1195 1196 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 1197 1198 1199 auto vw = VCFWriter("/dev/null"); 1200 1201 vw.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">"); 1202 vw.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">"); 1203 // ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency"> 1204 vw.addTag!"INFO"("AF", "A", "Integer", "Number of Samples With Data"); 1205 vw.addHeaderLineRaw("##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"); // @suppress(dscanner.style.long_line) 1206 vw.addHeaderLineRaw("##FILTER=<ID=q10,Description=\"Quality below 10\">"); 1207 1208 // Exercise header 1209 assert(vw.vcfhdr.nsamples == 0); 1210 vw.addSample("NA12878"); 1211 assert(vw.vcfhdr.nsamples == 1); 1212 1213 auto r = new VCFRecord(vw.vcfhdr, bcf_init1()); 1214 1215 r.chrom = "20"; 1216 assert(r.chrom == "20"); 1217 assertThrown(r.chrom = "chr20"); // headerline chromosome is "20" not "chr20" 1218 1219 r.pos = 999_999; 1220 assert(r.pos == 999_999); 1221 r.pos = 62_435_964 + 1; // Exceeds contig length 1222 1223 // Test ID field 1224 // note that in an empty freshly initialized bcf1_t, the ID field is "" rather than "." 1225 // Will consider adding initialization code 1226 //writefln("ID: %s", r.id); 1227 r.id = ""; 1228 assert(r.id == "."); 1229 r.id = "."; 1230 assert(r.id == "."); 1231 r.id = "rs001"; 1232 assert(r.id == "rs001"); 1233 r.addID("rs999"); 1234 assert(r.id == "rs001;rs999"); 1235 r.addID("rs001"); // test duplicate checking 1236 assert(r.id == "rs001;rs999"); 1237 1238 // Test REF/ALT allele setters and getters 1239 // many overloads to test for good coverage 1240 // Test matrix: Set {zero, one, two, three} alleles * {set by string, set by array} = 8 test cases 1241 // * also, there is setAlleles(ref, alt) and setAlleles(ref, alt1, ...) 1242 // * TODO: will also need to retreive alt alleles as array and string 1243 1244 string[] alleles_array; 1245 1246 // Zero alleles 1247 r.alleles(""); 1248 const auto zs = r.allelesAsArray; 1249 const auto zsr = r.refAllele; 1250 assert(zs.length == 0); 1251 assert(zsr == ""); 1252 1253 r.alleles(alleles_array); 1254 const auto za = r.allelesAsArray; 1255 const auto zar = r.refAllele; 1256 assert(za.length == 0); 1257 assert(zar == ""); 1258 1259 // One allele 1260 r.alleles("C"); 1261 const auto os = r.allelesAsArray; 1262 const auto osr = r.refAllele; 1263 assert(os.length == 1 && os[0] == "C"); 1264 assert(osr == "C"); 1265 1266 r.alleles(["C"]); 1267 const auto oa = r.allelesAsArray; 1268 const auto oar = r.refAllele; 1269 assert(oa.length == 1 && oa[0] == "C"); 1270 assert(oar == "C"); 1271 1272 // Two alleles 1273 r.alleles("C,T"); 1274 const auto ts = r.allelesAsArray; 1275 const auto tsr= r.refAllele; 1276 assert(ts == ["C", "T"]); 1277 assert(tsr== "C"); 1278 1279 r.alleles(["C", "T"]); 1280 const auto ta = r.allelesAsArray; 1281 const auto tar= r.refAllele; 1282 assert(ta == ["C", "T"]); 1283 assert(tar== "C"); 1284 1285 const taaa = r.altAllelesAsArray; 1286 const taas = r.altAllelesAsString; 1287 assert(taaa == ["T"]); 1288 assert(taas == "T"); 1289 1290 // alternate setter for >= 2 alleles 1291 r.setAlleles("A", "G"); 1292 assert(r.allelesAsArray == ["A", "G"]); 1293 1294 // Three alleles 1295 r.alleles("A,C,T"); 1296 assert(r.allelesAsArray == ["A", "C", "T"]); 1297 assert(r.refAllele == "A"); 1298 assert(r.altAllelesAsString == "C,T"); 1299 1300 // alternate the alleles for testing purposes 1301 r.alleles(["G", "A", "C"]); 1302 assert(r.allelesAsArray == ["G", "A", "C"]); 1303 assert(r.refAllele == "G"); 1304 assert(r.altAllelesAsString == "A,C"); 1305 1306 r.setAlleles("A", "C", "T"); 1307 assert(r.allelesAsArray == ["A", "C", "T"]); 1308 assert(r.refAllele == "A"); 1309 assert(r.altAllelesAsString == "C,T"); 1310 1311 1312 // Test QUAL 1313 r.qual = 1.0; 1314 assert(r.qual == 1.0); 1315 // now test setting qual without unpacking 1316 // TODO: see https://forum.dlang.org/post/hebouvswxlslqhovzaia@forum.dlang.org, once resolved (or once factory function written), 1317 // add template value param BCF_UN_STR 1318 auto rr = new VCFRecord(vw.vcfhdr, "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0.017\n"); // @suppress(dscanner.style.long_line) 1319 rr.qual = 3.0; 1320 assert(rr.qual == 3.0); 1321 1322 1323 // Test FILTER 1324 assert(r.hasFilter("PASS")); 1325 assert(r.hasFilter(".")); 1326 // add q10 filter (is in header) 1327 r.filter = "q10"; 1328 assert(r.filter == "q10"); 1329 assert(!r.hasFilter("PASS")); // if has another filter, no longer PASS (unless added explicitly) 1330 assert(!r.hasFilter(".")); // if has another filter, no longer has . (which is PASS, or no filter [spec conflict?]) 1331 1332 // add q30 filteR (not in header) 1333 r.filter = "q30"; 1334 assert(r.filter == "q10"); // i.e., unchanged 1335 1336 assert(r.hasFilter("q10")); 1337 assert(!r.hasFilter("q30")); 1338 1339 r.removeAllFilters(); 1340 assert(r.hasFilter(".")); 1341 r.addFilter("q10"); 1342 assert(r.hasFilter("q10")); 1343 r.removeFilter("q10"); 1344 assert(r.hasFilter("PASS")); 1345 1346 1347 // Finally, print the records: 1348 writefln("VCF records via toString:\n%s%s", r, rr); 1349 1350 writeln("\ndhtslib.vcf: all tests passed\n"); 1351 }