1 /** Module provides VCF Reader/Writer
2 
3 VCF version 4.2 (including BCF) reader and writer, including
4 a model with convenience functions for the header (metadata)
5 and individual VCF/BCF records (rows).
6 
7 Specifications: https://samtools.github.io/hts-specs/VCFv4.2.pdf
8 
9 */
10 module dhtslib.vcf.record;
11 
12 import core.stdc.string;
13 import core.vararg;
14 
15 import std.conv: to, ConvException;
16 import std.format: format;
17 import std.range;
18 import std.string: fromStringz, toStringz;
19 import std.utf : toUTFz;
20 import std.algorithm : map;
21 import std.array : array;
22 import std.traits;
23 import std.meta : staticIndexOf;
24 
25 import dhtslib.vcf;
26 import dhtslib.coordinates;
27 import dhtslib.memory;
28 import htslib.hts_log;
29 import htslib.kstring;
30 import htslib.vcf;
31 
32 alias BCFRecord = VCFRecord;
33 
34 /// Struct to aid in conversion of VCF info data
35 /// into D types
36 struct InfoField
37 {
38     /// String identifier of info field
39     string key;
40     /// BCF TYPE indicator
41     BcfRecordType type;
42     /// number of data elements
43     int len;
44     /// reference of info field data
45     ubyte[] data;
46 
47     /// VCFRecord refct
48     Bcf1 line;
49 
50     /// ctor from VCFRecord
51     this(string key, bcf_info_t * info, Bcf1 line)
52     {
53         this.line = line;
54         this(key, info);
55     }
56 
57     /// string, info field ctor
58     this(string key, bcf_info_t * info)
59     {
60         /// get type
61         this.type = cast(BcfRecordType)(info.type & 0b1111);
62         /// get len
63         this.len = info.len;
64         /// copy data
65         this.data = info.vptr[0.. info.vptr_len];
66         /// double check our lengths
67         debug{
68             final switch(this.type){
69                 case BcfRecordType.Int8:
70                     assert(data.length == this.len * byte.sizeof);
71                     break;
72                 case BcfRecordType.Int16:
73                     assert(data.length == this.len * short.sizeof);
74                     break;
75                 case BcfRecordType.Int32:
76                     assert(data.length == this.len * int.sizeof);
77                     break;
78                 case BcfRecordType.Int64:
79                     assert(data.length == this.len * long.sizeof);
80                     break;
81                 case BcfRecordType.Float:
82                     assert(data.length == this.len * float.sizeof);
83                     break;
84                 case BcfRecordType.Char:
85                     assert(data.length == this.len * char.sizeof);
86                     break;
87                 case BcfRecordType.Null:
88                     assert(data.length == 0);
89             }
90         }
91     }
92 
93     /// convert to a D type array if int, float, or bool type array
94     T[] to(T: T[])()
95     if(isIntegral!T || is(T == float) || isBoolean!T)
96     {
97         static if(isIntegral!T){
98             /// if we select the correct type just slice and return
99             if(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType))
100                 return (cast(T *)this.data.ptr)[0 .. T.sizeof * this.len];
101             /// if we select type that is too small log error and return
102             if(RecordTypeSizes[this.type] > T.sizeof)
103             {
104                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
105                 return [];
106             }
107             T[] ret;
108             /// if we select type that is bigger slice, convert and return
109             switch(this.type){
110                 case BcfRecordType.Int8:
111                     ret = ((cast(byte *)this.data.ptr)[0 .. this.len]).to!(T[]);
112                     break;
113                 case BcfRecordType.Int16:
114                     ret = ((cast(short *)this.data.ptr)[0 .. short.sizeof * this.len]).to!(T[]);
115                     break;
116                 case BcfRecordType.Int32:
117                     ret = ((cast(int *)this.data.ptr)[0 .. int.sizeof * this.len]).to!(T[]);
118                     break;
119                 case BcfRecordType.Int64:
120                     ret = ((cast(long *)this.data.ptr)[0 .. long.sizeof * this.len]).to!(T[]);
121                     break;
122                 default:
123                     hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(this.type, T.stringof));
124                     return [];
125             }
126             return ret;
127         }else{
128             /// if we select type the wrong type error and return
129             if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)))
130             {
131                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
132                 return [];
133             }
134             return (cast(T *)this.data.ptr)[0 .. T.sizeof * this.len];
135         }
136     }
137 
138     /// convert to a D type if int, float, or bool type
139     T to(T)()
140     if(isIntegral!T || is(T == float) || isBoolean!T)
141     {
142         /// if bool return true
143         static if(isBoolean!T)
144             return true;
145         else{
146             /// if info field has > 1 element error and return
147             if(this.len != 1){
148                 hts_log_error(__FUNCTION__, "This info field has a length of %d not 1".format(this.len));
149                 return T.init;
150             }
151             static if(isIntegral!T){
152                 /// if we select type that is too small log error and return
153                 if(RecordTypeSizes[this.type] > T.sizeof)
154                 {
155                     hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
156                     return T.init;
157                 }
158                 /// just gonna use the array impl and grab index 0
159                 return this.to!(T[])[0];
160             }else{
161                 /// if we select type the wrong type error and return
162                 if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)))
163                 {
164                     hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
165                     return T.init;
166                 }
167                 /// just gonna use the array impl and grab index 0
168                 return this.to!(T[])[0];
169             }
170         }   
171     }
172 
173     /// convert to a string
174     T to(T)()
175     if(isSomeString!T)
176     {
177         /// if we select type the wrong type error and return
178         if(!(this.type == BcfRecordType.Char))
179         {
180             hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
181             return T.init;
182         }
183         /// otherwise slice and dup
184         return cast(T)(cast(char *)this.data.ptr)[0 .. this.len].dup;
185     }
186 }
187 
188 /// Struct to aid in conversion of VCF format data
189 /// into D types
190 struct FormatField
191 {
192     /// String identifier of info field
193     string key;
194     /// BCF TYPE indicator
195     BcfRecordType type;
196     /// number of samples
197     int nSamples;
198     /// number of data elements per sample
199     int n;
200     /// number of bytes per sample
201     int size;
202     /// reference of info field data
203     ubyte[] data;
204 
205     /// VCFRecord refct
206     Bcf1 line;
207 
208     /// ctor from VCFRecord
209     this(string key, bcf_fmt_t * fmt, Bcf1 line)
210     {
211         this.line = line;
212         this(key, fmt);
213     }
214 
215     /// string and format ctor
216     this(string key, bcf_fmt_t * fmt)
217     {
218         /// set all our data
219         this.key = key;
220         this.type = cast(BcfRecordType)(fmt.type & 0b1111);
221         this.n = fmt.n;
222         this.nSamples = fmt.p_len / fmt.size;
223         this.data = fmt.p[0 .. fmt.p_len].dup;
224         this.size = fmt.size;
225         /// double check our work
226         debug{
227             final switch(this.type){
228                 case BcfRecordType.Int8:
229                     assert(data.length == this.nSamples * this.n * byte.sizeof);
230                     break;
231                 case BcfRecordType.Int16:
232                     assert(data.length == this.nSamples * this.n * short.sizeof);
233                     break;
234                 case BcfRecordType.Int32:
235                     assert(data.length == this.nSamples * this.n * int.sizeof);
236                     break;
237                 case BcfRecordType.Int64:
238                     assert(data.length == this.nSamples * this.n * long.sizeof);
239                     break;
240                 case BcfRecordType.Float:
241                     assert(data.length == this.nSamples * this.n * float.sizeof);
242                     break;
243                 case BcfRecordType.Char:
244                     assert(data.length == this.nSamples * this.n * char.sizeof);
245                     break;
246                 case BcfRecordType.Null:
247                     assert(data.length == 0);
248             }
249         }
250     }
251 
252     /// convert to a D type array if int, float, bool, or string type array
253     /// very similar to InfoField.to 
254     /// This returns chunks as we separated FORMAT values by sample
255     auto to(T)()
256     if(isIntegral!T || is(T == float) || isBoolean!T || isSomeString!T)
257     {
258         T[] ret;
259         static if(isIntegral!T){
260             if(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)){
261                 ret = (cast(T *)this.data.ptr)[0 .. this.n * this.nSamples * T.sizeof];
262                 return ret.chunks(this.n);
263             }
264             if(RecordTypeSizes[this.type] > T.sizeof)
265             {
266                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
267                 return ret.chunks(this.n);
268             }
269             switch(this.type){
270                 case BcfRecordType.Int8:
271                     ret = ((cast(byte *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]);
272                     break;
273                 case BcfRecordType.Int16:
274                     ret = ((cast(short *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]);
275                     break;
276                 case BcfRecordType.Int32:
277                     ret = ((cast(int *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]);
278                     break;
279                 case BcfRecordType.Int64:
280                     ret = ((cast(long *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]);
281                     break;
282                 default:
283                     hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(this.type, T.stringof));
284                     return ret.chunks(this.n);
285             }
286         }else static if(isSomeString!T){
287             if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)))
288             {
289                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
290                 return ret.chunks(this.n);
291             }
292             auto ptr = this.data.ptr;
293             for(auto i=0; i < nSamples; i++)
294             {
295                 ret ~= (cast(char *)ptr)[i*size .. (i+1) * size].idup;
296             }
297         }else{
298             if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)))
299             {
300                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
301                 return ret.chunks(this.n);
302             }
303             ret = (cast(T *)this.data.ptr)[0 .. this.n * this.nSamples * T.sizeof];
304         }
305         return ret.chunks(this.n);
306 
307     }
308 }
309 
310 
311 
312 /** Wrapper around bcf1_t 
313 
314     Because it uses bcf1_t internally, it must conform to the BCF2 part
315     of the VCFv4.2 specs, rather than the loosey-goosey VCF specs. i.e.,
316     INFO, CONTIG, FILTER records must exist in the header.
317 
318     TODO: Does this need to be kept in a consistent state?
319     Ideally, VCFWriter would reject invalid ones, but we are informed
320     that it is invalid (e.g. if contig not found) while building this
321     struct; bcf_write1 will actually segfault, unfortunately. I'd like
322     to avoid expensive validate() calls for every record before writing
323     if possible, which means keeping this consistent. However, not
324     sure what to do if error occurs when using the setters herein?
325 
326     2019-01-23 struct->class to mirror SAMRecord -- faster if reference type?
327 
328     2019-01-23 WIP: getters for chrom, pos, id, ref, alt are complete (untested)
329 
330 After parsing a BCF or VCF line, bcf1_t must be UnpackLeveled. (not applicable when building bcf1_t from scratch)
331 Depending on information needed, this can be done to various levels with performance tradeoff.
332 Unpacking symbols:
333 UNPACK.ALL: all
334 UNPACK.SHR: all shared information (UNPACK.ALT|UNPACK.FILT|UNPACK.INFO)
335 
336                         UnpackLevel.ALT
337                        /               UnpackLevel.FILT
338                       |               /      UnpackLevel.INFO
339                       |              |      /       ____________________________ UnpackLevel.FMT
340                       V              V     V       /       |       |       |
341 #CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  NA00001 NA00002 NA00003 ...
342 
343 */
344 struct VCFRecord
345 {
346     Bcf1 line;   /// htslib structured record TODO: change to 'b' for better internal consistency? (vcf.h/c actually use line quite a bit in fn params)
347 
348     VCFHeader vcfheader;   /// corresponding header (required);
349 
350     /** VCFRecord
351 
352     Construct a bcf/vcf record, backed by bcf1_t, from: an existing bcf1_t, parameters, or a VCF line.
353 
354     Internal backing by bcf1_t means it must conform to the BCF2 rules -- i.e., header must contain
355     appropriate INFO, CONTIG, and FILTER lines.
356 
357     Protip: specifying alternate MAX_UNPACK can speed it tremendously
358         as it will not UnpackLevel all fields, only up to those requested (see htslib.vcf)
359         For example, UnpackLevel.ALT is up to ALT inclusive, and UnpackLevel.ALT is up to FILTER
360     */
361     this(T)(T h, bcf1_t *b, UnpackLevel MAX_UNPACK = UnpackLevel.None)
362     if(is(T == VCFHeader) || is(T == bcf_hdr_t *))
363     {
364         static if (is(T == VCFHeader)) this.vcfheader = h;
365         else static if (is(T == bcf_hdr_t *)) this.vcfheader = VCFHeader(Bcf_hdr_t(h)); // double free() bug if we don't own bcf_hdr_t h
366         else static if (is(T == bcf_hdr_t)) assert(0);  // ferret out situations that will lead to free() crashes
367         else assert(0);
368 
369         this.line = Bcf1(b);
370 
371         // Now it must be UNPACKed
372         // Protip: specifying alternate MAX_UNPACK can speed it tremendously
373         immutable int ret = bcf_unpack(this.line, MAX_UNPACK);    // unsure what to do c̄ return value // @suppress(dscanner.suspicious.unused_variable)
374     }
375     /// ditto
376     this(SS)(VCFHeader vcfhdr, string chrom, int pos, string id, string _ref, string alt, float qual, SS filter, )
377     if (isSomeString!SS || is(SS == string[]))
378     {
379         this.line = Bcf1(bcf_init1());
380         this.vcfheader = vcfhdr;
381         
382         this.chrom = chrom;
383         this.pos = pos;
384         this.id = id;
385 
386         this.setAlleles(_ref, alt);
387 
388         this.qual = qual;
389         this.filter = filter;
390     }
391     /// ditto
392     this(VCFHeader vcfhdr, string line, UnpackLevel MAX_UNPACK = UnpackLevel.None)
393     {
394         this.vcfheader = vcfhdr;
395 
396         kstring_t kline;
397         auto dupline = line.dup; // slower, but safer than a cast // @suppress(dscanner.suspicious.unmodified)
398 
399         kline.l = dupline.length;
400         kline.m = dupline.length;
401         kline.s = dupline.ptr;
402 
403         this.line = Bcf1(bcf_init1());
404         this.line.max_unpack = MAX_UNPACK;
405 
406         auto ret = vcf_parse(&kline, this.vcfheader.hdr, this.line);
407         if (ret < 0) {
408             hts_log_error(__FUNCTION__, "vcf_parse returned < 0 -- code error or malformed VCF line");
409         } else {
410             ret = bcf_unpack(this.line, MAX_UNPACK);    // unsure what to do c̄ return value
411         }
412     }
413 
414     /// ensure that vcf variable length data is unpacked to at least desired level
415     pragma(inline, true)
416     void unpack(UnpackLevel unpackLevel)
417     {
418         if (this.line.max_unpack < unpackLevel) bcf_unpack(this.line, unpackLevel);
419     }
420 
421     //----- FIXED FIELDS -----//
422     
423     /* CHROM */
424     /// Get chromosome (CHROM)
425     @property
426     string chrom()
427     {   
428         return fromStringz(bcf_hdr_id2name(this.vcfheader.hdr, this.line.rid)).idup;
429     }
430     /// Set chromosome (CHROM)
431     @property
432     void chrom(const(char)[] c)
433     {
434         auto rid = bcf_hdr_name2id(this.vcfheader.hdr, toStringz(c));
435         if (rid == -1) {
436             hts_log_error(__FUNCTION__, format("contig not found: %s", c));
437             throw new Exception("contig not found");
438         }
439         else line.rid = rid;
440     }
441 
442 
443     /* POS */
444     /** Get position (POS, column 2)
445      *
446      * NB: internally BCF is uzing 0 based coordinates; we only show +1 when printing a VCF line with toString (which calls vcf_format)
447     */
448     @property
449     ZB pos()
450     out(coord) { assert(coord >= 0); }
451     do
452     {
453         return ZB(this.line.pos);
454     }
455     /// Set position (POS, column 2)
456     @property
457     void pos(ZB p)
458     in { assert(p >= 0); }
459     do
460     {
461         // TODO: should we check for pos >= 0 && pos < contig length? Could really hamper performance.
462         // TODO: if writing out the file with invalid POS values crashes htslib, will consider it
463         this.line.pos = p.pos;
464     }
465 
466 
467     /* ID */
468     /// Get ID string
469     @property string id()
470     {
471         this.unpack(UnpackLevel.AltAllele);
472         return fromStringz(this.line.d.id).idup;
473     }
474     /// Sets new ID string; comma-separated list allowed but no dup checking performed
475     @property int id(const(char)[] id)
476     {
477         // bcf_update_id expects null pointer instead of empty string to mean "no value"
478         if (id == "") return bcf_update_id(this.vcfheader.hdr, this.line, null);
479         else return bcf_update_id(this.vcfheader.hdr, this.line, toStringz(id));
480     }
481     /// Append an ID (column 3) to the record.
482     ///
483     /// NOTE: htslib performs duplicate checking
484     int addID(const(char)[] id)
485     {
486         if(id == "") return 0;
487         return bcf_add_id(this.vcfheader.hdr, this.line, toStringz(id));
488     }
489 
490 
491     /* Alleles (REF, ALT)
492     
493         Internally, alleles are stored as a \0 separated array:
494         [C \0 T \0 T T \0.. \0]
495          ^    ^    ^ ^
496          |    |    L L ALT_1 = "TT"
497          |    L ALT_0 
498          L REF
499 
500         TODO: Getters and setters poorly or inconsistently (?) named at this time or there are too many overloads?
501         TODO: need non-overwriting setter for ref and alt alleles
502         TODO: some of these may be inefficent; since they may be used in hot inner loops, pls optimize
503     */
504     /// REF allele length
505     @property long refLen()
506     {
507         version(DigitalMars) pragma(inline);
508         version(LDC) pragma(inline, true);
509         version(GNU) pragma(inline, true);
510         return this.line.rlen;
511     }
512 
513     /// Coordinate range of the reference allele
514     @property ZBHO coordinates()
515     {
516         return ZBHO(this.pos, this.pos + this.refLen);
517     }
518     
519     /// All alleles getter (array)
520     @property string[] allelesAsArray()
521     {
522         this.unpack(UnpackLevel.AltAllele);
523         string[] ret;
524         ret.length = this.line.n_allele;        // n=0, no reference; n=1, ref but no alt
525         foreach(int i; 0 .. this.line.n_allele) // ref allele is index 0
526         {
527             ret[i] = fromStringz(this.line.d.allele[i]).idup;
528         }
529         return ret;
530     }
531     /// Reference allele getter
532     @property string refAllele()
533     {
534         this.unpack(UnpackLevel.AltAllele);
535         if (this.line.n_allele < 1) return ""; // a valid record could have no ref (or alt) alleles
536         else return fromStringz(this.line.d.als).idup;
537     }
538     // NB WIP: there could be zero, or multiple alt alleles
539     /+@property string altAlleles()
540     {
541         if (!this.line.unpacked) bcf_unpack(this.line, UnpackLevel.ALT);
542         return fromStringz(this.line.d.als)
543     }+/
544     /// Alternate alleles getter version 1: ["A", "ACTG", ...]
545     @property string[] altAllelesAsArray()
546     {
547         this.unpack(UnpackLevel.AltAllele);
548 
549         string[] ret;
550         if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt
551         return this.allelesAsArray[1 .. $];     // trim off REF
552     }
553     /// Alternate alleles getter version 2: "A,ACTG,..."
554     @property string altAllelesAsString()
555     {
556         this.unpack(UnpackLevel.AltAllele);
557 
558         string ret;
559         if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt
560 
561         char[] tmp;
562         tmp.length = this.line.d.m_allele;    // max allocated size in htslib
563 
564         char *a = this.line.d.allele[1];    // ref allele is index 0
565         const char *last = this.line.d.allele[this.line.n_allele - 1];    // pointer to start of last allele
566         size_t i = 0;
567 
568         assert( this.line.d.allele[1] == (this.line.d.als + this.line.rlen + 1) );  // ensue data structure is as we think (+1: past ref's term \0)
569 
570         while (i < this.line.d.m_allele)    // safety stop at max allocated size
571         {
572             if (*a) tmp[i] = *a;
573             else {  // '\0'
574                 if (a < last) tmp[i] = ',';
575                 else break;
576             }
577             i++;
578             a++;
579         }
580         tmp.length = i;
581 
582         return tmp.idup;
583     }
584     /// Set alleles; comma-separated list
585     @property void alleles(string a)
586     {
587         this.unpack(UnpackLevel.AltAllele);
588         if (a == "") {
589             this.line.n_allele = 0;
590             if (this.line.d.m_allele) this.line.d.als[0] = '\0';    // if storage allocated, zero out the REF
591         }
592         else
593             bcf_update_alleles_str(this.vcfheader.hdr, this.line, toStringz(a));
594     }
595     /// Set alleles; array
596     @property void alleles(string[] a)
597     {
598         this.unpack(UnpackLevel.AltAllele);
599         if (a.length == 0) {
600             this.line.n_allele = 0;
601             if (this.line.d.m_allele) this.line.d.als[0] = '\0';    // if storage allocated, zero out the REF
602         }
603         else {
604             const(char)*[64] allelesp;  // will fail if a locus has more than 63 ALT alleles :-O
605             foreach(i; 0 .. a.length ) {
606                 // In order to zero out all alleles, pass a zero-length allele to the string version,
607                 // or a zero-length array to this version. Zero length string member of nonempty allele array is an error.
608                 if (a[i].length == 0) {
609                     hts_log_error(__FUNCTION__, "Zero-length allele in nonempty allele array. Setting to '.'");
610                     allelesp[i] = ".".ptr;
611                 }
612                 else allelesp[i] = toStringz(a[i]);
613             }
614             bcf_update_alleles(this.vcfheader.hdr, this.line, &allelesp[0], cast(int)a.length);
615         }
616     }
617 
618     /// Set REF allele only
619     void setRefAllele(string r)
620     {
621         // first, get REF
622         this.unpack(UnpackLevel.AltAllele);
623         // a valid record could have no ref (or alt) alleles
624         auto alleles = [toUTFz!(char *)(r)];
625         if (this.line.n_allele < 2) // if none or 1 (=REF only), just add the REF we receieved
626             bcf_update_alleles(this.vcfheader.hdr, this.line, alleles.ptr, 1);
627         else {
628             // length of REF allele is allele[1] - allele[0], (minus one more for \0)
629             // TODO ** TODO ** : there is a property line.refLen already
630             const auto reflen = (this.line.d.allele[1] - this.line.d.allele[0]) - 1;
631             alleles ~= this.altAllelesAsArray.map!(toUTFz!(char *)).array;
632             bcf_update_alleles(this.vcfheader.hdr, this.line, alleles.ptr, cast(int)alleles.length);
633         }
634     }
635     /// Set alleles; alt can be comma separated
636     void setAlleles(string _ref, string alt)
637     {
638         immutable string alleles = _ref ~ "," ~ alt ~ "\0";
639         bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr);
640     }
641     /// Set alleles; min. 2 alleles (ref, alt1); unlimited alts may be specified
642     void setAlleles(string _ref, string alt, ...)
643     {
644         string alleles = _ref ~ "," ~ alt;
645 
646         for (int i = 0; i < _arguments.length; i++)
647         {
648             alleles ~= "," ~ va_arg!string(_argptr);
649         }
650 
651         alleles ~= "\0";
652 
653         bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr);
654     }
655 
656 
657     /* Quality (QUAL) */
658     /// Get variant quality (QUAL, column 6)
659     @property float qual()
660     out(result) { assert(result >= 0); }
661     do
662     {
663         return this.line.qual;
664     }
665     /// Set variant quality (QUAL, column 6)
666     @property void qual(float q)
667     in { assert(q >= 0); }
668     do { this.line.qual = q; }
669 
670 
671     /* FILTER */
672     /// Get FILTER column (nothing in htslib sadly)
673     @property string filter()
674     {
675         const(char)[] ret;
676 
677         this.unpack(UnpackLevel.Filter);
678 
679         if (this.line.d.n_flt) {
680             for(int i; i< this.line.d.n_flt; i++) {
681                 if (i) ret ~= ";";
682                 ret ~= fromStringz(this.vcfheader.hdr.id[BCF_DT_ID][ this.line.d.flt[0] ].key);
683             }
684         } else {
685             ret = ".";
686         }
687 
688         return ret.idup;
689     }
690     /// Remove all entries in FILTER
691     void removeAllFilters()
692     {
693         // always returns zero
694         bcf_update_filter(this.vcfheader.hdr, this.line, null, 0);
695     }
696     /// Set the FILTER column to f
697     @property void filter(string f)
698     {
699         this.filter([f]);
700     }
701     /// Set the FILTER column to f0,f1,f2...
702     /// TODO: determine definitiely whether "." is replaced with "PASS"
703     @property void filter(string[] fs)
704     {
705         int[] fids;
706         foreach(f; fs) {
707             const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
708             if (fid == -1){
709                 hts_log_warning(__FUNCTION__, format("ignoring filter not found in header: %s", f.empty() ? "(empty)" : f));
710                 continue;
711             }
712             else fids ~= fid;
713         }
714         if (fids.length > 0)
715             bcf_update_filter(this.vcfheader.hdr, this.line, fids.ptr, cast(int)fids.length);
716         else
717             hts_log_warning(__FUNCTION__, "No FILTER update was performed due to empty list");
718     }
719     /// Add a filter; from htslib: 
720     /// "If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed."
721     int addFilter(string f)
722     {
723         
724         if(f == "") hts_log_warning(__FUNCTION__, "filter was empty");
725         const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
726         if (fid == -1){
727             hts_log_warning(__FUNCTION__, format("ignoring filter not found in header: %s", f.empty() ? "(empty)" : f));
728             return -1;
729         }
730 
731         return bcf_add_filter(this.vcfheader.hdr, this.line, fid);
732     }
733     /// Remove a filter by name
734     int removeFilter(string f)
735     {
736         const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
737         if (fid == -1){
738             hts_log_warning(__FUNCTION__, format("filter not found in header (ignoring): %s", f) );
739             return -1;
740         }
741         return removeFilter(fid);
742     }
743     /// Remove a filter by numeric id
744     int removeFilter(int fid)
745     {
746         return bcf_remove_filter(this.vcfheader.hdr, this.line, fid, 0);
747     }
748     /// Determine whether FILTER is present. log warning if filter does not exist. "PASS" and "." can be used interchangeably.
749     bool hasFilter(string filter)
750     {
751         char[] f = filter.dup ~ '\0';
752         //const auto id = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, f.ptr);
753         const auto ret = bcf_has_filter(this.vcfheader.hdr, this.line, f.ptr);
754 
755         if (ret > 0) return true;
756         else if (ret == 0) return false;
757         else {
758             hts_log_warning(__FUNCTION__, format("FILTER %s does not exist in the header", filter));
759             return false;
760         }
761     }
762 
763 
764     /** Update INFO (pan-sample info; column 8) 
765      *
766      *  Add a tag:value to the INFO column
767      *  NOTE: tag must already exist in the header
768      *
769      *  Templated on data type, calls one of bcf_update_info_{int32,float,string,flag}
770      *  Both singletons and arrays are supported.
771     */
772     void addInfo(T)(string tag, T data)
773     if(isSomeString!T || ((isIntegral!T || isFloatingPoint!T || isBoolean!T) && !isArray!T))
774     {
775         int ret = -1;
776 
777         static if(isIntegral!T) {
778             auto integer = cast(int) data;
779             ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1);
780         }
781         
782         else static if(isFloatingPoint!T) {
783             auto flt = cast(float) data;    // simply passing "2.0" (or whatever) => data is a double
784             ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1);
785         }
786 
787         else static if(isSomeString!T)
788             ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data));
789         
790         else static if(isBoolean!T) {
791             immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 
792             ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set);
793         }
794         
795         if (ret == -1)
796             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
797     }
798     /// ditto
799     void addInfo(T)(string tag, T data)
800     if(isIntegral!(ElementType!T) || isFloatingPoint!(ElementType!T))             // otherwise string::immutable(char)[] will match the other template
801     {
802         int ret = -1;
803 
804         static if(isIntegral!(ElementType!T)) {
805             auto integer = data.to!(int[]);
806             ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), integer.ptr, cast(int)data.length);
807         }
808         
809         else static if(isFloatingPoint!(ElementType!T)) {
810             auto flt = data.to!(float[]);    // simply passing "2.0" (or whatever) => data is a double
811             ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), flt.ptr, cast(int)data.length);
812         }
813         
814         if (ret == -1)
815             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
816     }
817 
818     /** Update FORMAT (sample info; column 9+)
819      *  
820      *  Templated on data type, calls one of bc_update_format_{int32,float,string,flag}
821     */
822     void addFormat(T)(string tag, T data)
823     if(!isArray!T || isSomeString!T)
824     {
825         int ret = -1;
826 
827         static if(isIntegral!T) {
828             auto integer = data.to!int;
829             ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1);
830         }
831         
832         else static if(isFloatingPoint!T) {
833             auto flt = data.to!float;    // simply passing "2.0" (or whatever) => data is a double
834             ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1);
835         }
836 
837         else static if(isSomeString!T){
838             auto strs = [data].map!(toUTFz!(char*)).array;
839             ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), strs.ptr, 1);
840         }
841         
842         else static if(isBoolean!T) {
843             immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 
844             ret = bcf_update_format_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set);
845         }
846         
847         if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data));
848     }
849     /// ditto
850     void addFormat(T)(string tag, T[] data)
851     if((!isArray!T || isSomeString!T) && !is(T == immutable(char)))
852     {
853                 int ret = -1;
854 
855         static if(isIntegral!T) {
856             auto integer = data.to!(int[]);
857             ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag),
858                                             integer.ptr, cast(int)data.length);
859         }
860         
861         else static if(isFloatingPoint!T) {
862             auto flt = data.to!(float[]);    // simply passing "2.0" (or whatever) => data is a double
863             ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag),
864                                             flt.ptr, cast(int)data.length);
865         }
866 
867         else static if(isSomeString!T){
868             auto strs = data.map!(toUTFz!(char*)).array;
869             ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), strs.ptr, cast(int)strs.length);
870         }
871 
872         if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data));
873 
874     }
875 
876     /// remove an info section
877     void removeInfo(string tag)
878     {
879         bcf_update_info(this.vcfheader.hdr,this.line, toStringz(tag),null,0,HeaderTypes.None);
880     }
881 
882     /// remove a format section
883     void removeFormat(string tag)
884     {
885         bcf_update_format(this.vcfheader.hdr,this.line, toStringz(tag),null,0,HeaderTypes.None);
886     }
887 
888 
889     /// add INFO or FORMAT key:value pairs to a record
890     /// add a single datapoint OR vector of values, OR, values to each sample (if lineType == FORMAT)
891     void add(HeaderRecordType lineType, T)(const(char)[] tag, T data)
892     if((lineType == HeaderRecordType.Info || lineType == HeaderRecordType.Format) &&
893         (isIntegral!T       || isIntegral!(ElementType!T)   ||
894         isFloatingPoint!T   || isFloatingPoint!(ElementType!T) ||
895         isSomeString!T      || isSomeString!(ElementType!T) ||
896         isBoolean!T         || isBoolean!(ElementType!T)))
897     {
898         int ret = -1;
899         int len;
900 
901         static if (!isDynamicArray!T) {
902             len = 1;
903             static if (isIntegral!T) auto d = data.to!int;
904             else static if (isFloatingPoint!T) auto d = data.to!float;
905             else static if (isSomeString!T) auto d = toStringz(data);
906             else static if (isBoolean!T) immutable int d = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag
907             else static assert(0);
908 
909             const auto ptr = &d;
910         }
911         else static if (isDynamicArray!T) {
912             assert(data.length < int.max);
913             len = cast(int) data.length;
914             static if (isIntegral!(ElementType!T)) auto d = data.to!(int[]);
915             else static if (isFloatingPoint!(ElementType!T)) auto d = data.to!(float[]);
916             else static if (isSomeString!(ElementType!T)) {
917                 char[] d;
918                 foreach(s; data) {
919                     d ~= s ~ "\0";
920                 }
921                 if(d.length == 0) d ~= "\0";    // TODO replace with asserts on data length earlier
922             }
923             else static if (isBoolean!(ElementType!T)) {
924                 int[] d;
925                 foreach(b; data) {
926                     d ~= (b ? 1 : 0);
927                 }
928             }
929 
930             const auto ptr = d.ptr;
931         }
932         else static assert(0);
933 
934         static if (lineType == HeaderRecordType.Info) {
935             static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int))
936                 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
937             else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float))
938                 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
939             else static if (isSomeString!T || isSomeString!(ElementType!T))
940                 ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr);
941             else static if (is(T == bool) || is(ElementType!T == bool))
942                 ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
943             else static assert(0, "Type not recognized for INFO tag");
944         }
945         else static if (lineType == HeaderRecordType.Format) {
946             static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int))
947                 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
948             else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float))
949                 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
950             else static if (isSomeString!T || isSomeString!(ElementType!T))
951                 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
952             else static assert(0, "Type not recognized for FORMAT tag");
953         }
954         else static assert(0);
955 
956         if (ret == -1)
957             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
958     }
959 
960     /// returns a hashmap of info data by field
961     InfoField[string] getInfos()
962     {
963         /// UnpackLevel
964         this.unpack(UnpackLevel.Info);
965 
966         /// copy some data
967         InfoField[string] infoMap;
968         bcf_info_t[] infos = this.line.d.info[0..this.line.n_info].dup;
969 
970         foreach (bcf_info_t info; infos)
971         {
972             /// if variable data is null then value is set for deletion
973             /// skip
974             if(!info.vptr) continue;
975             auto key = fromStringz(this.vcfheader.hdr.id[HeaderDictTypes.Id][info.key].key).idup;
976             infoMap[key] = InfoField(key, &info, this.line);
977         }
978         return infoMap;
979     }
980 
981     /// returns a hashmap of format data by field
982     FormatField[string] getFormats()
983     {
984         this.unpack(UnpackLevel.Format);
985         FormatField[string] fmtMap;
986         bcf_fmt_t[] fmts = this.line.d.fmt[0..this.line.n_fmt].dup;
987         foreach (bcf_fmt_t fmt; fmts)
988         {
989             /// if variable data is null then value is set for deletion
990             /// skip
991             if(!fmt.p) continue;
992             auto key = fromStringz(this.vcfheader.hdr.id[HeaderDictTypes.Id][fmt.id].key).idup;
993             fmtMap[key] = FormatField(key, &fmt, this.line);
994         }
995         return fmtMap;
996     }
997 
998     /// Return a string representation of the VCFRecord (i.e. as would appear in .vcf)
999     ///
1000     /// As a bonus, there is a kstring_t memory leak
1001     string toString() const
1002     {
1003         kstring_t s;
1004 
1005         const int ret = vcf_format(this.vcfheader.hdr, this.line, &s);
1006         if (ret)
1007         {
1008             hts_log_error(__FUNCTION__,
1009                 format("vcf_format returned nonzero (%d) (likely EINVAL, invalid bcf1_t struct?)", ret));
1010             return "[VCFRecord vcf_format parse_error]";
1011         }
1012 
1013         return cast(string) s.s[0 .. s.l];
1014     }
1015 }
1016 
1017 
1018 
1019 
1020 
1021 ///
1022 debug(dhtslib_unittest)
1023 unittest
1024 {
1025     import std.exception: assertThrown;
1026     import std.stdio: writeln, writefln;
1027     import dhtslib.vcf.writer;
1028 
1029     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1030 
1031 
1032     auto vw = VCFWriter("/dev/null");
1033 
1034     vw.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
1035     vw.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">");
1036     // ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
1037     vw.vcfhdr.addHeaderLine!(HeaderRecordType.Info)("AF", HeaderLengths.OnePerAltAllele, HeaderTypes.Integer, "Number of Samples With Data");
1038     vw.addHeaderLineRaw("##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"); // @suppress(dscanner.style.long_line)
1039     vw.addHeaderLineRaw("##FILTER=<ID=q10,Description=\"Quality below 10\">");
1040     HeaderRecord hrec;
1041     hrec.setHeaderRecordType(HeaderRecordType.Format);
1042     hrec.setID("AF");
1043     hrec.setLength(HeaderLengths.OnePerAltAllele);
1044     hrec.setValueType(HeaderTypes.Float);
1045     hrec.setDescription("Allele Freq");
1046     vw.vcfhdr.addHeaderRecord(hrec);
1047     
1048     assert(vw.vcfhdr.getHeaderRecord(HeaderRecordType.Format, "ID","AF")["ID"] == "AF");
1049     vw.addHeaderLineRaw("##FORMAT=<ID=CH,Number=3,Type=String,Description=\"test\">");
1050 
1051     // Exercise header
1052     assert(vw.vcfhdr.nsamples == 0);
1053     vw.addSample("NA12878");
1054     assert(vw.vcfhdr.nsamples == 1);
1055 
1056     vw.writeHeader();
1057 
1058     auto r = new VCFRecord(vw.vcfhdr, bcf_init1());
1059     
1060     r.chrom = "20";
1061     assert(r.chrom == "20");
1062     assertThrown(r.chrom = "chr20");   // headerline chromosome is "20" not "chr20"
1063 
1064     r.pos = Coordinate!(Basis.zero)(999_999);
1065     assert(r.pos == Coordinate!(Basis.zero)(999_999));
1066     r.pos = Coordinate!(Basis.zero)(62_435_964 + 1);     // Exceeds contig length
1067 
1068     // Test ID field
1069     // note that in an empty freshly initialized bcf1_t, the ID field is "" rather than "."
1070     // Will consider adding initialization code
1071     //writefln("ID: %s", r.id);
1072     r.id = "";
1073     assert(r.id == ".");
1074     r.id = ".";
1075     assert(r.id == ".");
1076     r.id = "rs001";
1077     assert(r.id == "rs001");
1078     r.addID("rs999");
1079     assert(r.id == "rs001;rs999");
1080     r.addID("rs001");   // test duplicate checking
1081     assert(r.id == "rs001;rs999");
1082 
1083     // Test REF/ALT allele setters and getters
1084     // many overloads to test for good coverage
1085     // Test matrix: Set {zero, one, two, three} alleles * {set by string, set by array} = 8 test cases
1086     //  * also, there is setAlleles(ref, alt) and setAlleles(ref, alt1, ...) 
1087     //  * TODO: will also need to retreive alt alleles as array and string
1088 
1089     string[] alleles_array;
1090 
1091     // Zero alleles
1092     r.alleles("");
1093     const auto zs = r.allelesAsArray;
1094     const auto zsr = r.refAllele;
1095     assert(zs.length == 0);
1096     assert(zsr == "");
1097 
1098     r.alleles(alleles_array);
1099     const auto za = r.allelesAsArray;
1100     const auto zar = r.refAllele;
1101     assert(za.length == 0);
1102     assert(zar == "");
1103     
1104     // One allele
1105     r.alleles("C");
1106     const auto os = r.allelesAsArray;
1107     const auto osr = r.refAllele;
1108     assert(os.length == 1 && os[0] == "C");
1109     assert(osr == "C");
1110 
1111     r.alleles(["C"]);
1112     const auto oa = r.allelesAsArray;
1113     const auto oar = r.refAllele;
1114     assert(oa.length == 1 && oa[0] == "C");
1115     assert(oar == "C");
1116 
1117     // Two alleles
1118     r.alleles("C,T");
1119     const auto ts = r.allelesAsArray;
1120     const auto tsr= r.refAllele;
1121     assert(ts == ["C", "T"]);
1122     assert(tsr== "C");
1123 
1124     r.alleles(["C", "T"]);
1125     const auto ta = r.allelesAsArray;
1126     const auto tar= r.refAllele;
1127     assert(ta == ["C", "T"]);
1128     assert(tar== "C");
1129 
1130     const taaa = r.altAllelesAsArray;
1131     const taas = r.altAllelesAsString;
1132     assert(taaa == ["T"]);
1133     assert(taas == "T");
1134 
1135     // alternate setter for >= 2 alleles
1136     r.setAlleles("A", "G");
1137     assert(r.allelesAsArray == ["A", "G"]);
1138 
1139     // Three alleles
1140     r.alleles("A,C,T");
1141     assert(r.allelesAsArray == ["A", "C", "T"]);
1142     assert(r.refAllele == "A");
1143     assert(r.altAllelesAsString == "C,T");
1144 
1145     // alternate the alleles for testing purposes
1146     r.alleles(["G", "A", "C"]);
1147     assert(r.allelesAsArray == ["G", "A", "C"]);
1148     assert(r.refAllele == "G");
1149     assert(r.altAllelesAsString == "A,C");
1150 
1151     r.setAlleles("A", "C", "T");
1152     assert(r.allelesAsArray == ["A", "C", "T"]);
1153     assert(r.refAllele == "A");
1154     assert(r.altAllelesAsString == "C,T");
1155 
1156     r.setRefAllele("G");
1157     assert(r.allelesAsArray == ["G", "C", "T"]);
1158     assert(r.refAllele == "G");
1159     assert(r.altAllelesAsString == "C,T");
1160 
1161     r.setRefAllele("A");
1162     assert(r.allelesAsArray == ["A", "C", "T"]);
1163     assert(r.refAllele == "A");
1164     assert(r.altAllelesAsString == "C,T");
1165 
1166 
1167 
1168     // Test QUAL
1169     r.qual = 1.0;
1170     assert(r.qual == 1.0);
1171     // now test setting qual without UnpackLeveling
1172     // TODO: see https://forum.dlang.org/post/hebouvswxlslqhovzaia@forum.dlang.org, once resolved (or once factory function written),
1173     //  add template value param UnpackLevel.ALT
1174     auto rr = new VCFRecord(vw.vcfhdr, "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0.017\n"); // @suppress(dscanner.style.long_line)
1175     rr.qual = 3.0;
1176     assert(rr.qual == 3.0);
1177 
1178 
1179     // Test FILTER
1180     assert(r.hasFilter("PASS"));
1181     assert(r.hasFilter("."));
1182     // add q10 filter (is in header)
1183     r.filter = "q10";
1184     assert(r.filter == "q10");
1185     assert(!r.hasFilter("PASS"));   // if has another filter, no longer PASS (unless added explicitly)
1186     assert(!r.hasFilter("."));      // if has another filter, no longer has . (which is PASS, or no filter [spec conflict?])
1187 
1188     // add q30 filteR (not in header)
1189     r.filter = "q30";
1190     assert(r.filter == "q10");  // i.e., unchanged
1191 
1192     assert(r.hasFilter("q10"));
1193     assert(!r.hasFilter("q30"));
1194 
1195     r.removeAllFilters();
1196     assert(r.hasFilter("."));
1197     r.addFilter("q10");
1198     assert(r.hasFilter("q10"));
1199     r.removeFilter("q10");
1200     assert(r.hasFilter("PASS"));
1201     assert(r.toString == "20\t62435966\trs001;rs999\tA\tC,T\t1\t.\t.\n");
1202 
1203     r.addFormat("AF",[0.1, 0.5]);
1204     assert(r.toString == "20\t62435966\trs001;rs999\tA\tC,T\t1\t.\t.\tAF\t0.1,0.5\n");
1205 
1206     rr.addFormat("AF",[0.1]);
1207     writeln(rr.toString);
1208     assert(rr.toString == "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0\tAF\t0.1\n");
1209 
1210 
1211     assert(rr.getFormats["AF"].to!float[0][0] == 0.1f);
1212 
1213     rr.addInfo("AF",0.5);
1214     assert(rr.getInfos["AF"].to!float == 0.5f);
1215     
1216     rr.removeInfo("AF");
1217 
1218     rr.removeFormat("AF");
1219 
1220     rr.addFormat("CH", "test");
1221 
1222     
1223     writeln(rr.toString);
1224     writeln(rr.getFormats["CH"].to!string);
1225     assert(rr.getFormats["CH"].to!string[0][0] == "test");
1226 
1227     assert(rr.toString == "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11\tCH\ttest\n");
1228 
1229     assert(rr.coordinates == ZBHO(17329,17330));
1230     vw.writeRecord(*r);
1231     vw.writeRecord(vw.vcfhdr.hdr, r.line);
1232 
1233 
1234     // Finally, print the records:
1235     writefln("VCF records via toString:\n%s%s", (*r), (*rr));
1236 
1237     writeln("\ndhtslib.vcf: all tests passed\n");
1238 }
1239 
1240 debug(dhtslib_unittest) unittest
1241 {
1242     import dhtslib.util;
1243     import std.stdio;
1244     import htslib.hts_log;
1245     import std.algorithm : map, count;
1246     import std.array : array;
1247     import std.path : buildPath, dirName;
1248     import std.math : approxEqual, isNaN;
1249     import dhtslib.tabix;
1250     import dhtslib.vcf;
1251     hts_set_log_level(htsLogLevel.HTS_LOG_INFO);
1252     hts_log_info(__FUNCTION__, "Testing VCFReader");
1253     hts_log_info(__FUNCTION__, "Loading test file");
1254     auto fn = buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","tabix","vcf_file.vcf.gz");
1255     auto tbx = TabixIndexedFile(fn);
1256     auto reg = getIntervalFromString("1:3000151-3062916");
1257     auto vcf = VCFReader(tbx, reg.contig, reg.interval);
1258     assert(!vcf.empty);
1259 
1260     VCFRecord rec = vcf.front;
1261     assert(rec.getInfos["AN"].to!int == 4);
1262     rec.addInfo("AN", rec.getInfos["AN"].to!int + 1);
1263     assert(rec.getInfos["AN"].to!int == 5);
1264     assert(rec.getInfos["AN"].to!byte == 5);
1265     assert(rec.getInfos["AN"].to!short == 5);
1266     assert(rec.getInfos["AN"].to!long == 5);
1267 
1268     assert(rec.getInfos["AN"].to!uint == 5);
1269     assert(rec.getInfos["AN"].to!ubyte == 5);
1270     assert(rec.getInfos["AN"].to!ushort == 5);
1271     assert(rec.getInfos["AN"].to!ulong == 5);
1272 
1273     assert(rec.getInfos["AN"].to!float.isNaN);
1274 
1275     rec.addInfo("AN", 128);
1276     assert(rec.getInfos["AN"].to!int == 128);
1277     assert(rec.getInfos["AN"].to!byte == 0);
1278     assert(rec.getInfos["AN"].to!short == 128);
1279     assert(rec.getInfos["AN"].to!long == 128);
1280 
1281     rec.addInfo("AN", 32768);
1282     assert(rec.getInfos["AN"].to!int == 32768);
1283     assert(rec.getInfos["AN"].to!byte == 0);
1284     assert(rec.getInfos["AN"].to!short == 0);
1285     assert(rec.getInfos["AN"].to!long == 32768);
1286 
1287     assert(rec.getInfos["AN"].to!long == 32768);
1288 
1289     // rec.addInfo("AN", 2147483648);
1290     // assert(rec.getInfos["AN"].to!int == 0);
1291     // assert(rec.getInfos["AN"].to!byte == 0);
1292     // assert(rec.getInfos["AN"].to!short == 0);
1293     // assert(rec.getInfos["AN"].to!long == 2147483648);
1294 
1295     vcf.popFront;
1296     rec = vcf.front;
1297     assert(rec.refAllele == "GTTT");
1298     assert(rec.getInfos["INDEL"].to!bool == true);
1299 
1300     assert(rec.getInfos["DP4"].to!(int[]) == [1, 2, 3, 4]);
1301     assert(rec.getInfos["DP4"].to!(byte[]) == [1, 2, 3, 4]);
1302     assert(rec.getInfos["DP4"].to!(short[]) == [1, 2, 3, 4]);
1303     assert(rec.getInfos["DP4"].to!(long[]) == [1, 2, 3, 4]);
1304 
1305     assert(rec.getInfos["DP4"].to!(uint[]) == [1, 2, 3, 4]);
1306     assert(rec.getInfos["DP4"].to!(ubyte[]) == [1, 2, 3, 4]);
1307     assert(rec.getInfos["DP4"].to!(ushort[]) == [1, 2, 3, 4]);
1308     assert(rec.getInfos["DP4"].to!(ulong[]) == [1, 2, 3, 4]);
1309 
1310     assert(rec.getInfos["DP4"].to!(float[]) == []);
1311     rec.addInfo("DP4", [2, 2, 3, 4]);
1312     assert(rec.getInfos["DP4"].to!(int[]) == [2, 2, 3, 4]);
1313 
1314     assert(rec.getFormats["GQ"].to!int.array == [[409], [409]]);
1315 
1316     rec.addFormat("GQ", [32768,32768]);
1317 
1318     assert(rec.getFormats["GQ"].to!byte.array == []);
1319     assert(rec.getFormats["GQ"].to!short.array == []);
1320     assert(rec.getFormats["GQ"].to!int[0][0] == 32768);
1321     
1322 
1323     assert(rec.getInfos["STR"].to!string == "test");
1324 
1325     assert(rec.getInfos["DP4"].to!string == "");
1326 
1327     assert(rec.getFormats["GT"].to!int.array == [[2, 4], [2, 4]]);
1328     
1329 
1330     vcf.popFront;
1331     rec = vcf.front;
1332 
1333     auto fmts = rec.getFormats;
1334     auto sam = vcf.vcfhdr.getSampleId("A");
1335     assert(fmts["GT"].to!int[sam] == [2, 4]);
1336     sam = vcf.vcfhdr.getSampleId("B");
1337     assert(fmts["GT"].to!int[sam] == [6, -127]);
1338 }