1 /** Module provides VCF Reader/Writer
2 
3 VCF version 4.2 (including BCF) reader and writer, including
4 a model with convenience functions for the header (metadata)
5 and individual VCF/BCF records (rows).
6 
7 Specifications: https://samtools.github.io/hts-specs/VCFv4.2.pdf
8 
9 */
10 module dhtslib.vcf.record;
11 
12 import core.stdc.string;
13 import core.vararg;
14 
15 import std.conv: to, ConvException;
16 import std.format: format;
17 import std.range;
18 import std.string: fromStringz, toStringz;
19 import std.utf : toUTFz;
20 import std.algorithm : map, filter, count;
21 import std.array : array;
22 import std.traits;
23 import std.meta : staticIndexOf;
24 
25 import dhtslib.vcf;
26 import dhtslib.coordinates;
27 import dhtslib.memory;
28 import htslib.hts_log;
29 import htslib.kstring;
30 import htslib.vcf;
31 
32 alias BCFRecord = VCFRecord;
33 
34 /// Struct to aid in conversion of VCF info data
35 /// into D types
36 struct InfoField
37 {
38     /// String identifier of info field
39     string key;
40     /// BCF TYPE indicator
41     BcfRecordType type;
42     /// number of data elements
43     int len;
44     /// reference of info field data
45     ubyte[] data;
46 
47     /// VCFRecord refct
48     Bcf1 line;
49 
50     /// ctor from VCFRecord
51     this(string key, bcf_info_t * info, Bcf1 line)
52     {
53         this.line = line;
54         this(key, info);
55     }
56 
57     /// Explicit postblit to avoid 
58     /// https://github.com/blachlylab/dhtslib/issues/122
59     this(this)
60     {
61         this.line = line;
62         this.key = key;
63         this.type = type;
64         this.data = data;
65     }
66 
67     /// string, info field ctor
68     this(string key, bcf_info_t * info)
69     {
70         /// get type
71         this.type = cast(BcfRecordType)(info.type & 0b1111);
72         /// get len
73         this.len = info.len;
74         /// copy data
75         this.data = info.vptr[0.. info.vptr_len];
76         /// double check our lengths
77         debug{
78             final switch(this.type){
79                 case BcfRecordType.Int8:
80                     assert(data.length == this.len * byte.sizeof);
81                     break;
82                 case BcfRecordType.Int16:
83                     assert(data.length == this.len * short.sizeof);
84                     break;
85                 case BcfRecordType.Int32:
86                     assert(data.length == this.len * int.sizeof);
87                     break;
88                 case BcfRecordType.Int64:
89                     assert(data.length == this.len * long.sizeof);
90                     break;
91                 case BcfRecordType.Float:
92                     assert(data.length == this.len * float.sizeof);
93                     break;
94                 case BcfRecordType.Char:
95                     assert(data.length == this.len * char.sizeof);
96                     break;
97                 case BcfRecordType.Null:
98                     assert(data.length == 0);
99             }
100         }
101     }
102 
103     /// convert to a D type array if int, float, or bool type array
104     T[] to(T: T[])()
105     if(isIntegral!T || is(T == float) || isBoolean!T)
106     {
107         static if(isIntegral!T){
108             /// if we select the correct type just slice and return
109             if(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType))
110                 return (cast(T *)this.data.ptr)[0 .. this.len];
111             /// if we select type that is too small log error and return
112             if(RecordTypeSizes[this.type] > T.sizeof)
113             {
114                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
115                 return [];
116             }
117             T[] ret;
118             /// if we select type that is bigger slice, convert and return
119             switch(this.type){
120                 case BcfRecordType.Int8:
121                     ret = ((cast(byte *)this.data.ptr)[0 .. this.len]).to!(T[]);
122                     break;
123                 case BcfRecordType.Int16:
124                     ret = ((cast(short *)this.data.ptr)[0 .. this.len]).to!(T[]);
125                     break;
126                 case BcfRecordType.Int32:
127                     ret = ((cast(int *)this.data.ptr)[0 .. this.len]).to!(T[]);
128                     break;
129                 case BcfRecordType.Int64:
130                     ret = ((cast(long *)this.data.ptr)[0 .. this.len]).to!(T[]);
131                     break;
132                 default:
133                     hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(this.type, T.stringof));
134                     return [];
135             }
136             return ret;
137         }else{
138             /// if we select type the wrong type error and return
139             if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)))
140             {
141                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
142                 return [];
143             }
144             return (cast(T *)this.data.ptr)[0 .. this.len];
145         }
146     }
147 
148     /// convert to a D type if int, float, or bool type
149     T to(T)()
150     if(isIntegral!T || is(T == float) || isBoolean!T)
151     {
152         /// if bool return true
153         static if(isBoolean!T)
154             return true;
155         else{
156             /// if info field has > 1 element error and return
157             if(this.len != 1){
158                 hts_log_error(__FUNCTION__, "This info field has a length of %d not 1".format(this.len));
159                 return T.init;
160             }
161             static if(isIntegral!T){
162                 /// if we select type that is too small log error and return
163                 if(RecordTypeSizes[this.type] > T.sizeof)
164                 {
165                     hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
166                     return T.init;
167                 }
168                 /// just gonna use the array impl and grab index 0
169                 return this.to!(T[])[0];
170             }else{
171                 /// if we select type the wrong type error and return
172                 if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)))
173                 {
174                     hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
175                     return T.init;
176                 }
177                 /// just gonna use the array impl and grab index 0
178                 return this.to!(T[])[0];
179             }
180         }   
181     }
182 
183     /// convert to a string
184     T to(T)()
185     if(isSomeString!T)
186     {
187         /// if we select type the wrong type error and return
188         if(!(this.type == BcfRecordType.Char))
189         {
190             hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
191             return T.init;
192         }
193         /// otherwise slice and dup
194         return cast(T)(cast(char *)this.data.ptr)[0 .. this.len].dup;
195     }
196 }
197 
198 /// Struct to aid in conversion of VCF format data
199 /// into D types
200 struct FormatField
201 {
202     /// String identifier of info field
203     string key;
204     /// BCF TYPE indicator
205     BcfRecordType type;
206     /// number of samples
207     int nSamples;
208     /// number of data elements per sample
209     int n;
210     /// number of bytes per sample
211     int size;
212     /// reference of info field data
213     ubyte[] data;
214 
215     /// VCFRecord refct
216     Bcf1 line;
217 
218     /// ctor from VCFRecord
219     this(string key, bcf_fmt_t * fmt, Bcf1 line)
220     {
221         this.line = line;
222         this(key, fmt);
223     }
224 
225     /// Explicit postblit to avoid 
226     /// https://github.com/blachlylab/dhtslib/issues/122
227     this(this)
228     {
229         this.line = line;
230         this.key = key;
231         this.type = type;
232         this.nSamples = nSamples;
233         this.n = n;
234         this.size = size;
235         this.data = data;
236     }
237 
238     /// string and format ctor
239     this(string key, bcf_fmt_t * fmt)
240     {
241         /// set all our data
242         this.key = key;
243         this.type = cast(BcfRecordType)(fmt.type & 0b1111);
244         this.n = fmt.n;
245         this.nSamples = fmt.p_len / fmt.size;
246         this.data = fmt.p[0 .. fmt.p_len].dup;
247         this.size = fmt.size;
248         /// double check our work
249         debug{
250             final switch(this.type){
251                 case BcfRecordType.Int8:
252                     assert(data.length == this.nSamples * this.n * byte.sizeof);
253                     break;
254                 case BcfRecordType.Int16:
255                     assert(data.length == this.nSamples * this.n * short.sizeof);
256                     break;
257                 case BcfRecordType.Int32:
258                     assert(data.length == this.nSamples * this.n * int.sizeof);
259                     break;
260                 case BcfRecordType.Int64:
261                     assert(data.length == this.nSamples * this.n * long.sizeof);
262                     break;
263                 case BcfRecordType.Float:
264                     assert(data.length == this.nSamples * this.n * float.sizeof);
265                     break;
266                 case BcfRecordType.Char:
267                     assert(data.length == this.nSamples * this.n * char.sizeof);
268                     break;
269                 case BcfRecordType.Null:
270                     assert(data.length == 0);
271             }
272         }
273     }
274 
275     /// convert to a D type array if int, float, bool, or string type array
276     /// very similar to InfoField.to 
277     /// This returns chunks as we separated FORMAT values by sample
278     auto to(T)()
279     if(isIntegral!T || is(T == float) || isBoolean!T || isSomeString!T)
280     {
281         T[] ret;
282         static if(isIntegral!T){
283             if(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)){
284                 ret = (cast(T *)this.data.ptr)[0 .. this.n * this.nSamples * T.sizeof];
285                 return ret.chunks(this.n);
286             }
287             if(RecordTypeSizes[this.type] > T.sizeof)
288             {
289                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
290                 return ret.chunks(this.n);
291             }
292             switch(this.type){
293                 case BcfRecordType.Int8:
294                     ret = ((cast(byte *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]);
295                     break;
296                 case BcfRecordType.Int16:
297                     ret = ((cast(short *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]);
298                     break;
299                 case BcfRecordType.Int32:
300                     ret = ((cast(int *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]);
301                     break;
302                 case BcfRecordType.Int64:
303                     ret = ((cast(long *)this.data.ptr)[0 .. this.n * this.nSamples]).to!(T[]);
304                     break;
305                 default:
306                     hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(this.type, T.stringof));
307                     return ret.chunks(this.n);
308             }
309         }else static if(isSomeString!T){
310             if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)))
311             {
312                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
313                 return ret.chunks(this.n);
314             }
315             auto ptr = this.data.ptr;
316             for(auto i=0; i < nSamples; i++)
317             {
318                 auto chrArr = (cast(char *)ptr)[i*size .. (i+1) * size];
319                 if(chrArr[$-1])
320                     ret ~= chrArr.idup;
321                 else
322                     ret ~= fromStringz(chrArr.ptr).idup;
323             }
324         }else{
325             if(!(this.type == cast(BcfRecordType) staticIndexOf!(T, RecordTypeToDType)))
326             {
327                 hts_log_error(__FUNCTION__, "Cannot convert %s to %s".format(type, T.stringof));
328                 return ret.chunks(this.n);
329             }
330             ret = (cast(T *)this.data.ptr)[0 .. this.n * this.nSamples];
331         }
332         static if(isSomeString!T)
333             return ret.chunks(1);
334         else 
335             return ret.chunks(this.n);
336     }
337 }
338 
339 /// Represents individual GT values as encoded in BCF. 
340 /// This is described in the VCF 4.2 spec section 6.3.3.
341 /// In summary it can be an int, short, or byte and 
342 /// encodes an allele value and a phased flag
343 /// e.g (allele + 1) << 1 | phased
344 union GT(T) 
345 {
346     import std.bitmanip : bitfields;
347     T raw;
348     mixin(bitfields!(
349         bool, "phased", 1,
350         T, "allele", (T.sizeof * 8) - 1,
351         ));
352     
353     /// get allele value
354     auto getAllele() const {
355         return this.allele - 1;
356     }
357 
358     /// set allele value
359     void setAllele(int val) {
360         this.allele = cast(T)(val + 1);
361     }
362 
363     /// check if value is missing
364     bool isMissing() const {
365         return this.allele == 0;
366     }
367 
368     /// set value as missing
369     void setMissing() {
370         this.allele = T(0);
371     }
372 
373     /// check if value is padding
374     bool isPadding() {
375         static if (is(T == byte)) {
376             return cast(bool)((this.raw & 0xFE) == 0x80);
377         } else static if (is(T == short)) {
378             return cast(bool)((this.raw & 0xFFFE) == 0x8000);
379         } else static if (is(T == int)) {
380             return cast(bool)((this.raw & 0xFFFFFFFE) == 0x80000000);
381         }
382     }
383 
384 }
385 
386 /// Struct to represent VCF/BCF genotype data
387 struct Genotype
388 {
389     /// Type of underlying format data
390     BcfRecordType type;
391     /// Array of encoded genotypes
392     void[] data;
393 
394     /// VCFRecord refct
395     Bcf1 line;
396 
397     /// ctor from FormatField
398     this(FormatField gt, ulong sampleIdx)
399     {
400         this.line = gt.line;
401         this.type = gt.type;
402         final switch(this.type){
403             case BcfRecordType.Int8:
404                 this.data = gt.to!byte()[sampleIdx];
405                 break;
406             case BcfRecordType.Int16:
407                 this.data = gt.to!short()[sampleIdx];
408                 break;
409             case BcfRecordType.Int32:
410                 this.data = gt.to!int()[sampleIdx];
411                 break;
412             case BcfRecordType.Int64:
413             case BcfRecordType.Float:
414             case BcfRecordType.Char:
415             case BcfRecordType.Null:
416                 assert(0);
417         }
418     }
419 
420     /// Explicit postblit to avoid 
421     /// https://github.com/blachlylab/dhtslib/issues/122
422     this(this)
423     {
424         this.type = type;
425         this.data = data;
426         this.line = line;
427     }
428     
429     bool isNull()
430     {
431         final switch(this.type){
432             case BcfRecordType.Int8:
433                 return (cast(GT!byte[])(this.data)).filter!(x=>!x.isMissing).empty;
434             case BcfRecordType.Int16:
435                 return (cast(GT!short[])(this.data)).filter!(x=>!x.isMissing).empty;
436             case BcfRecordType.Int32:
437                 return (cast(GT!int[])(this.data)).filter!(x=>!x.isMissing).empty;
438             case BcfRecordType.Int64:
439             case BcfRecordType.Float:
440             case BcfRecordType.Char:
441             case BcfRecordType.Null:
442                 assert(0);
443         }
444     }
445 
446     auto alleles()
447     {
448         final switch(this.type){
449             case BcfRecordType.Int8:
450                 return (cast(GT!byte[])(this.data)).filter!(x=>!x.isPadding).map!(x=>x.getAllele).array;
451             case BcfRecordType.Int16:
452                 return (cast(GT!short[])(this.data)).filter!(x=>!x.isPadding).map!(x=>x.getAllele).array;
453             case BcfRecordType.Int32:
454                 return (cast(GT!int[])(this.data)).filter!(x=>!x.isPadding).map!(x=>x.getAllele).array;
455             case BcfRecordType.Int64:
456             case BcfRecordType.Float:
457             case BcfRecordType.Char:
458             case BcfRecordType.Null:
459                 assert(0);
460         }
461     }
462 
463     /// get genotype ploidy
464     auto getPloidy() {
465         final switch(this.type){
466             case BcfRecordType.Int8:
467                 return (cast(GT!byte[])(this.data)).filter!(x=>!x.isPadding).count;
468             case BcfRecordType.Int16:
469                 return (cast(GT!short[])(this.data)).filter!(x=>!x.isPadding).count;
470             case BcfRecordType.Int32:
471                 return (cast(GT!int[])(this.data)).filter!(x=>!x.isPadding).count;
472             case BcfRecordType.Int64:
473             case BcfRecordType.Float:
474             case BcfRecordType.Char:
475             case BcfRecordType.Null:
476                 assert(0);
477         }
478     }
479     /// mixin for genotype printing
480     auto toString(T)() const {
481         string ret;
482         foreach(i, gt; cast(GT!T[]) this.data) {
483             if(i)
484                 ret ~= ['/','|'][gt.phased];
485             if(gt.isPadding) {
486                 ret = ret[0..$-1];
487                 break;
488             }
489             if (gt.getAllele() == -1) {
490                 ret ~= '.';
491             } else { 
492                 ret ~= gt.getAllele().to!string;
493             }
494         }
495         return ret;
496     }
497     /// get string representation
498     auto toString() const {
499         final switch(this.type){
500             case BcfRecordType.Int8:
501                 return this.toString!byte;
502             case BcfRecordType.Int16:
503                 return this.toString!short;
504             case BcfRecordType.Int32:
505                 return this.toString!int;
506             case BcfRecordType.Int64:
507             case BcfRecordType.Float:
508             case BcfRecordType.Char:
509             case BcfRecordType.Null:
510                 assert(0);
511         }
512     }
513 }
514 
515 
516 
517 /** Wrapper around bcf1_t 
518 
519     Because it uses bcf1_t internally, it must conform to the BCF2 part
520     of the VCFv4.2 specs, rather than the loosey-goosey VCF specs. i.e.,
521     INFO, CONTIG, FILTER records must exist in the header.
522 
523     TODO: Does this need to be kept in a consistent state?
524     Ideally, VCFWriter would reject invalid ones, but we are informed
525     that it is invalid (e.g. if contig not found) while building this
526     struct; bcf_write1 will actually segfault, unfortunately. I'd like
527     to avoid expensive validate() calls for every record before writing
528     if possible, which means keeping this consistent. However, not
529     sure what to do if error occurs when using the setters herein?
530 
531     2019-01-23 struct->class to mirror SAMRecord -- faster if reference type?
532 
533     2019-01-23 WIP: getters for chrom, pos, id, ref, alt are complete (untested)
534 
535 After parsing a BCF or VCF line, bcf1_t must be UnpackLeveled. (not applicable when building bcf1_t from scratch)
536 Depending on information needed, this can be done to various levels with performance tradeoff.
537 Unpacking symbols:
538 UNPACK.ALL: all
539 UNPACK.SHR: all shared information (UNPACK.ALT|UNPACK.FILT|UNPACK.INFO)
540 
541                         UnpackLevel.ALT
542                        /               UnpackLevel.FILT
543                       |               /      UnpackLevel.INFO
544                       |              |      /       ____________________________ UnpackLevel.FMT
545                       V              V     V       /       |       |       |
546 #CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  NA00001 NA00002 NA00003 ...
547 
548 */
549 struct VCFRecord
550 {
551     Bcf1 line;   /// htslib structured record TODO: change to 'b' for better internal consistency? (vcf.h/c actually use line quite a bit in fn params)
552 
553     VCFHeader vcfheader;   /// corresponding header (required);
554 
555     /** VCFRecord
556 
557     Construct a bcf/vcf record, backed by bcf1_t, from: an existing bcf1_t, parameters, or a VCF line.
558 
559     Internal backing by bcf1_t means it must conform to the BCF2 rules -- i.e., header must contain
560     appropriate INFO, CONTIG, and FILTER lines.
561 
562     Protip: specifying alternate MAX_UNPACK can speed it tremendously
563         as it will not UnpackLevel all fields, only up to those requested (see htslib.vcf)
564         For example, UnpackLevel.ALT is up to ALT inclusive, and UnpackLevel.ALT is up to FILTER
565     */
566     this(T)(T h, bcf1_t *b, UnpackLevel MAX_UNPACK = UnpackLevel.None)
567     if(is(T == VCFHeader) || is(T == bcf_hdr_t *))
568     {
569         static if (is(T == VCFHeader)) this.vcfheader = h;
570         else static if (is(T == bcf_hdr_t *)) this.vcfheader = VCFHeader(Bcf_hdr_t(h)); // double free() bug if we don't own bcf_hdr_t h
571         else static if (is(T == bcf_hdr_t)) assert(0);  // ferret out situations that will lead to free() crashes
572         else assert(0);
573 
574         this.line = Bcf1(b);
575 
576         // Now it must be UNPACKed
577         // Protip: specifying alternate MAX_UNPACK can speed it tremendously
578         immutable int ret = bcf_unpack(this.line, MAX_UNPACK);    // unsure what to do c̄ return value // @suppress(dscanner.suspicious.unused_variable)
579     }
580     /// ditto
581     this(SS)(VCFHeader vcfhdr, string chrom, int pos, string id, string _ref, string alt, float qual, SS filter, )
582     if (isSomeString!SS || is(SS == string[]))
583     {
584         this.line = Bcf1(bcf_init1());
585         this.vcfheader = vcfhdr;
586         
587         this.chrom = chrom;
588         this.pos = pos;
589         this.id = id;
590 
591         this.setAlleles(_ref, alt);
592 
593         this.qual = qual;
594         this.filter = filter;
595     }
596     /// ditto
597     this(VCFHeader vcfhdr, string line, UnpackLevel MAX_UNPACK = UnpackLevel.None)
598     {
599         this.vcfheader = vcfhdr;
600 
601         kstring_t kline;
602         auto dupline = line.dup; // slower, but safer than a cast // @suppress(dscanner.suspicious.unmodified)
603 
604         kline.l = dupline.length;
605         kline.m = dupline.length;
606         kline.s = dupline.ptr;
607 
608         this.line = Bcf1(bcf_init1());
609         this.line.max_unpack = MAX_UNPACK;
610 
611         auto ret = vcf_parse(&kline, this.vcfheader.hdr, this.line);
612         if (ret < 0) {
613             hts_log_error(__FUNCTION__, "vcf_parse returned < 0 -- code error or malformed VCF line");
614         } else {
615             ret = bcf_unpack(this.line, MAX_UNPACK);    // unsure what to do c̄ return value
616         }
617     }
618 
619     /// Explicit postblit to avoid 
620     /// https://github.com/blachlylab/dhtslib/issues/122
621     this(this)
622     {
623         this.line = line;
624         this.vcfheader = vcfheader;
625     }
626 
627     /// ensure that vcf variable length data is unpacked to at least desired level
628     pragma(inline, true)
629     void unpack(UnpackLevel unpackLevel)
630     {
631         if (this.line.max_unpack < unpackLevel) bcf_unpack(this.line, unpackLevel);
632     }
633 
634     //----- FIXED FIELDS -----//
635     
636     /* CHROM */
637     /// Get chromosome (CHROM)
638     @property
639     string chrom()
640     {   
641         return fromStringz(bcf_hdr_id2name(this.vcfheader.hdr, this.line.rid)).idup;
642     }
643     /// Set chromosome (CHROM)
644     @property
645     void chrom(const(char)[] c)
646     {
647         auto rid = bcf_hdr_name2id(this.vcfheader.hdr, toStringz(c));
648         if (rid == -1) {
649             hts_log_error(__FUNCTION__, format("contig not found: %s", c));
650             throw new Exception("contig not found");
651         }
652         else line.rid = rid;
653     }
654 
655 
656     /* POS */
657     /** Get position (POS, column 2)
658      *
659      * NB: internally BCF is uzing 0 based coordinates; we only show +1 when printing a VCF line with toString (which calls vcf_format)
660     */
661     @property
662     ZB pos()
663     out(coord) { assert(coord >= 0); }
664     do
665     {
666         return ZB(this.line.pos);
667     }
668     /// Set position (POS, column 2)
669     @property
670     void pos(ZB p)
671     in { assert(p >= 0); }
672     do
673     {
674         // TODO: should we check for pos >= 0 && pos < contig length? Could really hamper performance.
675         // TODO: if writing out the file with invalid POS values crashes htslib, will consider it
676         this.line.pos = p.pos;
677     }
678 
679 
680     /* ID */
681     /// Get ID string
682     @property string id()
683     {
684         this.unpack(UnpackLevel.AltAllele);
685         return fromStringz(this.line.d.id).idup;
686     }
687     /// Sets new ID string; comma-separated list allowed but no dup checking performed
688     @property int id(const(char)[] id)
689     {
690         // bcf_update_id expects null pointer instead of empty string to mean "no value"
691         if (id == "") return bcf_update_id(this.vcfheader.hdr, this.line, null);
692         else return bcf_update_id(this.vcfheader.hdr, this.line, toStringz(id));
693     }
694     /// Append an ID (column 3) to the record.
695     ///
696     /// NOTE: htslib performs duplicate checking
697     int addID(const(char)[] id)
698     {
699         if(id == "") return 0;
700         return bcf_add_id(this.vcfheader.hdr, this.line, toStringz(id));
701     }
702 
703 
704     /* Alleles (REF, ALT)
705     
706         Internally, alleles are stored as a \0 separated array:
707         [C \0 T \0 T T \0.. \0]
708          ^    ^    ^ ^
709          |    |    L L ALT_1 = "TT"
710          |    L ALT_0 
711          L REF
712 
713         TODO: Getters and setters poorly or inconsistently (?) named at this time or there are too many overloads?
714         TODO: need non-overwriting setter for ref and alt alleles
715         TODO: some of these may be inefficent; since they may be used in hot inner loops, pls optimize
716     */
717     /// REF allele length
718     @property long refLen()
719     {
720         version(DigitalMars) pragma(inline);
721         version(LDC) pragma(inline, true);
722         version(GNU) pragma(inline, true);
723         return this.line.rlen;
724     }
725 
726     /// Coordinate range of the reference allele
727     @property ZBHO coordinates()
728     {
729         return ZBHO(this.pos, this.pos + this.refLen);
730     }
731     
732     /// All alleles getter (array)
733     @property string[] allelesAsArray()
734     {
735         this.unpack(UnpackLevel.AltAllele);
736         string[] ret;
737         ret.length = this.line.n_allele;        // n=0, no reference; n=1, ref but no alt
738         foreach(int i; 0 .. this.line.n_allele) // ref allele is index 0
739         {
740             ret[i] = fromStringz(this.line.d.allele[i]).idup;
741         }
742         return ret;
743     }
744     /// Reference allele getter
745     @property string refAllele()
746     {
747         this.unpack(UnpackLevel.AltAllele);
748         if (this.line.n_allele < 1) return ""; // a valid record could have no ref (or alt) alleles
749         else return fromStringz(this.line.d.als).idup;
750     }
751     // NB WIP: there could be zero, or multiple alt alleles
752     /+@property string altAlleles()
753     {
754         if (!this.line.unpacked) bcf_unpack(this.line, UnpackLevel.ALT);
755         return fromStringz(this.line.d.als)
756     }+/
757     /// Alternate alleles getter version 1: ["A", "ACTG", ...]
758     @property string[] altAllelesAsArray()
759     {
760         this.unpack(UnpackLevel.AltAllele);
761 
762         string[] ret;
763         if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt
764         return this.allelesAsArray[1 .. $];     // trim off REF
765     }
766     /// Alternate alleles getter version 2: "A,ACTG,..."
767     @property string altAllelesAsString()
768     {
769         this.unpack(UnpackLevel.AltAllele);
770 
771         string ret;
772         if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt
773 
774         char[] tmp;
775         tmp.length = this.line.d.m_allele;    // max allocated size in htslib
776 
777         char *a = this.line.d.allele[1];    // ref allele is index 0
778         const char *last = this.line.d.allele[this.line.n_allele - 1];    // pointer to start of last allele
779         size_t i = 0;
780 
781         assert( this.line.d.allele[1] == (this.line.d.als + this.line.rlen + 1) );  // ensue data structure is as we think (+1: past ref's term \0)
782 
783         while (i < this.line.d.m_allele)    // safety stop at max allocated size
784         {
785             if (*a) tmp[i] = *a;
786             else {  // '\0'
787                 if (a < last) tmp[i] = ',';
788                 else break;
789             }
790             i++;
791             a++;
792         }
793         tmp.length = i;
794 
795         return tmp.idup;
796     }
797     /// Set alleles; comma-separated list
798     @property void alleles(string a)
799     {
800         this.unpack(UnpackLevel.AltAllele);
801         if (a == "") {
802             this.line.n_allele = 0;
803             if (this.line.d.m_allele) this.line.d.als[0] = '\0';    // if storage allocated, zero out the REF
804         }
805         else
806             bcf_update_alleles_str(this.vcfheader.hdr, this.line, toStringz(a));
807     }
808     /// Set alleles; array
809     @property void alleles(string[] a)
810     {
811         this.unpack(UnpackLevel.AltAllele);
812         if (a.length == 0) {
813             this.line.n_allele = 0;
814             if (this.line.d.m_allele) this.line.d.als[0] = '\0';    // if storage allocated, zero out the REF
815         }
816         else {
817             const(char)*[64] allelesp;  // will fail if a locus has more than 63 ALT alleles :-O
818             foreach(i; 0 .. a.length ) {
819                 // In order to zero out all alleles, pass a zero-length allele to the string version,
820                 // or a zero-length array to this version. Zero length string member of nonempty allele array is an error.
821                 if (a[i].length == 0) {
822                     hts_log_error(__FUNCTION__, "Zero-length allele in nonempty allele array. Setting to '.'");
823                     allelesp[i] = ".".ptr;
824                 }
825                 else allelesp[i] = toStringz(a[i]);
826             }
827             bcf_update_alleles(this.vcfheader.hdr, this.line, &allelesp[0], cast(int)a.length);
828         }
829     }
830 
831     /// Set REF allele only
832     void setRefAllele(string r)
833     {
834         // first, get REF
835         this.unpack(UnpackLevel.AltAllele);
836         // a valid record could have no ref (or alt) alleles
837         auto alleles = [toUTFz!(char *)(r)];
838         if (this.line.n_allele < 2) // if none or 1 (=REF only), just add the REF we receieved
839             bcf_update_alleles(this.vcfheader.hdr, this.line, alleles.ptr, 1);
840         else {
841             // length of REF allele is allele[1] - allele[0], (minus one more for \0)
842             // TODO ** TODO ** : there is a property line.refLen already
843             const auto reflen = (this.line.d.allele[1] - this.line.d.allele[0]) - 1;
844             alleles ~= this.altAllelesAsArray.map!(toUTFz!(char *)).array;
845             bcf_update_alleles(this.vcfheader.hdr, this.line, alleles.ptr, cast(int)alleles.length);
846         }
847     }
848     /// Set alleles; alt can be comma separated
849     void setAlleles(string _ref, string alt)
850     {
851         immutable string alleles = _ref ~ "," ~ alt ~ "\0";
852         bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr);
853     }
854     /// Set alleles; min. 2 alleles (ref, alt1); unlimited alts may be specified
855     void setAlleles(string _ref, string alt, ...)
856     {
857         string alleles = _ref ~ "," ~ alt;
858 
859         for (int i = 0; i < _arguments.length; i++)
860         {
861             alleles ~= "," ~ va_arg!string(_argptr);
862         }
863 
864         alleles ~= "\0";
865 
866         bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr);
867     }
868 
869 
870     /* Quality (QUAL) */
871     /// Get variant quality (QUAL, column 6)
872     @property float qual()
873     out(result) { assert(result >= 0); }
874     do
875     {
876         return this.line.qual;
877     }
878     /// Set variant quality (QUAL, column 6)
879     @property void qual(float q)
880     in { assert(q >= 0); }
881     do { this.line.qual = q; }
882 
883 
884     /* FILTER */
885     /// Get FILTER column (nothing in htslib sadly)
886     @property string filter()
887     {
888         const(char)[] ret;
889 
890         this.unpack(UnpackLevel.Filter);
891 
892         if (this.line.d.n_flt) {
893             for(int i; i< this.line.d.n_flt; i++) {
894                 if (i) ret ~= ";";
895                 ret ~= fromStringz(this.vcfheader.hdr.id[BCF_DT_ID][ this.line.d.flt[0] ].key);
896             }
897         } else {
898             ret = ".";
899         }
900 
901         return ret.idup;
902     }
903     /// Remove all entries in FILTER
904     void removeAllFilters()
905     {
906         // always returns zero
907         bcf_update_filter(this.vcfheader.hdr, this.line, null, 0);
908     }
909     /// Set the FILTER column to f
910     @property void filter(string f)
911     {
912         this.filter([f]);
913     }
914     /// Set the FILTER column to f0,f1,f2...
915     /// TODO: determine definitiely whether "." is replaced with "PASS"
916     @property void filter(string[] fs)
917     {
918         int[] fids;
919         foreach(f; fs) {
920             const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
921             if (fid == -1){
922                 hts_log_warning(__FUNCTION__, format("ignoring filter not found in header: %s", f.empty() ? "(empty)" : f));
923                 continue;
924             }
925             else fids ~= fid;
926         }
927         if (fids.length > 0)
928             bcf_update_filter(this.vcfheader.hdr, this.line, fids.ptr, cast(int)fids.length);
929         else
930             hts_log_warning(__FUNCTION__, "No FILTER update was performed due to empty list");
931     }
932     /// Add a filter; from htslib: 
933     /// "If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed."
934     int addFilter(string f)
935     {
936         
937         if(f == "") hts_log_warning(__FUNCTION__, "filter was empty");
938         const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
939         if (fid == -1){
940             hts_log_warning(__FUNCTION__, format("ignoring filter not found in header: %s", f.empty() ? "(empty)" : f));
941             return -1;
942         }
943 
944         return bcf_add_filter(this.vcfheader.hdr, this.line, fid);
945     }
946     /// Remove a filter by name
947     int removeFilter(string f)
948     {
949         const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
950         if (fid == -1){
951             hts_log_warning(__FUNCTION__, format("filter not found in header (ignoring): %s", f) );
952             return -1;
953         }
954         return removeFilter(fid);
955     }
956     /// Remove a filter by numeric id
957     int removeFilter(int fid)
958     {
959         return bcf_remove_filter(this.vcfheader.hdr, this.line, fid, 0);
960     }
961     /// Determine whether FILTER is present. log warning if filter does not exist. "PASS" and "." can be used interchangeably.
962     bool hasFilter(string filter)
963     {
964         char[] f = filter.dup ~ '\0';
965         //const auto id = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, f.ptr);
966         const auto ret = bcf_has_filter(this.vcfheader.hdr, this.line, f.ptr);
967 
968         if (ret > 0) return true;
969         else if (ret == 0) return false;
970         else {
971             hts_log_warning(__FUNCTION__, format("FILTER %s does not exist in the header", filter));
972             return false;
973         }
974     }
975 
976 
977     /** Update INFO (pan-sample info; column 8) 
978      *
979      *  Add a tag:value to the INFO column
980      *  NOTE: tag must already exist in the header
981      *
982      *  Templated on data type, calls one of bcf_update_info_{int32,float,string,flag}
983      *  Both singletons and arrays are supported.
984     */
985     void addInfo(T)(string tag, T data)
986     if(isSomeString!T || ((isIntegral!T || isFloatingPoint!T || isBoolean!T) && !isArray!T))
987     {
988         int ret = -1;
989 
990         static if(isIntegral!T) {
991             auto integer = cast(int) data;
992             ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1);
993         }
994         
995         else static if(isFloatingPoint!T) {
996             auto flt = cast(float) data;    // simply passing "2.0" (or whatever) => data is a double
997             ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1);
998         }
999 
1000         else static if(isSomeString!T)
1001             ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data));
1002         
1003         else static if(isBoolean!T) {
1004             immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 
1005             ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set);
1006         }
1007         
1008         if (ret == -1)
1009             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
1010     }
1011     /// ditto
1012     void addInfo(T)(string tag, T data)
1013     if(isIntegral!(ElementType!T) || isFloatingPoint!(ElementType!T))             // otherwise string::immutable(char)[] will match the other template
1014     {
1015         int ret = -1;
1016 
1017         static if(isIntegral!(ElementType!T)) {
1018             auto integer = data.to!(int[]);
1019             ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), integer.ptr, cast(int)data.length);
1020         }
1021         
1022         else static if(isFloatingPoint!(ElementType!T)) {
1023             auto flt = data.to!(float[]);    // simply passing "2.0" (or whatever) => data is a double
1024             ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), flt.ptr, cast(int)data.length);
1025         }
1026         
1027         if (ret == -1)
1028             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
1029     }
1030 
1031     /** Update FORMAT (sample info; column 9+)
1032      *  
1033      *  Templated on data type, calls one of bc_update_format_{int32,float,string,flag}
1034     */
1035     void addFormat(T)(string tag, T data)
1036     if(!isArray!T || isSomeString!T)
1037     {
1038         int ret = -1;
1039 
1040         static if(isIntegral!T) {
1041             auto integer = data.to!int;
1042             ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1);
1043         }
1044         
1045         else static if(isFloatingPoint!T) {
1046             auto flt = data.to!float;    // simply passing "2.0" (or whatever) => data is a double
1047             ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1);
1048         }
1049 
1050         else static if(isSomeString!T){
1051             auto strs = [data].map!(toUTFz!(char*)).array;
1052             ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), strs.ptr, 1);
1053         }
1054         
1055         else static if(isBoolean!T) {
1056             immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 
1057             ret = bcf_update_format_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set);
1058         }
1059         
1060         if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data));
1061     }
1062     /// ditto
1063     void addFormat(T)(string tag, T[] data)
1064     if((!isArray!T || isSomeString!T) && !is(T == immutable(char)))
1065     {
1066                 int ret = -1;
1067 
1068         static if(isIntegral!T) {
1069             auto integer = data.to!(int[]);
1070             ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag),
1071                                             integer.ptr, cast(int)data.length);
1072         }
1073         
1074         else static if(isFloatingPoint!T) {
1075             auto flt = data.to!(float[]);    // simply passing "2.0" (or whatever) => data is a double
1076             ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag),
1077                                             flt.ptr, cast(int)data.length);
1078         }
1079 
1080         else static if(isSomeString!T){
1081             auto strs = data.map!(toUTFz!(char*)).array;
1082             ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), strs.ptr, cast(int)strs.length);
1083         }
1084 
1085         if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data));
1086 
1087     }
1088 
1089     /// remove an info section
1090     void removeInfo(string tag)
1091     {
1092         bcf_update_info(this.vcfheader.hdr,this.line, toStringz(tag),null,0,HeaderTypes.None);
1093     }
1094 
1095     /// remove a format section
1096     void removeFormat(string tag)
1097     {
1098         bcf_update_format(this.vcfheader.hdr,this.line, toStringz(tag),null,0,HeaderTypes.None);
1099     }
1100 
1101 
1102     /// add INFO or FORMAT key:value pairs to a record
1103     /// add a single datapoint OR vector of values, OR, values to each sample (if lineType == FORMAT)
1104     void add(HeaderRecordType lineType, T)(const(char)[] tag, T data)
1105     if((lineType == HeaderRecordType.Info || lineType == HeaderRecordType.Format) &&
1106         (isIntegral!T       || isIntegral!(ElementType!T)   ||
1107         isFloatingPoint!T   || isFloatingPoint!(ElementType!T) ||
1108         isSomeString!T      || isSomeString!(ElementType!T) ||
1109         isBoolean!T         || isBoolean!(ElementType!T)))
1110     {
1111         int ret = -1;
1112         int len;
1113 
1114         static if (!isDynamicArray!T) {
1115             len = 1;
1116             static if (isIntegral!T) auto d = data.to!int;
1117             else static if (isFloatingPoint!T) auto d = data.to!float;
1118             else static if (isSomeString!T) auto d = toStringz(data);
1119             else static if (isBoolean!T) immutable int d = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag
1120             else static assert(0);
1121 
1122             const auto ptr = &d;
1123         }
1124         else static if (isDynamicArray!T) {
1125             assert(data.length < int.max);
1126             len = cast(int) data.length;
1127             static if (isIntegral!(ElementType!T)) auto d = data.to!(int[]);
1128             else static if (isFloatingPoint!(ElementType!T)) auto d = data.to!(float[]);
1129             else static if (isSomeString!(ElementType!T)) {
1130                 char[] d;
1131                 foreach(s; data) {
1132                     d ~= s ~ "\0";
1133                 }
1134                 if(d.length == 0) d ~= "\0";    // TODO replace with asserts on data length earlier
1135             }
1136             else static if (isBoolean!(ElementType!T)) {
1137                 int[] d;
1138                 foreach(b; data) {
1139                     d ~= (b ? 1 : 0);
1140                 }
1141             }
1142 
1143             const auto ptr = d.ptr;
1144         }
1145         else static assert(0);
1146 
1147         static if (lineType == HeaderRecordType.Info) {
1148             static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int))
1149                 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
1150             else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float))
1151                 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
1152             else static if (isSomeString!T || isSomeString!(ElementType!T))
1153                 ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr);
1154             else static if (is(T == bool) || is(ElementType!T == bool))
1155                 ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
1156             else static assert(0, "Type not recognized for INFO tag");
1157         }
1158         else static if (lineType == HeaderRecordType.Format) {
1159             static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int))
1160                 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
1161             else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float))
1162                 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
1163             else static if (isSomeString!T || isSomeString!(ElementType!T))
1164                 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
1165             else static assert(0, "Type not recognized for FORMAT tag");
1166         }
1167         else static assert(0);
1168 
1169         if (ret == -1)
1170             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
1171     }
1172 
1173     /// returns a hashmap of info data by field
1174     InfoField[string] getInfos()
1175     {
1176         /// UnpackLevel
1177         this.unpack(UnpackLevel.Info);
1178 
1179         /// copy some data
1180         InfoField[string] infoMap;
1181         bcf_info_t[] infos = this.line.d.info[0..this.line.n_info].dup;
1182 
1183         foreach (bcf_info_t info; infos)
1184         {
1185             /// if variable data is null then value is set for deletion
1186             /// skip
1187             if(!info.vptr) continue;
1188             auto key = fromStringz(this.vcfheader.hdr.id[HeaderDictTypes.Id][info.key].key).idup;
1189             infoMap[key] = InfoField(key, &info, this.line);
1190         }
1191         return infoMap;
1192     }
1193 
1194     /// returns a hashmap of format data by field
1195     FormatField[string] getFormats()
1196     {
1197         this.unpack(UnpackLevel.Format);
1198         FormatField[string] fmtMap;
1199         bcf_fmt_t[] fmts = this.line.d.fmt[0..this.line.n_fmt].dup;
1200         foreach (bcf_fmt_t fmt; fmts)
1201         {
1202             /// if variable data is null then value is set for deletion
1203             /// skip
1204             if(!fmt.p) continue;
1205             auto key = fromStringz(this.vcfheader.hdr.id[HeaderDictTypes.Id][fmt.id].key).idup;
1206             fmtMap[key] = FormatField(key, &fmt, this.line);
1207         }
1208         return fmtMap;
1209     }
1210 
1211     Genotype getGenotype(int sampleIdx) {
1212         Genotype ret;
1213         if("GT" in this.getFormats() && sampleIdx < this.vcfheader.nsamples) {
1214             ret = Genotype(this.getFormats["GT"], sampleIdx);
1215         }else if (sampleIdx >= this.vcfheader.nsamples) {
1216             hts_log_error(__FUNCTION__, "sample idx is larger than nsamples");
1217         }else{
1218             hts_log_error(__FUNCTION__, "GT doesn't exist in FORMAT fields");
1219         }
1220         return ret;
1221     }
1222 
1223     Genotype[] getGenotypes() {
1224         Genotype[] ret;
1225         FormatField gt;
1226         if("GT" in this.getFormats()) {
1227             gt = this.getFormats["GT"];
1228         }else{
1229             hts_log_error(__FUNCTION__, "GT doesn't exist in FORMAT fields");
1230         }
1231         for(auto i=0; i < this.vcfheader.nsamples; i++)
1232         {
1233             ret ~= Genotype(gt, i);
1234         }
1235         return ret;
1236     }
1237 
1238     /// Return a string representation of the VCFRecord (i.e. as would appear in .vcf)
1239     ///
1240     /// As a bonus, there is a kstring_t memory leak
1241     string toString() const
1242     {
1243         kstring_t s;
1244 
1245         const int ret = vcf_format(this.vcfheader.hdr, this.line, &s);
1246         if (ret)
1247         {
1248             hts_log_error(__FUNCTION__,
1249                 format("vcf_format returned nonzero (%d) (likely EINVAL, invalid bcf1_t struct?)", ret));
1250             return "[VCFRecord vcf_format parse_error]";
1251         }
1252 
1253         return cast(string) s.s[0 .. s.l];
1254     }
1255 }
1256 
1257 
1258 
1259 
1260 
1261 ///
1262 debug(dhtslib_unittest)
1263 unittest
1264 {
1265     import std.exception: assertThrown;
1266     import std.stdio: writeln, writefln;
1267     import dhtslib.vcf.writer;
1268 
1269     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1270 
1271 
1272     auto vw = VCFWriter("/dev/null", VCFWriterTypes.VCF);
1273 
1274     vw.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
1275     vw.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">");
1276     vw.addHeaderLineKV("INFO", "<ID=DP2,Number=2,Type=Float,Description=\"Total Depth\">");
1277     // ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
1278     vw.vcfhdr.addHeaderLine!(HeaderRecordType.Info)("AF", HeaderLengths.OnePerAltAllele, HeaderTypes.Integer, "Number of Samples With Data");
1279     vw.addHeaderLineRaw("##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"); // @suppress(dscanner.style.long_line)
1280     vw.addHeaderLineRaw("##FILTER=<ID=q10,Description=\"Quality below 10\">");
1281     HeaderRecord hrec;
1282     hrec.setHeaderRecordType(HeaderRecordType.Format);
1283     hrec.setID("AF");
1284     hrec.setLength(HeaderLengths.OnePerAltAllele);
1285     hrec.setValueType(HeaderTypes.Float);
1286     hrec.setDescription("Allele Freq");
1287     vw.vcfhdr.addHeaderRecord(hrec);
1288     
1289     assert(vw.vcfhdr.getHeaderRecord(HeaderRecordType.Format, "ID","AF")["ID"] == "AF");
1290     vw.addHeaderLineRaw("##FORMAT=<ID=CH,Number=3,Type=String,Description=\"test\">");
1291 
1292     // Exercise header
1293     assert(vw.vcfhdr.nsamples == 0);
1294     vw.addSample("NA12878");
1295     assert(vw.vcfhdr.nsamples == 1);
1296 
1297     vw.writeHeader();
1298 
1299     auto r = new VCFRecord(vw.vcfhdr, bcf_init1());
1300     
1301     r.chrom = "20";
1302     assert(r.chrom == "20");
1303     assertThrown(r.chrom = "chr20");   // headerline chromosome is "20" not "chr20"
1304 
1305     r.pos = Coordinate!(Basis.zero)(999_999);
1306     assert(r.pos == Coordinate!(Basis.zero)(999_999));
1307     r.pos = Coordinate!(Basis.zero)(62_435_964 + 1);     // Exceeds contig length
1308 
1309     // Test ID field
1310     // note that in an empty freshly initialized bcf1_t, the ID field is "" rather than "."
1311     // Will consider adding initialization code
1312     //writefln("ID: %s", r.id);
1313     r.id = "";
1314     assert(r.id == ".");
1315     r.id = ".";
1316     assert(r.id == ".");
1317     r.id = "rs001";
1318     assert(r.id == "rs001");
1319     r.addID("rs999");
1320     assert(r.id == "rs001;rs999");
1321     r.addID("rs001");   // test duplicate checking
1322     assert(r.id == "rs001;rs999");
1323 
1324     // Test REF/ALT allele setters and getters
1325     // many overloads to test for good coverage
1326     // Test matrix: Set {zero, one, two, three} alleles * {set by string, set by array} = 8 test cases
1327     //  * also, there is setAlleles(ref, alt) and setAlleles(ref, alt1, ...) 
1328     //  * TODO: will also need to retreive alt alleles as array and string
1329 
1330     string[] alleles_array;
1331 
1332     // Zero alleles
1333     r.alleles("");
1334     const auto zs = r.allelesAsArray;
1335     const auto zsr = r.refAllele;
1336     assert(zs.length == 0);
1337     assert(zsr == "");
1338 
1339     r.alleles(alleles_array);
1340     const auto za = r.allelesAsArray;
1341     const auto zar = r.refAllele;
1342     assert(za.length == 0);
1343     assert(zar == "");
1344     
1345     // One allele
1346     r.alleles("C");
1347     const auto os = r.allelesAsArray;
1348     const auto osr = r.refAllele;
1349     assert(os.length == 1 && os[0] == "C");
1350     assert(osr == "C");
1351 
1352     r.alleles(["C"]);
1353     const auto oa = r.allelesAsArray;
1354     const auto oar = r.refAllele;
1355     assert(oa.length == 1 && oa[0] == "C");
1356     assert(oar == "C");
1357 
1358     // Two alleles
1359     r.alleles("C,T");
1360     const auto ts = r.allelesAsArray;
1361     const auto tsr= r.refAllele;
1362     assert(ts == ["C", "T"]);
1363     assert(tsr== "C");
1364 
1365     r.alleles(["C", "T"]);
1366     const auto ta = r.allelesAsArray;
1367     const auto tar= r.refAllele;
1368     assert(ta == ["C", "T"]);
1369     assert(tar== "C");
1370 
1371     const taaa = r.altAllelesAsArray;
1372     const taas = r.altAllelesAsString;
1373     assert(taaa == ["T"]);
1374     assert(taas == "T");
1375 
1376     // alternate setter for >= 2 alleles
1377     r.setAlleles("A", "G");
1378     assert(r.allelesAsArray == ["A", "G"]);
1379 
1380     // Three alleles
1381     r.alleles("A,C,T");
1382     assert(r.allelesAsArray == ["A", "C", "T"]);
1383     assert(r.refAllele == "A");
1384     assert(r.altAllelesAsString == "C,T");
1385 
1386     // alternate the alleles for testing purposes
1387     r.alleles(["G", "A", "C"]);
1388     assert(r.allelesAsArray == ["G", "A", "C"]);
1389     assert(r.refAllele == "G");
1390     assert(r.altAllelesAsString == "A,C");
1391 
1392     r.setAlleles("A", "C", "T");
1393     assert(r.allelesAsArray == ["A", "C", "T"]);
1394     assert(r.refAllele == "A");
1395     assert(r.altAllelesAsString == "C,T");
1396 
1397     r.setRefAllele("G");
1398     assert(r.allelesAsArray == ["G", "C", "T"]);
1399     assert(r.refAllele == "G");
1400     assert(r.altAllelesAsString == "C,T");
1401 
1402     r.setRefAllele("A");
1403     assert(r.allelesAsArray == ["A", "C", "T"]);
1404     assert(r.refAllele == "A");
1405     assert(r.altAllelesAsString == "C,T");
1406 
1407 
1408 
1409     // Test QUAL
1410     r.qual = 1.0;
1411     assert(r.qual == 1.0);
1412     // now test setting qual without UnpackLeveling
1413     // TODO: see https://forum.dlang.org/post/hebouvswxlslqhovzaia@forum.dlang.org, once resolved (or once factory function written),
1414     //  add template value param UnpackLevel.ALT
1415     auto rr = new VCFRecord(vw.vcfhdr, "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0.017\n"); // @suppress(dscanner.style.long_line)
1416     rr.qual = 3.0;
1417     assert(rr.qual == 3.0);
1418 
1419 
1420     // Test FILTER
1421     assert(r.hasFilter("PASS"));
1422     assert(r.hasFilter("."));
1423     // add q10 filter (is in header)
1424     r.filter = "q10";
1425     assert(r.filter == "q10");
1426     assert(!r.hasFilter("PASS"));   // if has another filter, no longer PASS (unless added explicitly)
1427     assert(!r.hasFilter("."));      // if has another filter, no longer has . (which is PASS, or no filter [spec conflict?])
1428 
1429     // add q30 filteR (not in header)
1430     r.filter = "q30";
1431     assert(r.filter == "q10");  // i.e., unchanged
1432 
1433     assert(r.hasFilter("q10"));
1434     assert(!r.hasFilter("q30"));
1435 
1436     r.removeAllFilters();
1437     assert(r.hasFilter("."));
1438     r.addFilter("q10");
1439     assert(r.hasFilter("q10"));
1440     r.removeFilter("q10");
1441     assert(r.hasFilter("PASS"));
1442     assert(r.toString == "20\t62435966\trs001;rs999\tA\tC,T\t1\t.\t.\n");
1443 
1444     r.addFormat("AF",[0.1, 0.5]);
1445     assert(r.toString == "20\t62435966\trs001;rs999\tA\tC,T\t1\t.\t.\tAF\t0.1,0.5\n");
1446 
1447     rr.addFormat("AF",[0.1]);
1448     writeln(rr.toString);
1449     assert(rr.toString == "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0\tAF\t0.1\n");
1450 
1451 
1452     assert(rr.getFormats["AF"].to!float[0][0] == 0.1f);
1453 
1454     rr.addInfo("AF",0.5);
1455     assert(rr.getInfos["AF"].to!float == 0.5f);
1456 
1457     rr.addInfo("DP2",[0.5f, 0.4f]);
1458     assert(rr.getInfos["DP2"].to!(float[]) == [0.5f, 0.4f]);
1459     
1460     rr.removeInfo("AF");
1461 
1462     rr.removeFormat("AF");
1463 
1464     rr.addFormat("CH", "test");
1465 
1466     
1467     writeln(rr.toString);
1468     writeln(rr.getFormats["CH"].to!string);
1469     assert(rr.getFormats["CH"].to!string[0][0] == "test");
1470 
1471     assert(rr.toString == "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;DP2=0.5,0.4\tCH\ttest\n");
1472 
1473     assert(rr.coordinates == ZBHO(17329,17330));
1474     vw.writeRecord(*r);
1475     vw.writeRecord(vw.vcfhdr.hdr, r.line);
1476 
1477 
1478     // Finally, print the records:
1479     writefln("VCF records via toString:\n%s%s", (*r), (*rr));
1480 
1481     writeln("\ndhtslib.vcf: all tests passed\n");
1482 }
1483 
1484 debug(dhtslib_unittest) unittest
1485 {
1486     import dhtslib.util;
1487     import std.stdio;
1488     import htslib.hts_log;
1489     import std.algorithm : map, count;
1490     import std.array : array;
1491     import std.path : buildPath, dirName;
1492     import std.math : approxEqual, isNaN;
1493     import dhtslib.tabix;
1494     import dhtslib.vcf;
1495     hts_set_log_level(htsLogLevel.HTS_LOG_INFO);
1496     hts_log_info(__FUNCTION__, "Testing VCFReader");
1497     hts_log_info(__FUNCTION__, "Loading test file");
1498     auto fn = buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","tabix","vcf_file.vcf.gz");
1499     auto tbx = TabixIndexedFile(fn);
1500     auto reg = getIntervalFromString("1:3000151-3062916");
1501     auto vcf = VCFReader(tbx, reg.contig, reg.interval);
1502     assert(!vcf.empty);
1503 
1504     VCFRecord rec = vcf.front;
1505     assert(rec.getInfos["AN"].to!int == 4);
1506     rec.addInfo("AN", rec.getInfos["AN"].to!int + 1);
1507     assert(rec.getInfos["AN"].to!int == 5);
1508     assert(rec.getInfos["AN"].to!byte == 5);
1509     assert(rec.getInfos["AN"].to!short == 5);
1510     assert(rec.getInfos["AN"].to!long == 5);
1511 
1512     assert(rec.getInfos["AN"].to!uint == 5);
1513     assert(rec.getInfos["AN"].to!ubyte == 5);
1514     assert(rec.getInfos["AN"].to!ushort == 5);
1515     assert(rec.getInfos["AN"].to!ulong == 5);
1516 
1517     assert(rec.getInfos["AN"].to!float.isNaN);
1518 
1519     rec.addInfo("AN", 128);
1520     assert(rec.getInfos["AN"].to!int == 128);
1521     assert(rec.getInfos["AN"].to!byte == 0);
1522     assert(rec.getInfos["AN"].to!short == 128);
1523     assert(rec.getInfos["AN"].to!long == 128);
1524 
1525     rec.addInfo("AN", 32768);
1526     assert(rec.getInfos["AN"].to!int == 32768);
1527     assert(rec.getInfos["AN"].to!byte == 0);
1528     assert(rec.getInfos["AN"].to!short == 0);
1529     assert(rec.getInfos["AN"].to!long == 32768);
1530 
1531     assert(rec.getInfos["AN"].to!long == 32768);
1532 
1533     // rec.addInfo("AN", 2147483648);
1534     // assert(rec.getInfos["AN"].to!int == 0);
1535     // assert(rec.getInfos["AN"].to!byte == 0);
1536     // assert(rec.getInfos["AN"].to!short == 0);
1537     // assert(rec.getInfos["AN"].to!long == 2147483648);
1538 
1539     vcf.popFront;
1540     rec = vcf.front;
1541     assert(rec.refAllele == "GTTT");
1542     assert(rec.getInfos["INDEL"].to!bool == true);
1543 
1544     assert(rec.getInfos["DP4"].to!(int[]) == [1, 2, 3, 4]);
1545     assert(rec.getInfos["DP4"].to!(byte[]) == [1, 2, 3, 4]);
1546     assert(rec.getInfos["DP4"].to!(short[]) == [1, 2, 3, 4]);
1547     assert(rec.getInfos["DP4"].to!(long[]) == [1, 2, 3, 4]);
1548 
1549     assert(rec.getInfos["DP4"].to!(uint[]) == [1, 2, 3, 4]);
1550     assert(rec.getInfos["DP4"].to!(ubyte[]) == [1, 2, 3, 4]);
1551     assert(rec.getInfos["DP4"].to!(ushort[]) == [1, 2, 3, 4]);
1552     assert(rec.getInfos["DP4"].to!(ulong[]) == [1, 2, 3, 4]);
1553 
1554     assert(rec.getInfos["DP4"].to!(float[]) == []);
1555     rec.addInfo("DP4", [2, 2, 3, 4]);
1556     assert(rec.getInfos["DP4"].to!(int[]) == [2, 2, 3, 4]);
1557 
1558     rec.addInfo("DP4", [128, 128, 128, 128]);
1559     assert(rec.getInfos["DP4"].to!(int[]) == [128, 128, 128, 128]);
1560     assert(rec.getInfos["DP4"].to!(byte[]) == []);
1561     assert(rec.getInfos["DP4"].to!(short[]) == [128, 128, 128, 128]);
1562     assert(rec.getInfos["DP4"].to!(long[]) == [128, 128, 128, 128]);
1563 
1564     rec.addInfo("DP4", [32768, 32768, 32768, 32768]);
1565     assert(rec.getInfos["DP4"].to!(int[]) == [32768, 32768, 32768, 32768]);
1566     assert(rec.getInfos["DP4"].to!(byte[]) == []);
1567     assert(rec.getInfos["DP4"].to!(short[]) == []);
1568     assert(rec.getInfos["DP4"].to!(long[]) == [32768, 32768, 32768, 32768]);
1569     
1570 
1571     assert(rec.getFormats["GQ"].to!int.array == [[409], [409]]);
1572 
1573     rec.addFormat("GQ", [32768,32768]);
1574 
1575     assert(rec.getFormats["GQ"].to!byte.array == []);
1576     assert(rec.getFormats["GQ"].to!short.array == []);
1577     assert(rec.getFormats["GQ"].to!int[0][0] == 32768);
1578 
1579     assert(rec.getFormats["GL"].to!float[1][1] == -5.0);
1580     
1581 
1582     assert(rec.getInfos["STR"].to!string == "test");
1583 
1584     assert(rec.getInfos["DP4"].to!string == "");
1585 
1586     assert(rec.getFormats["GT"].to!int.array == [[2, 4], [2, 4]]);
1587     assert(rec.getGenotypes.map!(x => x.toString).array == ["0/1", "0/1"]);
1588     
1589 
1590     vcf.popFront;
1591     rec = vcf.front;
1592 
1593     auto fmts = rec.getFormats;
1594     auto sam = vcf.vcfhdr.getSampleId("A");
1595     assert(fmts["GT"].to!int[sam] == [2, 4]);
1596     assert(rec.getGenotype(sam).toString == "0/1");
1597     assert(rec.getGenotype(sam).getPloidy == 2);
1598     sam = vcf.vcfhdr.getSampleId("B");
1599     assert(fmts["GT"].to!int[sam] == [6, -127]);
1600     assert(rec.getGenotype(sam).toString == "2");
1601     assert(rec.getGenotype(sam).getPloidy == 1);
1602 }