1 /**
2 
3 VCF version 4.2 (including BCF) reader and writer, including
4 a model with convenience functions for the header (metadata)
5 and individual VCF/BCF records (rows).
6 
7 Specifications: https://samtools.github.io/hts-specs/VCFv4.2.pdf
8 
9 */
10 module dhtslib.vcf;
11 
12 import core.stdc..string;
13 import core.vararg;
14 
15 import std.conv: to, ConvException;
16 import std.datetime;
17 import std.format: format;
18 import std.range: ElementType;
19 import std..string: fromStringz, toStringz;
20 import std.traits: isArray, isDynamicArray, isBoolean, isIntegral, isFloatingPoint, isNumeric, isSomeString;
21 import std.traits: Unqual;
22 
23 import htslib.hts_log;
24 import htslib.kstring;
25 import htslib.vcf;
26 
27 alias BCFRecord = VCFRecord;
28 alias BCFWriter = VCFWriter;
29 
30 /** Wrapper around bcf_hdr_t
31 
32     Q: Why do we have VCFHeader, but not SAMHeader?
33     A: Most (all)? of the SAM (bam1_t) manipulation functions do not require a ptr to the header,
34     whereas almost all of the VCF (bcf1_t) manipulation functions do. Therefore, we track bcf_hdr_t*
35     inside each VCFRecord; this is wrapped by VCFHeader for future convenience (for example,
36     now we have @property nsamples; may move the tag reader and writer functions here?)
37 
38     In order to avoid double free()'ing an instance bcf_hdr_t,
39     this wrapper will be the authoritative holder of of bcf_hdr_t ptrs,
40     it shall be passed around by reference, and copies are disabled.
41 */
42 struct VCFHeader
43 {
44     /// Pointer to htslib BCF/VCF header struct; will be freed from VCFHeader dtor 
45     bcf_hdr_t *hdr;
46 
47     // Copies have to be disabled to avoid double free()
48     @disable this(this);
49 
50     ~this()
51     {
52         // Deallocate header
53         if (this.hdr != null) bcf_hdr_destroy(this.hdr);
54     }
55 
56     invariant
57     {
58         assert(this.hdr != null);
59     }
60 
61     /// List of contigs in the header
62     @property string[] sequences()
63     {
64         import core.stdc.stdlib : free;
65         int nseqs;
66 
67         /** Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names) */
68         //const(char) **bcf_hdr_seqnames(const(bcf_hdr_t) *h, int *nseqs);
69         const(char) **ary = bcf_hdr_seqnames(this.hdr, &nseqs);
70         if (!nseqs) return [];
71 
72         string[] ret;
73         ret.reserve(nseqs);
74 
75         for(int i; i < nseqs; i++) {
76             ret ~= fromStringz(ary[i]).idup;
77         }
78 
79         free(ary);
80         return ret;        
81     }
82 
83     /// Number of samples in the header
84     pragma(inline, true)
85     @property int nsamples() { return bcf_hdr_nsamples(this.hdr); }
86 
87 }
88 
89 /** Wrapper around bcf1_t 
90 
91     Because it uses bcf1_t internally, it must conform to the BCF2 part
92     of the VCFv4.2 specs, rather than the loosey-goosey VCF specs. i.e.,
93     INFO, CONTIG, FILTER records must exist in the header.
94 
95     TODO: Does this need to be kept in a consistent state?
96     Ideally, VCFWriter would reject invalid ones, but we are informed
97     that it is invalid (e.g. if contig not found) while building this
98     struct; bcf_write1 will actually segfault, unfortunately. I'd like
99     to avoid expensive validate() calls for every record before writing
100     if possible, which means keeping this consistent. However, not
101     sure what to do if error occurs when using the setters herein?
102 
103     2019-01-23 struct->class to mirror SAMRecord -- faster if reference type?
104 
105     2019-01-23 WIP: getters for chrom, pos, id, ref, alt are complete (untested)
106 
107 After parsing a BCF or VCF line, bcf1_t must be unpacked. (not applicable when building bcf1_t from scratch)
108 Depending on information needed, this can be done to various levels with performance tradeoff.
109 Unpacking symbols:
110 BCF_UN_ALL: all
111 BCF_UN_SHR: all shared information (BCF_UN_STR|BCF_UN_FLT|BCF_UN_INFO)
112 
113                         BCF_UN_STR
114                        /               BCF_UN_FLT
115                       |               /      BCF_UN_INFO
116                       |              |      /       ____________________________ BCF_UN_FMT
117                       V              V     V       /       |       |       |
118 #CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  NA00001 NA00002 NA00003 ...
119 
120 */
121 class VCFRecord
122 {
123     bcf1_t* line;   /// htslib structured record TODO: change to 'b' for better internal consistency? (vcf.h/c actually use line quite a bit in fn params)
124 
125     VCFHeader *vcfheader;   /// corresponding header (required);
126                             /// is ptr to avoid copying struct containing ptr to bcf_hdr_t (leads to double free())
127 
128     /** VCFRecord
129 
130     Construct a bcf/vcf record, backed by bcf1_t, from: an existing bcf1_t, parameters, or a VCF line.
131 
132     Internal backing by bcf1_t means it must conform to the BCF2 rules -- i.e., header must contain
133     appropriate INFO, CONTIG, and FILTER lines.
134 
135     Protip: specifying alternate MAX_UNPACK can speed it tremendously
136         as it will not unpack all fields, only up to those requested (see htslib.vcf)
137         For example, BCF_UN_STR is up to ALT inclusive, and BCF_UN_STR is up to FILTER
138     */
139     this(T)(T *h, bcf1_t *b, int MAX_UNPACK = BCF_UN_ALL)
140     if(is(T == VCFHeader) || is(T == bcf_hdr_t))
141     {
142         static if (is(T == VCFHeader)) this.vcfheader = h;
143         //else static if (is(T == bcf_hdr_t)) this.vcfheader = new VCFHeader(h); // double free() bug if we don't own bcf_hdr_t h
144         else static if (is(T == bcf_hdr_t)) assert(0);  // ferret out situations that will lead to free() crashes
145         else assert(0);
146 
147         this.line = b;
148 
149         // Now it must be unpacked
150         // Protip: specifying alternate MAX_UNPACK can speed it tremendously
151         immutable int ret = bcf_unpack(this.line, MAX_UNPACK);    // unsure what to do c̄ return value // @suppress(dscanner.suspicious.unused_variable)
152     }
153     /// ditto
154     this(SS)(VCFHeader *vcfhdr, string chrom, int pos, string id, string _ref, string alt, float qual, SS filter, )
155     if (isSomeString!SS || is(SS == string[]))
156     {
157         this.line = bcf_init1();
158         this.vcfheader = vcfhdr;
159         
160         this.chrom = chrom;
161         this.pos = pos;
162         this.id = id;
163 
164         this.setAlleles(_ref, alt);
165 
166         this.qual = qual;
167         this.filter = filter;
168     }
169     /// ditto
170     this(VCFHeader *vcfhdr, string line, int MAX_UNPACK = BCF_UN_ALL)
171     {
172         this.vcfheader = vcfhdr;
173 
174         kstring_t kline;
175         auto dupline = line.dup; // slower, but safer than a cast // @suppress(dscanner.suspicious.unmodified)
176 
177         kline.l = dupline.length;
178         kline.m = dupline.length;
179         kline.s = dupline.ptr;
180 
181         this.line = bcf_init1();
182         this.line.max_unpack = MAX_UNPACK;
183 
184         auto ret = vcf_parse(&kline, this.vcfheader.hdr, this.line);
185         if (ret < 0) {
186             hts_log_error(__FUNCTION__, "vcf_parse returned < 0 -- code error or malformed VCF line");
187         } else {
188             ret = bcf_unpack(this.line, MAX_UNPACK);    // unsure what to do c̄ return value
189         }
190     }
191 
192     /// disable copying to prevent double-free (which should not come up except when writeln'ing)
193     //@disable this(this);  // commented out as class has no postblit ctor (relevant when VCFRecord was struct)
194     /// dtor
195     ~this()
196     {
197         if (this.line) bcf_destroy1(this.line);
198     }
199 
200     //----- FIXED FIELDS -----//
201     
202     /* CHROM */
203     /// Get chromosome (CHROM)
204     @property
205     string chrom()
206     {
207         if (!this.line.unpacked)
208             bcf_unpack(this.line, BCF_UN_STR);
209         
210         return fromStringz(bcf_hdr_id2name(this.vcfheader.hdr, this.line.rid)).idup;
211     }
212     /// Set chromosome (CHROM)
213     @property
214     void chrom(const(char)[] c)
215     {
216         auto rid = bcf_hdr_name2id(this.vcfheader.hdr, toStringz(c));
217         if (rid == -1) {
218             hts_log_error(__FUNCTION__, format("contig not found: %s", c));
219             throw new Exception("contig not found");
220         }
221         else line.rid = rid;
222     }
223 
224 
225     /* POS */
226     /** Get position (POS, column 2)
227      *
228      * NB: internally BCF is uzing 0 based coordinates; we only show +1 when printing a VCF line with toString (which calls vcf_format)
229     */
230     @property
231     long pos()
232     out(coord) { assert(coord >= 0); }
233     do
234     {
235         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
236         return this.line.pos;
237     }
238     /// Set position (POS, column 2)
239     @property
240     void pos(long p)
241     in { assert(p >= 0); }
242     do
243     {
244         // TODO: should we check for pos >= 0 && pos < contig length? Could really hamper performance.
245         // TODO: if writing out the file with invalid POS values crashes htslib, will consider it
246         this.line.pos = p;
247     }
248 
249 
250     /* ID */
251     /// Get ID string
252     @property string id()
253     {
254         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
255         return fromStringz(this.line.d.id).idup;
256     }
257     /// Sets new ID string; comma-separated list allowed but no dup checking performed
258     @property int id(const(char)[] id)
259     {
260         // bcf_update_id expects null pointer instead of empty string to mean "no value"
261         if (id == "") return bcf_update_id(this.vcfheader.hdr, this.line, null);
262         else return bcf_update_id(this.vcfheader.hdr, this.line, toStringz(id));
263     }
264     /// Append an ID (column 3) to the record.
265     ///
266     /// NOTE: htslib performs duplicate checking
267     int addID(const(char)[] id)
268     {
269         if(id == "") return 0;
270         return bcf_add_id(this.vcfheader.hdr, this.line, toStringz(id));
271     }
272 
273 
274     /* Alleles (REF, ALT)
275     
276         Internally, alleles are stored as a \0 separated array:
277         [C \0 T \0 T T \0.. \0]
278          ^    ^    ^ ^
279          |    |    L L ALT_1 = "TT"
280          |    L ALT_0 
281          L REF
282 
283         TODO: Getters and setters poorly or inconsistently (?) named at this time or there are too many overloads?
284         TODO: need non-overwriting setter for ref and alt alleles
285         TODO: some of these may be inefficent; since they may be used in hot inner loops, pls optimize
286     */
287     /// REF allele length
288     @property long refLen()
289     {
290         version(DigitalMars) pragma(inline);
291         version(LDC) pragma(inline, true);
292         version(GNU) pragma(inline, true);
293         return this.line.rlen;
294     }
295     /// All alleles getter (array)
296     @property string[] allelesAsArray()
297     {
298         string[] ret;
299         ret.length = this.line.n_allele;        // n=0, no reference; n=1, ref but no alt
300         foreach(int i; 0 .. this.line.n_allele) // ref allele is index 0
301         {
302             ret[i] = fromStringz(this.line.d.allele[i]).idup;
303         }
304         return ret;
305     }
306     /// Reference allele getter
307     @property string refAllele()
308     {
309         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
310         if (this.line.n_allele < 1) return ""; // a valid record could have no ref (or alt) alleles
311         else return fromStringz(this.line.d.als).idup;
312     }
313     // NB WIP: there could be zero, or multiple alt alleles
314     /+@property string altAlleles()
315     {
316         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
317         return fromStringz(this.line.d.als)
318     }+/
319     /// Alternate alleles getter version 1: ["A", "ACTG", ...]
320     @property string[] altAllelesAsArray()
321     {
322         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
323 
324         string[] ret;
325         if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt
326         return this.allelesAsArray[1 .. $];     // trim off REF
327     }
328     /// Alternate alleles getter version 2: "A,ACTG,..."
329     @property string altAllelesAsString()
330     {
331         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
332 
333         string ret;
334         if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt
335 
336         char[] tmp;
337         tmp.length = this.line.d.m_allele;    // max allocated size in htslib
338 
339         char *a = this.line.d.allele[1];    // ref allele is index 0
340         const char *last = this.line.d.allele[this.line.n_allele - 1];    // pointer to start of last allele
341         size_t i = 0;
342 
343         assert( this.line.d.allele[1] == (this.line.d.als + this.line.rlen + 1) );  // ensue data structure is as we think (+1: past ref's term \0)
344 
345         while (i < this.line.d.m_allele)    // safety stop at max allocated size
346         {
347             if (*a) tmp[i] = *a;
348             else {  // '\0'
349                 if (a < last) tmp[i] = ',';
350                 else break;
351             }
352             i++;
353             a++;
354         }
355         tmp.length = i;
356 
357         return tmp.idup;
358     }
359     /// Set alleles; comma-separated list
360     @property void alleles(string a)
361     {
362         if (a == "") {
363             this.line.n_allele = 0;
364             if (this.line.d.m_allele) this.line.d.als[0] = '\0';    // if storage allocated, zero out the REF
365         }
366         else
367             bcf_update_alleles_str(this.vcfheader.hdr, this.line, toStringz(a));
368     }
369     /// Set alleles; array
370     @property void alleles(string[] a)
371     {
372         if (a.length == 0) {
373             this.line.n_allele = 0;
374             if (this.line.d.m_allele) this.line.d.als[0] = '\0';    // if storage allocated, zero out the REF
375         }
376         else {
377             const(char)*[64] allelesp;  // will fail if a locus has more than 63 ALT alleles :-O
378             foreach(i; 0 .. a.length ) {
379                 // In order to zero out all alleles, pass a zero-length allele to the string version,
380                 // or a zero-length array to this version. Zero length string member of nonempty allele array is an error.
381                 if (a[i].length == 0) {
382                     hts_log_error(__FUNCTION__, "Zero-length allele in nonempty allele array. Setting to '.'");
383                     allelesp[i] = ".".ptr;
384                 }
385                 else allelesp[i] = toStringz(a[i]);
386             }
387             bcf_update_alleles(this.vcfheader.hdr, this.line, &allelesp[0], cast(int)a.length);
388         }
389     }
390     /// Set REF allele only
391     /// param r is \0-term Cstring
392     /// TODO: UNTESTED
393     void setRefAllele(const(char)* r)
394     {
395         // first, get REF
396         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
397         // a valid record could have no ref (or alt) alleles
398         if (this.line.n_allele < 2) // if none or 1 (=REF only), just add the REF we receieved
399             bcf_update_alleles(this.vcfheader.hdr, this.line, &r, 1);
400         else {
401             // length of REF allele is allele[1] - allele[0], (minus one more for \0)
402             // TODO ** TODO ** : there is a property line.refLen already
403             const auto reflen = (this.line.d.allele[1] - this.line.d.allele[0]) - 1;
404             if (strlen(r) <= reflen) {
405                 memcpy(this.line.d.allele[0], r, reflen + 1);   // +1 -> copy a trailing \0 in case original REF was longer than new 
406                 // TODO: do we need to call _sync ?
407             } else {
408                 // slower way: replace allele[0] with r, but keep rest of pointers poitning at existing allele block,
409                 // then call bcf_update_alleles; this will make complete new copy of this.line.d.allele, so forgive the casts
410                 this.line.d.allele[0] = cast(char*) r;
411                 bcf_update_alleles(this.vcfheader.hdr, this.line,
412                     cast( const(char)** ) this.line.d.allele, this.line.n_allele);
413             }
414         }
415     }
416     /// Set alleles; alt can be comma separated
417     void setAlleles(string _ref, string alt)
418     {
419         immutable string alleles = _ref ~ "," ~ alt ~ "\0";
420         bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr);
421     }
422     /// Set alleles; min. 2 alleles (ref, alt1); unlimited alts may be specified
423     void setAlleles(string _ref, string alt, ...)
424     {
425         string alleles = _ref ~ "," ~ alt;
426 
427         for (int i = 0; i < _arguments.length; i++)
428         {
429             alleles ~= "," ~ va_arg!string(_argptr);
430         }
431 
432         alleles ~= "\0";
433 
434         bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr);
435     }
436 
437 
438     /* Quality (QUAL) */
439     /// Get variant quality (QUAL, column 6)
440     @property float qual()
441     out(result) { assert(result >= 0); }
442     do
443     {
444         if (this.line.max_unpack < BCF_UN_FLT) bcf_unpack(this.line, BCF_UN_FLT);
445         return this.line.qual;
446     }
447     /// Set variant quality (QUAL, column 6)
448     @property void qual(float q)
449     in { assert(q >= 0); }
450     do { this.line.qual = q; }
451 
452 
453     /* FILTER */
454     /// Get FILTER column (nothing in htslib sadly)
455     @property string filter()
456     {
457         const(char)[] ret;
458 
459         if (this.line.max_unpack < BCF_UN_FLT) bcf_unpack(this.line, BCF_UN_FLT);
460 
461         if (this.line.d.n_flt) {
462             for(int i; i< this.line.d.n_flt; i++) {
463                 if (i) ret ~= ";";
464                 ret ~= fromStringz(this.vcfheader.hdr.id[BCF_DT_ID][ this.line.d.flt[0] ].key);
465             }
466         } else {
467             ret = ".";
468         }
469 
470         return ret.idup;
471     }
472     /// Remove all entries in FILTER
473     void removeAllFilters()
474     {
475         // always returns zero
476         bcf_update_filter(this.vcfheader.hdr, this.line, null, 0);
477     }
478     /// Set the FILTER column to f
479     @property void filter(string f)
480     {
481         this.filter([f]);
482     }
483     /// Set the FILTER column to f0,f1,f2...
484     /// TODO: determine definitiely whether "." is replaced with "PASS"
485     @property void filter(string[] fs)
486     {
487         int[] fids;
488         foreach(f; fs) {
489             if(f == "") continue;
490             const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
491             if (fid == -1) hts_log_warning(__FUNCTION__, format("filter not found in header (ignoring): %s", f) );
492             else fids ~= fid;
493         }
494         if (fids.length > 0)
495             bcf_update_filter(this.vcfheader.hdr, this.line, fids.ptr, cast(int)fids.length);
496         else
497             hts_log_warning(__FUNCTION__, "No FILTER update was performed due to empty list");
498     }
499     /// Add a filter; from htslib: 
500     /// "If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed."
501     int addFilter(string f)
502     {
503         return bcf_add_filter(this.vcfheader.hdr, this.line, 
504             bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f)));
505     }
506     /// Remove a filter by name
507     int removeFilter(string f)
508     {
509         const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
510         return removeFilter(fid);
511     }
512     /// Remove a filter by numeric id
513     int removeFilter(int fid)
514     {
515         return bcf_remove_filter(this.vcfheader.hdr, this.line, fid, 0);
516     }
517     /// Determine whether FILTER is present. log warning if filter does not exist. "PASS" and "." can be used interchangeably.
518     bool hasFilter(string filter)
519     {
520         char[] f = filter.dup ~ '\0';
521         //const auto id = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, f.ptr);
522         const auto ret = bcf_has_filter(this.vcfheader.hdr, this.line, f.ptr);
523 
524         if (ret > 0) return true;
525         else if (ret == 0) return false;
526         else {
527             hts_log_warning(__FUNCTION__, format("FILTER %s does not exist in the header", filter));
528             return false;
529         }
530     }
531 
532 
533     /** Update INFO (pan-sample info; column 8) 
534      *
535      *  Add a tag:value to the INFO column
536      *  NOTE: tag must already exist in the header
537      *
538      *  Templated on data type, calls one of bcf_update_info_{int32,float,string,flag}
539      *  Both singletons and arrays are supported.
540     */
541     void addInfo(T)(string tag, T data)
542     {
543         int ret = -1;
544 
545         static if(isIntegral!T) {
546             auto integer = cast(int) data;
547             ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1);
548         }
549         
550         else static if(isFloatingPoint!T) {
551             auto flt = cast(float) data;    // simply passing "2.0" (or whatever) => data is a double
552             ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1);
553         }
554 
555         else static if(isSomeString!T)
556             ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data));
557         
558         else static if(isBoolean!T) {
559             immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 
560             ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set);
561         }
562         
563         if (ret == -1)
564             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
565     }
566     /// ditto
567     void addInfo(T)(string tag, T[] data)
568     if(!is(T==immutable(char)))             // otherwise string::immutable(char)[] will match the other template
569     {
570         int ret = -1;
571 
572         static if(isIntegral!T) {
573             auto integer = cast(int) data;
574             ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, data.length);
575         }
576         
577         else static if(isFloatingPoint!T) {
578             auto flt = cast(float) data;    // simply passing "2.0" (or whatever) => data is a double
579             ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, data.length);
580         }
581         
582         if (ret == -1)
583             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
584     }
585 
586     /** Update FORMAT (sample info; column 9+)
587      *  
588      *  Templated on data type, calls one of bc_update_format_{int32,float,string,flag}
589     */
590     void addFormat(T)(string tag, T data)
591     if(!isArray!T)
592     {
593         int ret = -1;
594 
595         static if(isIntegral!T) {
596             auto integer = cast(int) data;
597             ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1);
598         }
599         
600         else static if(isFloatingPoint!T) {
601             auto flt = cast(float) data;    // simply passing "2.0" (or whatever) => data is a double
602             ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1);
603         }
604 
605         else static if(isSomeString!T)
606             ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data));
607         
608         else static if(isBoolean!T) {
609             immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 
610             ret = bcf_update_format_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set);
611         }
612         
613         if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data));
614     }
615     /// ditto
616     void addFormat(T)(string tag, T[] data)
617     {
618                 int ret = -1;
619 
620         static if(isIntegral!T) {
621             auto integer = cast(int[]) data;
622             ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag),
623                                             integer.ptr, cast(int)data.length);
624         }
625         
626         else static if(isFloatingPoint!T) {
627             auto flt = cast(float[]) data;    // simply passing "2.0" (or whatever) => data is a double
628             ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag),
629                                             flt.ptr, cast(int)data.length);
630         }
631         
632         if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data));
633 
634     }
635 
636     /// add INFO or FORMAT key:value pairs to a record
637     /// add a single datapoint OR vector of values, OR, values to each sample (if tagType == FORMAT)
638     void add(string tagType, T)(const(char)[] tag, T data)
639     if((tagType == "INFO" || tagType == "FORMAT") &&
640         (isIntegral!T       || isIntegral!(ElementType!T)   ||
641         isFloatingPoint!T   || isFloatingPoint!(ElementType!T) ||
642         isSomeString!T      || isSomeString!(ElementType!T) ||
643         isBoolean!T         || isBoolean!(ElementType!T)))
644     {
645         int ret = -1;
646         int len;
647 
648         static if (!isDynamicArray!T) {
649             len = 1;
650             static if (isIntegral!T) auto d = data.to!int;
651             else static if (isFloatingPoint!T) auto d = data.to!float;
652             else static if (isSomeString!T) auto d = toStringz(data);
653             else static if (isBoolean!T) immutable int d = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag
654             else static assert(0);
655 
656             const auto ptr = &d;
657         }
658         else static if (isDynamicArray!T) {
659             assert(data.length < int.max);
660             len = cast(int) data.length;
661             static if (isIntegral!(ElementType!T)) auto d = data.to!(int[]);
662             else static if (isFloatingPoint!(ElementType!T)) auto d = data.to!(float[]);
663             else static if (isSomeString!(ElementType!T)) {
664                 char[] d;
665                 foreach(s; data) {
666                     d ~= s ~ "\0";
667                 }
668                 if(d.length == 0) d ~= "\0";    // TODO replace with asserts on data length earlier
669             }
670             else static if (isBoolean!(ElementType!T)) {
671                 int[] d;
672                 foreach(b; data) {
673                     d ~= (b ? 1 : 0);
674                 }
675             }
676 
677             const auto ptr = d.ptr;
678         }
679         else static assert(0);
680 
681         static if (tagType == "INFO") {
682             static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int))
683                 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
684             else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float))
685                 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
686             else static if (isSomeString!T || isSomeString!(ElementType!T))
687                 ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr);
688             else static if (is(T == bool) || is(ElementType!T == bool))
689                 ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
690             else static assert(0, "Type not recognized for INFO tag");
691         }
692         else static if (tagType == "FORMAT") {
693             static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int))
694                 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
695             else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float))
696                 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
697             else static if (isSomeString!T || isSomeString!(ElementType!T))
698                 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
699             else static assert(0, "Type not recognized for FORMAT tag");
700         }
701         else static assert(0);
702 
703         if (ret == -1)
704             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
705     }
706 
707     /// Return a string representation of the VCFRecord (i.e. as would appear in .vcf)
708     ///
709     /// As a bonus, there is a kstring_t memory leak
710     override string toString() const
711     {
712         kstring_t s;
713 
714         const int ret = vcf_format(this.vcfheader.hdr, this.line, &s);
715         if (ret)
716         {
717             hts_log_error(__FUNCTION__,
718                 format("vcf_format returned nonzero (%d) (likely EINVAL, invalid bcf1_t struct?)", ret));
719             return "[VCFRecord vcf_format parse_error]";
720         }
721 
722         return cast(string) s.s[0 .. s.l];
723     }
724 }
725 
726 /** Basic support for writing VCF, BCF files */
727 struct VCFWriter
728 {
729     // htslib data structures
730     vcfFile     *fp;    /// htsFile
731     //bcf_hdr_t   *hdr;   /// header
732     VCFHeader   *vcfhdr;    /// header wrapper -- no copies
733     bcf1_t*[]    rows;   /// individual records
734 
735     @disable this();
736     /// open file or network resources for writing
737     /// setup incl header allocation
738     /// if mode==w:
739     ///     bcf_hdr_init automatically sets version string (##fileformat=VCFv4.2)
740     ///     bcf_hdr_init automatically adds the PASS filter
741     this(string fn)
742     {
743         if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor");
744         this.fp = vcf_open(toStringz(fn), toStringz("w"c));
745         if (!this.fp) throw new Exception("Could not hts_open file");
746 
747         this.vcfhdr = new VCFHeader( bcf_hdr_init("w\0"c.ptr));
748         addFiledate();
749         bcf_hdr_sync(this.vcfhdr.hdr);
750 
751         /+
752         hts_log_trace(__FUNCTION__, "Displaying header at construction");
753         //     int bcf_hdr_format(const(bcf_hdr_t) *hdr, int is_bcf, kstring_t *str);
754         kstring_t ks;
755         bcf_hdr_format(this.vcfhdr.hdr, 0, &ks);
756         char[] hdr;
757         hdr.length = ks.l;
758         import core.stdc.string: memcpy;
759         memcpy(hdr.ptr, ks.s, ks.l);
760         import std.stdio: writeln;
761         writeln(hdr);
762         +/
763     }
764     /// setup and copy a header from another BCF/VCF as template
765     this(T)(string fn, T other)
766     if(is(T == VCFHeader*) || is(T == bcf_hdr_t*))
767     {
768         if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor");
769         this.fp = vcf_open(toStringz(fn), toStringz("w"c));
770         if (!this.fp) throw new Exception("Could not hts_open file");
771 
772         static if(is(T == VCFHeader*)) { this.vcfhdr      = new VCFHeader( bcf_hdr_dup(other.hdr) ); }
773         else static if(is(T == bcf_hdr_t*)) { this.vcfhdr = new VCFHeader( bcf_hdr_dup(other) ); }
774     }
775     /// dtor
776     ~this()
777     {
778         const ret = vcf_close(this.fp);
779         if (ret != 0) hts_log_error(__FUNCTION__, "couldn't close VCF after writing");
780 
781         // Deallocate header
782         //bcf_hdr_destroy(this.hdr);
783         // 2018-09-15: Do not deallocate header; will be free'd by VCFHeader dtor
784     }
785     invariant
786     {
787         assert(this.vcfhdr != null);
788     }
789 
790     VCFHeader* getHeader()
791     {
792         return this.vcfhdr;
793     }
794 
795     // TODO
796     /// copy header lines from a template without overwiting existing lines
797     void copyHeaderLines(bcf_hdr_t *other)
798     {
799         assert(this.vcfhdr != null);
800         assert(0);
801         //    bcf_hdr_t *bcf_hdr_merge(bcf_hdr_t *dst, const(bcf_hdr_t) *src);
802     }
803 
804     /// Add sample to this VCF
805     /// * int bcf_hdr_add_sample(bcf_hdr_t *hdr, const(char) *sample);
806     int addSample(string name)
807     in { assert(name != ""); }
808     do
809     {
810         assert(this.vcfhdr != null);
811 
812         bcf_hdr_add_sample(this.vcfhdr.hdr, toStringz(name));
813 
814         // AARRRRGGGHHHH
815         // https://github.com/samtools/htslib/issues/767
816         bcf_hdr_sync(this.vcfhdr.hdr);
817 
818         return 0;
819     }
820 
821     /// Add a new header line
822     int addHeaderLineKV(string key, string value)
823     {
824         // TODO check that key is not INFO, FILTER, FORMAT (or contig?)
825         string line = format("##%s=%s", key, value);
826 
827         return bcf_hdr_append(this.vcfhdr.hdr, toStringz(line));
828     }
829     /// Add a new header line -- must be formatted ##key=value
830     int addHeaderLineRaw(string line)
831     {
832         assert(this.vcfhdr != null);
833         //    int bcf_hdr_append(bcf_hdr_t *h, const(char) *line);
834         const auto ret = bcf_hdr_append(this.vcfhdr.hdr, toStringz(line));
835         bcf_hdr_sync(this.vcfhdr.hdr);
836         return ret;
837     }
838     /// Add a filedate= headerline, which is not called out specifically in  the spec,
839     /// but appears in the spec's example files. We could consider allowing a param here.
840     int addFiledate()
841     {
842         return addHeaderLineKV("filedate", (cast(Date) Clock.currTime()).toISOString );
843     }
844     
845     /** Add INFO (§1.2.2) or FORMAT (§1.2.4) tag
846 
847     The INFO tag describes row-specific keys used in the INFO column;
848     The FORMAT tag describes sample-specific keys used in the last, and optional, genotype column.
849 
850     Template parameter: string; must be INFO or FORMAT
851 
852     The first four parameters are required; NUMBER and TYPE have specific allowable values.
853     source and version are optional, but recommended (for INFO only).
854 
855     *   id:     ID tag
856     *   number: NUMBER tag; here a string because it can also take special values {A,R,G,.} (see §1.2.2)
857     *   type:   Integer, Float, Flag, Character, and String
858     *   description: Text description; will be double quoted
859     *   source:      Annotation source  (eg dbSNP)
860     *   version:     Annotation version (eg 142)
861     */
862     void addTag(string tagType, T)( string id,
863                                     T number,
864                                     string type,
865                                     string description,
866                                     string source="",
867                                     string _version="")
868     if((tagType == "INFO" || tagType == "FORMAT") && (isIntegral!T || isSomeString!T))
869     {
870         string line;    //  we'll suffix with \0, don't worry
871 
872         // check ID
873         if (id == "") {
874             hts_log_error(__FUNCTION__, "no ID");
875             return;
876         }
877 
878         // check Number is either a special code {A,R,G,.} or an integer
879         static if(isSomeString!T) {
880         if (number != "A" &&
881             number != "R" &&
882             number != "G" &&
883             number != ".") {
884                 // not a special ; check if integer
885                 try {
886                     number.to!int;  // don't need to store result, will use format/%s
887                 }
888                 catch (ConvException e) {
889                     hts_log_error(__FUNCTION__, "Number not A/R/G/. nor an integer");
890                     return;
891                 }
892         }
893         }
894 
895         // check Type
896         if (type != "Integer" &&
897             type != "Float" &&
898             type != "Flag" &&
899             type != "Character" &&
900             type != "String") {
901                 hts_log_error(__FUNCTION__, "unrecognized type");
902                 return;
903         }
904 
905         // check Description
906         if (description == "") hts_log_error(__FUNCTION__, "no description");
907 
908         // check Source and Version
909         if (source == "" && _version != "") hts_log_error(__FUNCTION__, "version wo source");
910 
911         // Format params
912         if (source != "" && _version != "")
913             line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\",Source=\"%s\",Version=\"%s\">\0",
914                             tagType, id, number, type, description, source, _version);
915         else if (source != "" && _version == "")
916             line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\",Source=\"%s\">\0",
917                             tagType, id, number, type, description, source);
918         else
919             line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\">\0",
920                             tagType, id, number, type, description);
921 
922         bcf_hdr_append(this.vcfhdr.hdr, line.ptr);
923     }
924 
925     /** Add FILTER tag (§1.2.3) */
926     void addTag(string tagType)(string id, string description)
927     if(tagType == "FILTER")
928     {
929         // check ID
930         if (id == "") {
931             hts_log_error(__FUNCTION__, "no ID");
932             return;
933         }
934         // check Description
935         if (description == "") hts_log_error(__FUNCTION__, "no description");
936 
937         string line = format("##FILTER=<ID=%s,Description=\"%s\">\0", id, description);
938         bcf_hdr_append(this.vcfhdr.hdr, line.ptr);
939     }
940 
941     /** Add FILTER tag (§1.2.3) */
942     deprecated void addFilterTag(string id, string description)
943     {
944         string filter = format("##FILTER=<ID=%s,Description=\"%s\">\0", id, description);
945         bcf_hdr_append(this.vcfhdr.hdr, filter.ptr);
946     }
947 
948     /** Add contig definition (§1.2.7) to header meta-info 
949     
950         other: "url=...,md5=...,etc."
951     */
952     auto addTag(string tagType)(const(char)[] id, const int length = 0, string other = "")
953     if(tagType == "contig" || tagType == "CONTIG")
954     {
955         const(char)[] contig = "##contig=<ID=" ~ id ~
956             (length > 0  ? ",length=" ~ length.to!string : "") ~
957             (other != "" ? "," ~ other : "") ~
958             ">\0";
959         
960         return bcf_hdr_append(this.vcfhdr.hdr, contig.ptr);
961     }
962     
963     /**
964         Add a record
965 
966         alleles:    comma-separated string, including ref allele
967         qual:       a float, but accepts any numeric
968     */
969     int addRecord(S, N)(S contig, int pos, S id, S alleles, N qual, S[] filters)
970     if ( (isSomeString!S || is(S == char*) ) && isNumeric!N)
971     {        
972         bcf1_t *line = new bcf1_t;
973 
974         line.rid = bcf_hdr_name2id(this.vcfhdr.hdr, toStringz(contig));
975         if (line.rid == -1) hts_log_error(__FUNCTION__, "contig not found");
976 
977         line.pos = pos;
978 
979         bcf_update_id(this.vcfhdr.hdr, line, (id == "" ? null : toStringz(id)) );  // TODO: could support >1 id with array as with the filters
980 
981         bcf_update_alleles_str(this.vcfhdr.hdr, line, toStringz(alleles));
982 
983         line.qual = qual;
984 
985         // Update filter(s); if/else blocks for speed
986         if(filters.length == 0)
987         {
988             int pass = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz("PASS"c));
989             bcf_update_filter(this.vcfhdr.hdr, line, &pass, 1);
990         }
991         else if(filters.length == 1)
992         {
993             int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(filters[0]));
994             if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", filters[0]) );
995             bcf_update_filter(this.vcfhdr.hdr, line, &fid, 1);
996         }
997         else    // TODO: factor out the check for -1 into a safe_update_filter or something
998         {
999             int[] filter_ids;
1000             foreach(f; filters) {
1001                 const int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(f));
1002                 if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", f) );
1003                 else filter_ids ~= fid;
1004             }
1005             bcf_update_filter(this.vcfhdr.hdr, line, filter_ids.ptr, cast(int)filter_ids.length );
1006         }
1007 
1008         // Add a record
1009         /+
1010         // .. FORMAT
1011         bcf_hdr_append(hdr, "##FORMAT=<ID=TF,Number=1,Type=Float,Description=\"Test Float\">");
1012         float[4] test;
1013         bcf_float_set_missing(test[0]);
1014         test[1] = 47.11f;
1015         bcf_float_set_vector_end(test[2]);
1016         writeln("pre update format float");
1017         bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("TF"), &test[0], 4);
1018         +/
1019         int tmpi = 1;
1020         bcf_update_info_int32(this.vcfhdr.hdr, line, toStringz("NS"c), &tmpi, 1);
1021 
1022         // Add the actual sample
1023         int[4] dp = [ 9000, 1, 2, 3];
1024         bcf_update_format(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1, BCF_HT_INT);
1025         //int dp = 9000;
1026         //bcf_update_format_int32(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1);
1027         //auto f = new float;
1028         //*f = 1.0;
1029         //bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("XF"c), f, 1);
1030 
1031         this.rows ~= line;
1032 
1033         return 0;
1034     }
1035 
1036     /// as expected
1037     int writeHeader()
1038     {
1039         return bcf_hdr_write(this.fp, this.vcfhdr.hdr);
1040     }
1041     /// as expected
1042     int writeRecord(ref VCFRecord r)
1043     {
1044         const ret = bcf_write(this.fp, this.vcfhdr.hdr, r.line);
1045         if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error");
1046         return ret;
1047     }
1048     /// as expected
1049     int writeRecord(bcf_hdr_t *hdr, bcf1_t *rec)
1050     {
1051         hts_log_warning(__FUNCTION__, "pre call");
1052         const ret = bcf_write(this.fp, hdr, rec);
1053         hts_log_warning(__FUNCTION__, "post call");
1054         if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error");
1055         return ret;
1056     }
1057 }
1058 
1059 /** Basic support for reading VCF, BCF files */
1060 struct VCFReader
1061 {
1062     // htslib data structures
1063     vcfFile     *fp;    /// htsFile
1064     //bcf_hdr_t   *hdr;   /// header
1065     VCFHeader   *vcfhdr;    /// header wrapper -- no copies
1066     bcf1_t* b;          /// record for use in iterator, will be recycled
1067 
1068     int MAX_UNPACK;     /// see htslib.vcf
1069 
1070     private static int refct;
1071 
1072     this(this)
1073     {
1074         this.refct++;
1075     }
1076     @disable this();
1077     /// read existing VCF file
1078     /// MAX_UNPACK: setting alternate value could speed reading
1079     this(string fn, int MAX_UNPACK = BCF_UN_ALL)
1080     {
1081         import htslib.hts : hts_set_threads;
1082 
1083         if (fn == "") throw new Exception("Empty filename passed to VCFReader constructor");
1084         this.fp = vcf_open(toStringz(fn), "r"c.ptr);
1085         if (!this.fp) throw new Exception("Could not hts_open file");
1086         
1087         hts_set_threads(this.fp, 1);    // extra decoding thread
1088 
1089         this.vcfhdr = new VCFHeader( bcf_hdr_read(this.fp));
1090 
1091         this.b = bcf_init1();
1092         this.b.max_unpack = MAX_UNPACK;
1093         this.MAX_UNPACK = MAX_UNPACK;
1094     }
1095     /// dtor
1096     ~this()
1097     {
1098         this.refct--;
1099 
1100         // block file close and bcf1_t free() with reference counting
1101         // to allow VCFReader to implement Range interface
1102         if(!this.refct) {
1103             const ret = vcf_close(this.fp);
1104             if (ret != 0) hts_log_error(__FUNCTION__, "couldn't close VCF after reading");
1105 
1106             // Deallocate header
1107             //bcf_hdr_destroy(this.hdr);
1108             // 2018-09-15: Do not deallocate header; will be free'd by VCFHeader dtor
1109 
1110             bcf_destroy1(this.b);
1111         }
1112     }
1113     invariant
1114     {
1115         assert(this.vcfhdr != null);
1116     }
1117 
1118     VCFHeader* getHeader()
1119     {
1120         return this.vcfhdr;
1121     }
1122     
1123     /** VCF version, e.g. VCFv4.2 */
1124     @property string vcfVersion() { return cast(string) fromStringz( bcf_hdr_get_version(this.vcfhdr.hdr) ).idup; }
1125 
1126     /++
1127         bcf_hrec_t *bcf_hdr_get_hrec(const(bcf_hdr_t) *hdr, int type, const(char) *key, const(char) *value,
1128                                                                                     const(char) *str_class);
1129     +/
1130     // TODO: check key for "", fail if empty
1131     // TODO: check value, str_class for "" and replace with NULL
1132     // TODO: Memory leak. We are never freeing the bcf_hrec_t, but, it escapes as pointer inside key/value string
1133     // TODO: handle structured and general lines
1134     string[string] getTagByKV(string tagType, T)(string key, string value, string str_class)
1135     if((tagType == "FILTER" || tagType == "INFO" || tagType == "FORMAT" || tagType == "contig") &&
1136         (isIntegral!T || isSomeString!T))
1137     {
1138         // hlt : header line type
1139         static if (tagType == "FILTER")     const int hlt = BCF_HL_FLT;
1140         else static if (tagType == "INFO")  const int hlt = BCF_HL_INFO; // @suppress(dscanner.suspicious.label_var_same_name)
1141         else static if (tagType == "FORMAT") const int hlt= BCF_HL_FMT; // @suppress(dscanner.suspicious.label_var_same_name)
1142         else static if (tagType == "contig") const int hlt= BCF_HL_CTG; // @suppress(dscanner.suspicious.label_var_same_name)
1143         else assert(0);
1144 
1145         bcf_hrec_t *hrec = bcf_hdr_get_hrec(this.vcfhdr.hdr, hlt,   toStringz(key),
1146                                                                     toStringz(value),
1147                                                                     toStringz(str_class));
1148 
1149         const int nkeys = hrec.nkeys;
1150         string[string] kv;
1151 
1152         foreach(int i; 0 .. nkeys) {
1153             string k = cast(string) fromStringz(hrec.keys[i]);
1154             string v = cast(string) fromStringz(hrec.vals[i]); 
1155             kv[k] = v;
1156         }
1157 
1158         return kv;
1159     }
1160 
1161     /// InputRange interface: iterate over all records
1162     @property bool empty()
1163     {
1164         //     int bcf_read(htsFile *fp, const(bcf_hdr_t) *h, bcf1_t *v);
1165         // documentation claims returns -1 on critical errors, 0 otherwise
1166         // however it looks like -1 is EOF and -2 is critical errors?
1167         immutable success = bcf_read(this.fp, this.vcfhdr.hdr, this.b);
1168         if (success >= 0) return false;
1169         else if (success == -1) {
1170             // EOF
1171             // see htslib my comments https://github.com/samtools/htslib/issues/246
1172             // and commit 9845bc9a947350d0f34e6ce69e79ab81b6339bd2
1173             return true;
1174         }
1175         else {
1176             hts_log_error(__FUNCTION__, "*** CRITICAL ERROR bcf_read < -1");
1177             // TODO: check b->errcode
1178             return true;
1179         }
1180     }
1181     /// ditto
1182     void popFront()
1183     {
1184         // noop? 
1185         // free this.b ?
1186         //bam_destroy1(this.b);
1187 
1188         // TODO: clear (not destroy) the bcf1_t for reuse?
1189     }
1190     /// ditto
1191     VCFRecord front()
1192     {
1193         // note that VCFRecord's constructor will be responsible for
1194         // * unpacking and
1195         // * destroying
1196         // its copy
1197         return new VCFRecord(this.vcfhdr, bcf_dup(this.b), this.MAX_UNPACK);
1198     }
1199 }
1200 
1201 ///
1202 debug(dhtslib_unittest)
1203 unittest
1204 {
1205     import std.exception: assertThrown;
1206     import std.stdio: writeln, writefln;
1207 
1208     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1209 
1210 
1211     auto vw = VCFWriter("/dev/null");
1212 
1213     vw.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
1214     vw.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">");
1215     // ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
1216     vw.addTag!"INFO"("AF", "A", "Integer", "Number of Samples With Data");
1217     vw.addHeaderLineRaw("##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"); // @suppress(dscanner.style.long_line)
1218     vw.addHeaderLineRaw("##FILTER=<ID=q10,Description=\"Quality below 10\">");
1219 
1220     // Exercise header
1221     assert(vw.vcfhdr.nsamples == 0);
1222     vw.addSample("NA12878");
1223     assert(vw.vcfhdr.nsamples == 1);
1224 
1225     auto r = new VCFRecord(vw.vcfhdr, bcf_init1());
1226     
1227     r.chrom = "20";
1228     assert(r.chrom == "20");
1229     assertThrown(r.chrom = "chr20");   // headerline chromosome is "20" not "chr20"
1230 
1231     r.pos = 999_999;
1232     assert(r.pos == 999_999);
1233     r.pos = 62_435_964 + 1;     // Exceeds contig length
1234 
1235     // Test ID field
1236     // note that in an empty freshly initialized bcf1_t, the ID field is "" rather than "."
1237     // Will consider adding initialization code
1238     //writefln("ID: %s", r.id);
1239     r.id = "";
1240     assert(r.id == ".");
1241     r.id = ".";
1242     assert(r.id == ".");
1243     r.id = "rs001";
1244     assert(r.id == "rs001");
1245     r.addID("rs999");
1246     assert(r.id == "rs001;rs999");
1247     r.addID("rs001");   // test duplicate checking
1248     assert(r.id == "rs001;rs999");
1249 
1250     // Test REF/ALT allele setters and getters
1251     // many overloads to test for good coverage
1252     // Test matrix: Set {zero, one, two, three} alleles * {set by string, set by array} = 8 test cases
1253     //  * also, there is setAlleles(ref, alt) and setAlleles(ref, alt1, ...) 
1254     //  * TODO: will also need to retreive alt alleles as array and string
1255 
1256     string[] alleles_array;
1257 
1258     // Zero alleles
1259     r.alleles("");
1260     const auto zs = r.allelesAsArray;
1261     const auto zsr = r.refAllele;
1262     assert(zs.length == 0);
1263     assert(zsr == "");
1264 
1265     r.alleles(alleles_array);
1266     const auto za = r.allelesAsArray;
1267     const auto zar = r.refAllele;
1268     assert(za.length == 0);
1269     assert(zar == "");
1270     
1271     // One allele
1272     r.alleles("C");
1273     const auto os = r.allelesAsArray;
1274     const auto osr = r.refAllele;
1275     assert(os.length == 1 && os[0] == "C");
1276     assert(osr == "C");
1277 
1278     r.alleles(["C"]);
1279     const auto oa = r.allelesAsArray;
1280     const auto oar = r.refAllele;
1281     assert(oa.length == 1 && oa[0] == "C");
1282     assert(oar == "C");
1283 
1284     // Two alleles
1285     r.alleles("C,T");
1286     const auto ts = r.allelesAsArray;
1287     const auto tsr= r.refAllele;
1288     assert(ts == ["C", "T"]);
1289     assert(tsr== "C");
1290 
1291     r.alleles(["C", "T"]);
1292     const auto ta = r.allelesAsArray;
1293     const auto tar= r.refAllele;
1294     assert(ta == ["C", "T"]);
1295     assert(tar== "C");
1296 
1297     const taaa = r.altAllelesAsArray;
1298     const taas = r.altAllelesAsString;
1299     assert(taaa == ["T"]);
1300     assert(taas == "T");
1301 
1302     // alternate setter for >= 2 alleles
1303     r.setAlleles("A", "G");
1304     assert(r.allelesAsArray == ["A", "G"]);
1305 
1306     // Three alleles
1307     r.alleles("A,C,T");
1308     assert(r.allelesAsArray == ["A", "C", "T"]);
1309     assert(r.refAllele == "A");
1310     assert(r.altAllelesAsString == "C,T");
1311 
1312     // alternate the alleles for testing purposes
1313     r.alleles(["G", "A", "C"]);
1314     assert(r.allelesAsArray == ["G", "A", "C"]);
1315     assert(r.refAllele == "G");
1316     assert(r.altAllelesAsString == "A,C");
1317 
1318     r.setAlleles("A", "C", "T");
1319     assert(r.allelesAsArray == ["A", "C", "T"]);
1320     assert(r.refAllele == "A");
1321     assert(r.altAllelesAsString == "C,T");
1322 
1323 
1324     // Test QUAL
1325     r.qual = 1.0;
1326     assert(r.qual == 1.0);
1327     // now test setting qual without unpacking
1328     // TODO: see https://forum.dlang.org/post/hebouvswxlslqhovzaia@forum.dlang.org, once resolved (or once factory function written),
1329     //  add template value param BCF_UN_STR
1330     auto rr = new VCFRecord(vw.vcfhdr, "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0.017\n"); // @suppress(dscanner.style.long_line)
1331     rr.qual = 3.0;
1332     assert(rr.qual == 3.0);
1333 
1334 
1335     // Test FILTER
1336     assert(r.hasFilter("PASS"));
1337     assert(r.hasFilter("."));
1338     // add q10 filter (is in header)
1339     r.filter = "q10";
1340     assert(r.filter == "q10");
1341     assert(!r.hasFilter("PASS"));   // if has another filter, no longer PASS (unless added explicitly)
1342     assert(!r.hasFilter("."));      // if has another filter, no longer has . (which is PASS, or no filter [spec conflict?])
1343 
1344     // add q30 filteR (not in header)
1345     r.filter = "q30";
1346     assert(r.filter == "q10");  // i.e., unchanged
1347 
1348     assert(r.hasFilter("q10"));
1349     assert(!r.hasFilter("q30"));
1350 
1351     r.removeAllFilters();
1352     assert(r.hasFilter("."));
1353     r.addFilter("q10");
1354     assert(r.hasFilter("q10"));
1355     r.removeFilter("q10");
1356     assert(r.hasFilter("PASS"));
1357 
1358 
1359     // Finally, print the records:
1360     writefln("VCF records via toString:\n%s%s", r, rr);
1361 
1362     writeln("\ndhtslib.vcf: all tests passed\n");
1363 }