1 /**
2 
3 VCFv4.2 (including BCF) reader and writer.
4 
5 Specifications: https://samtools.github.io/hts-specs/VCFv4.2.pdf
6 
7 */
8 module dhtslib.vcf;
9 
10 import core.stdc.string;
11 import core.vararg;
12 
13 import std.conv: to, ConvException;
14 import std.datetime;
15 import std.format: format;
16 import std.range: ElementType;
17 import std.string: fromStringz, toStringz;
18 import std.traits: isArray, isDynamicArray, isBoolean, isIntegral, isFloatingPoint, isNumeric, isSomeString;
19 import std.traits: Unqual;
20 
21 import dhtslib.htslib.hts_log;
22 import dhtslib.htslib.kstring;
23 import dhtslib.htslib.vcf;
24 
25 alias BCFRecord = VCFRecord;
26 alias BCFWriter = VCFWriter;
27 
28 /** Wrapper around bcf_hdr_t
29 
30     Q: Why do we have VCFHeader, but not SAMHeader?
31     A: Most (all)? of the SAM (bam1_t) manipulation functions do not require a ptr to the header,
32     whereas almost all of the VCF (bcf1_t) manipulation functions do. Therefore, we track bcf_hdr_t*
33     inside each VCFRecord; this is wrapped by VCFHeader for future convenience (for example,
34     now we have @property nsamples; may move the tag reader and writer functions here?)
35 
36     In order to avoid double free()'ing an instance bcf_hdr_t,
37     this wrapper will be the authoritative holder of of bcf_hdr_t ptrs,
38     it shall be passed around by reference, and copies are disabled.
39 */
40 struct VCFHeader
41 {
42     /// Pointer to htslib BCF/VCF header struct; will be freed from VCFHeader dtor 
43     bcf_hdr_t *hdr;
44 
45     // Copies have to be disabled to avoid double free()
46     @disable this(this);
47 
48     ~this()
49     {
50         // Deallocate header
51         if (this.hdr != null) bcf_hdr_destroy(this.hdr);
52     }
53 
54     invariant
55     {
56         assert(this.hdr != null);
57     }
58 
59     /// List of contigs in the header
60     @property string[] sequences()
61     {
62         import core.stdc.stdlib : free;
63         int nseqs;
64 
65         /** Creates a list of sequence names. It is up to the caller to free the list (but not the sequence names) */
66         //const(char) **bcf_hdr_seqnames(const(bcf_hdr_t) *h, int *nseqs);
67         const(char) **ary = bcf_hdr_seqnames(this.hdr, &nseqs);
68         if (!nseqs) return [];
69 
70         string[] ret;
71         ret.reserve(nseqs);
72 
73         for(int i; i < nseqs; i++) {
74             ret ~= fromStringz(ary[i]).idup;
75         }
76 
77         free(ary);
78         return ret;        
79     }
80 
81     /// Number of samples in the header
82     pragma(inline, true)
83     @property int nsamples() { return bcf_hdr_nsamples(this.hdr); }
84 
85 }
86 
87 /** Wrapper around bcf1_t 
88 
89     Because it uses bcf1_t internally, it must conform to the BCF2 part
90     of the VCFv4.2 specs, rather than the loosey-goosey VCF specs. i.e.,
91     INFO, CONTIG, FILTER records must exist in the header.
92 
93     TODO: Does this need to be kept in a consistent state?
94     Ideally, VCFWriter would reject invalid ones, but we are informed
95     that it is invalid (e.g. if contig not found) while building this
96     struct; bcf_write1 will actually segfault, unfortunately. I'd like
97     to avoid expensive validate() calls for every record before writing
98     if possible, which means keeping this consistent. However, not
99     sure what to do if error occurs when using the setters herein?
100 
101     2019-01-23 struct->class to mirror SAMRecord -- faster if reference type?
102 
103     2019-01-23 WIP: getters for chrom, pos, id, ref, alt are complete (untested)
104 
105 After parsing a BCF or VCF line, bcf1_t must be unpacked. (not applicable when building bcf1_t from scratch)
106 Depending on information needed, this can be done to various levels with performance tradeoff.
107 Unpacking symbols:
108 BCF_UN_ALL: all
109 BCF_UN_SHR: all shared information (BCF_UN_STR|BCF_UN_FLT|BCF_UN_INFO)
110 
111                         BCF_UN_STR
112                        /               BCF_UN_FLT
113                       |               /      BCF_UN_INFO
114                       |              |      /       ____________________________ BCF_UN_FMT
115                       V              V     V       /       |       |       |
116 #CHROM  POS ID  REF ALT QUAL    FILTER  INFO    FORMAT  NA00001 NA00002 NA00003 ...
117 
118 */
119 class VCFRecord
120 {
121     bcf1_t* line;   /// htslib structured record TODO: change to 'b' for better internal consistency? (vcf.h/c actually use line quite a bit in fn params)
122 
123     VCFHeader *vcfheader;   /// corresponding header (required);
124                             /// is ptr to avoid copying struct containing ptr to bcf_hdr_t (leads to double free())
125 
126     /** VCFRecord
127 
128     Construct a bcf/vcf record, backed by bcf1_t, from: an existing bcf1_t, parameters, or a VCF line.
129 
130     Internal backing by bcf1_t means it must conform to the BCF2 rules -- i.e., header must contain
131     appropriate INFO, CONTIG, and FILTER lines.
132 
133     Protip: specifying alternate MAX_UNPACK can speed it tremendously
134         as it will not unpack all fields, only up to those requested (see htslib.vcf)
135         For example, BCF_UN_STR is up to ALT inclusive, and BCF_UN_STR is up to FILTER
136     */
137     this(T)(T *h, bcf1_t *b, int MAX_UNPACK = BCF_UN_ALL)
138     if(is(T == VCFHeader) || is(T == bcf_hdr_t))
139     {
140         static if (is(T == VCFHeader)) this.vcfheader = h;
141         //else static if (is(T == bcf_hdr_t)) this.vcfheader = new VCFHeader(h); // double free() bug if we don't own bcf_hdr_t h
142         else static if (is(T == bcf_hdr_t)) assert(0);  // ferret out situations that will lead to free() crashes
143         else assert(0);
144 
145         this.line = b;
146 
147         // Now it must be unpacked
148         // Protip: specifying alternate MAX_UNPACK can speed it tremendously
149         immutable int ret = bcf_unpack(this.line, MAX_UNPACK);    // unsure what to do c̄ return value // @suppress(dscanner.suspicious.unused_variable)
150     }
151     /// ditto
152     this(SS)(VCFHeader *vcfhdr, string chrom, int pos, string id, string _ref, string alt, float qual, SS filter, )
153     if (isSomeString!SS || is(SS == string[]))
154     {
155         this.line = bcf_init1();
156         this.vcfheader = vcfhdr;
157         
158         this.chrom = chrom;
159         this.pos = pos;
160         this.id = id;
161 
162         this.setAlleles(_ref, alt);
163 
164         this.qual = qual;
165         this.filter = filter;
166     }
167     /// ditto
168     this(VCFHeader *vcfhdr, string line, int MAX_UNPACK = BCF_UN_ALL)
169     {
170         this.vcfheader = vcfhdr;
171 
172         kstring_t kline;
173         auto dupline = line.dup; // slower, but safer than a cast // @suppress(dscanner.suspicious.unmodified)
174 
175         kline.l = dupline.length;
176         kline.m = dupline.length;
177         kline.s = dupline.ptr;
178 
179         this.line = bcf_init1();
180         this.line.max_unpack = MAX_UNPACK;
181 
182         auto ret = vcf_parse(&kline, this.vcfheader.hdr, this.line);
183         if (ret < 0) {
184             hts_log_error(__FUNCTION__, "vcf_parse returned < 0 -- code error or malformed VCF line");
185         } else {
186             ret = bcf_unpack(this.line, MAX_UNPACK);    // unsure what to do c̄ return value
187         }
188     }
189 
190     /// disable copying to prevent double-free (which should not come up except when writeln'ing)
191     //@disable this(this);  // commented out as class has no postblit ctor (relevant when VCFRecord was struct)
192     /// dtor
193     ~this()
194     {
195         if (this.line) bcf_destroy1(this.line);
196     }
197 
198     //////// FIXED FIELDS ////////
199     
200     /* CHROM */
201     /// Get chromosome (CHROM)
202     @property
203     string chrom()
204     {
205         if (!this.line.unpacked)
206             bcf_unpack(this.line, BCF_UN_STR);
207         
208         return fromStringz(bcf_hdr_id2name(this.vcfheader.hdr, this.line.rid)).idup;
209     }
210     /// Set chromosome (CHROM)
211     @property
212     void chrom(const(char)[] c)
213     {
214         auto rid = bcf_hdr_name2id(this.vcfheader.hdr, toStringz(c));
215         if (rid == -1) {
216             hts_log_error(__FUNCTION__, format("contig not found: %s", c));
217             throw new Exception("contig not found");
218         }
219         else line.rid = rid;
220     }
221 
222 
223     /* POS */
224     /// Get position (POS)
225     // NB: internally BCF is uzing 0 based coordinates; we only show +1 when printing a VCF line with toString (which calls vcf_format)
226     @property
227     int pos()
228     out(coord) { assert(coord >= 0); }
229     do
230     {
231         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
232         return this.line.pos;
233     }
234     /// Set position (POS)
235     @property
236     void pos(int p)
237     in { assert(p >= 0); }
238     do
239     {
240         // TODO: should we check for pos >= 0 && pos < contig length? Could really hamper performance.
241         // TODO: if writing out the file with invalid POS values crashes htslib, will consider it
242         this.line.pos = p;
243     }
244 
245 
246     /* ID */
247     /// Get ID string
248     @property string id()
249     {
250         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
251         return fromStringz(this.line.d.id).idup;
252     }
253     /// Sets new ID string; comma-separated list allowed but no dup checking performed
254     @property int id(const(char)[] id)
255     {
256         // bcf_update_id expects null pointer instead of empty string to mean "no value"
257         if (id == "") return bcf_update_id(this.vcfheader.hdr, this.line, null);
258         else return bcf_update_id(this.vcfheader.hdr, this.line, toStringz(id));
259     }
260     /// Append an ID (htslib performs duplicate checking)
261     int addID(const(char)[] id)
262     {
263         if(id == "") return 0;
264         return bcf_add_id(this.vcfheader.hdr, this.line, toStringz(id));
265     }
266 
267 
268     /* Alleles (REF, ALT)
269     
270         Internally, alleles are stored as a \0 separated array:
271         [C \0 T \0 T T \0.. \0]
272          ^    ^    ^ ^
273          |    |    L L ALT_1 = "TT"
274          |    L ALT_0 
275          L REF
276 
277         TODO: Getters and setters poorly or inconsistently (?) named at this time or there are too many overloads?
278         TODO: need non-overwriting setter for ref and alt alleles
279         TODO: some of these may be inefficent; since they may be used in hot inner loops, pls optimize
280     */
281     /// REF allele length
282     @property int refLen()
283     {
284         version(DigitalMars) pragma(inline);
285         version(LDC) pragma(inline, true);
286         version(GNU) pragma(inline, true);
287         return this.line.rlen;
288     }
289     /// All alleles getter (array)
290     @property string[] allelesAsArray()
291     {
292         string[] ret;
293         ret.length = this.line.n_allele;        // n=0, no reference; n=1, ref but no alt
294         foreach(int i; 0 .. this.line.n_allele) // ref allele is index 0
295         {
296             ret[i] = fromStringz(this.line.d.allele[i]).idup;
297         }
298         return ret;
299     }
300     /// Reference allele getter
301     @property string refAllele()
302     {
303         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
304         if (this.line.n_allele < 1) return ""; // a valid record could have no ref (or alt) alleles
305         else return fromStringz(this.line.d.als).idup;
306     }
307     // NB WIP: there could be zero, or multiple alt alleles
308     /+@property string altAlleles()
309     {
310         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
311         return fromStringz(this.line.d.als)
312     }+/
313     /// Alternate alleles getter version 1: ["A", "ACTG", ...]
314     @property string[] altAllelesAsArray()
315     {
316         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
317 
318         string[] ret;
319         if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt
320         return this.allelesAsArray[1 .. $];     // trim off REF
321     }
322     /// Alternate alleles getter version 2: "A,ACTG,..."
323     @property string altAllelesAsString()
324     {
325         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
326 
327         string ret;
328         if (this.line.n_allele < 2) return ret; // n=0, no reference; n=1, ref but no alt
329 
330         char[] tmp;
331         tmp.length = this.line.d.m_allele;    // max allocated size in htslib
332 
333         char *a = this.line.d.allele[1];    // ref allele is index 0
334         const char *last = this.line.d.allele[this.line.n_allele - 1];    // pointer to start of last allele
335         size_t i = 0;
336 
337         assert( this.line.d.allele[1] == (this.line.d.als + this.line.rlen + 1) );  // ensue data structure is as we think (+1: past ref's term \0)
338 
339         while (i < this.line.d.m_allele)    // safety stop at max allocated size
340         {
341             if (*a) tmp[i] = *a;
342             else {  // '\0'
343                 if (a < last) tmp[i] = ',';
344                 else break;
345             }
346             i++;
347             a++;
348         }
349         tmp.length = i;
350 
351         return tmp.idup;
352     }
353     /// Set alleles; comma-separated list
354     @property void alleles(string a)
355     {
356         if (a == "") {
357             this.line.n_allele = 0;
358             if (this.line.d.m_allele) this.line.d.als[0] = '\0';    // if storage allocated, zero out the REF
359         }
360         else
361             bcf_update_alleles_str(this.vcfheader.hdr, this.line, toStringz(a));
362     }
363     /// Set alleles; array
364     @property void alleles(string[] a)
365     {
366         if (a.length == 0) {
367             this.line.n_allele = 0;
368             if (this.line.d.m_allele) this.line.d.als[0] = '\0';    // if storage allocated, zero out the REF
369         }
370         else {
371             const(char)*[64] allelesp;  // will fail if a locus has more than 63 ALT alleles :-O
372             foreach(i; 0 .. a.length ) {
373                 // In order to zero out all alleles, pass a zero-length allele to the string version,
374                 // or a zero-length array to this version. Zero length string member of nonempty allele array is an error.
375                 if (a[i].length == 0) {
376                     hts_log_error(__FUNCTION__, "Zero-length allele in nonempty allele array. Setting to '.'");
377                     allelesp[i] = ".".ptr;
378                 }
379                 else allelesp[i] = toStringz(a[i]);
380             }
381             bcf_update_alleles(this.vcfheader.hdr, this.line, &allelesp[0], cast(int)a.length);
382         }
383     }
384     /// Set REF allele only
385     /// param r is \0-term Cstring
386     /// TODO: UNTESTED
387     void setRefAllele(const(char)* r)
388     {
389         // first, get REF
390         if (!this.line.unpacked) bcf_unpack(this.line, BCF_UN_STR);
391         // a valid record could have no ref (or alt) alleles
392         if (this.line.n_allele < 2) // if none or 1 (=REF only), just add the REF we receieved
393             bcf_update_alleles(this.vcfheader.hdr, this.line, &r, 1);
394         else {
395             // length of REF allele is allele[1] - allele[0], (minus one more for \0)
396             // TODO ** TODO ** : there is a property line.refLen already
397             const auto reflen = (this.line.d.allele[1] - this.line.d.allele[0]) - 1;
398             if (strlen(r) <= reflen) {
399                 memcpy(this.line.d.allele[0], r, reflen + 1);   // +1 -> copy a trailing \0 in case original REF was longer than new 
400                 // TODO: do we need to call _sync ?
401             } else {
402                 // slower way: replace allele[0] with r, but keep rest of pointers poitning at existing allele block,
403                 // then call bcf_update_alleles; this will make complete new copy of this.line.d.allele, so forgive the casts
404                 this.line.d.allele[0] = cast(char*) r;
405                 bcf_update_alleles(this.vcfheader.hdr, this.line,
406                     cast( const(char)** ) this.line.d.allele, this.line.n_allele);
407             }
408         }
409     }
410     /// Set alleles; alt can be comma separated
411     void setAlleles(string _ref, string alt)
412     {
413         immutable string alleles = _ref ~ "," ~ alt ~ "\0";
414         bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr);
415     }
416     /// Set alleles; min. 2 alleles (ref, alt1); unlimited alts may be specified
417     void setAlleles(string _ref, string alt, ...)
418     {
419         string alleles = _ref ~ "," ~ alt;
420 
421         for (int i = 0; i < _arguments.length; i++)
422         {
423             alleles ~= "," ~ va_arg!string(_argptr);
424         }
425 
426         alleles ~= "\0";
427 
428         bcf_update_alleles_str(this.vcfheader.hdr, this.line, alleles.ptr);
429     }
430 
431 
432     /* Quality (QUAL) */
433     /// Get variant quality (QUAL)
434     @property float qual()
435     out(result) { assert(result >= 0); }
436     do
437     {
438         if (this.line.max_unpack < BCF_UN_FLT) bcf_unpack(this.line, BCF_UN_FLT);
439         return this.line.qual;
440     }
441     /// Set variant quality (QUAL)
442     @property void qual(float q)
443     in { assert(q >= 0); }
444     do { this.line.qual = q; }
445 
446 
447     /* FILTER */
448     /// Get FILTER column (nothing in htslib sadly)
449     @property string filter()
450     {
451         const(char)[] ret;
452 
453         if (this.line.max_unpack < BCF_UN_FLT) bcf_unpack(this.line, BCF_UN_FLT);
454 
455         if (this.line.d.n_flt) {
456             for(int i; i< this.line.d.n_flt; i++) {
457                 if (i) ret ~= ";";
458                 ret ~= fromStringz(this.vcfheader.hdr.id[BCF_DT_ID][ this.line.d.flt[0] ].key);
459             }
460         } else {
461             ret = ".";
462         }
463 
464         return ret.idup;
465     }
466     /// Remove all entries in FILTER
467     void removeAllFilters()
468     {
469         // always returns zero
470         bcf_update_filter(this.vcfheader.hdr, this.line, null, 0);
471     }
472     /// Set the FILTER column to f
473     @property void filter(string f)
474     {
475         this.filter([f]);
476     }
477     /// Set the FILTER column to f0,f1,f2...
478     /// TODO: determine definitiely whether "." is replaced with "PASS"
479     @property void filter(string[] fs)
480     {
481         int[] fids;
482         foreach(f; fs) {
483             if(f == "") continue;
484             const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
485             if (fid == -1) hts_log_warning(__FUNCTION__, format("filter not found in header (ignoring): %s", f) );
486             else fids ~= fid;
487         }
488         if (fids.length > 0)
489             bcf_update_filter(this.vcfheader.hdr, this.line, fids.ptr, cast(int)fids.length);
490         else
491             hts_log_warning(__FUNCTION__, "No FILTER update was performed due to empty list");
492     }
493     /// Add a filter; from htslib: 
494     /// "If flt_id is PASS, all existing filters are removed first. If other than PASS, existing PASS is removed."
495     int addFilter(string f)
496     {
497         return bcf_add_filter(this.vcfheader.hdr, this.line, 
498             bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f)));
499     }
500     /// Remove a filter by name
501     int removeFilter(string f)
502     {
503         const int fid = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, toStringz(f));
504         return removeFilter(fid);
505     }
506     /// Remove a filter by numeric id
507     int removeFilter(int fid)
508     {
509         return bcf_remove_filter(this.vcfheader.hdr, this.line, fid, 0);
510     }
511     /// Determine whether FILTER is present. log warning if filter does not exist. "PASS" and "." can be used interchangeably.
512     bool hasFilter(string filter)
513     {
514         char[] f = filter.dup ~ '\0';
515         //const auto id = bcf_hdr_id2int(this.vcfheader.hdr, BCF_DT_ID, f.ptr);
516         const auto ret = bcf_has_filter(this.vcfheader.hdr, this.line, f.ptr);
517 
518         if (ret > 0) return true;
519         else if (ret == 0) return false;
520         else {
521             hts_log_warning(__FUNCTION__, format("FILTER %s does not exist in the header", filter));
522             return false;
523         }
524     }
525 
526 
527     /* INFO */
528     /+
529         int tmpi = 1;
530         bcf_update_info_int32(this.vcfhdr.hdr, line, toStringz("NS"c), &tmpi, 1);
531     +/
532     /// Add a tag:value to the INFO column -- tag must already exist in the header
533     void addInfo(T)(string tag, T data)
534     {
535         int ret = -1;
536 
537         static if(isIntegral!T) {
538             auto integer = cast(int) data;
539             ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1);
540         }
541         
542         else static if(isFloatingPoint!T) {
543             auto flt = cast(float) data;    // simply passing "2.0" (or whatever) => data is a double
544             ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1);
545         }
546 
547         else static if(isSomeString!T)
548             ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data));
549         
550         else static if(isBoolean!T) {
551             immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 
552             ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set);
553         }
554         
555         if (ret == -1)
556             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
557     }
558     /// ditto
559     /// This handles a vector of values for the tag
560     void addInfo(T)(string tag, T[] data)
561     if(!is(T==immutable(char)))             // otherwise string::immutable(char)[] will match the other template
562     {
563         int ret = -1;
564 
565         static if(isIntegral!T) {
566             auto integer = cast(int) data;
567             ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, data.length);
568         }
569         
570         else static if(isFloatingPoint!T) {
571             auto flt = cast(float) data;    // simply passing "2.0" (or whatever) => data is a double
572             ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, data.length);
573         }
574         
575         if (ret == -1)
576             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
577     }
578 
579     /* FORMAT (sample info) */
580     /// bc_update_format_{int32,flat,string,flag}
581     void addFormat(T)(string tag, T data)
582     if(!isArray!T)
583     {
584         int ret = -1;
585 
586         static if(isIntegral!T) {
587             auto integer = cast(int) data;
588             ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), &integer, 1);
589         }
590         
591         else static if(isFloatingPoint!T) {
592             auto flt = cast(float) data;    // simply passing "2.0" (or whatever) => data is a double
593             ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), &flt, 1);
594         }
595 
596         else static if(isSomeString!T)
597             ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), toStringz(data));
598         
599         else static if(isBoolean!T) {
600             immutable int set = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag 
601             ret = bcf_update_format_flag(this.vcfheader.hdr, this.line, toStringz(tag), null, set);
602         }
603         
604         if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data));
605     }
606     /// auto bcf_update_format_int32(const(bcf_hdr_t) *hdr, bcf1_t *line, const(char) *key, int *values, int n) // @suppress(dscanner.style.undocumented_declaration)
607     void addFormat(T)(string tag, T[] data)
608     {
609                 int ret = -1;
610 
611         static if(isIntegral!T) {
612             auto integer = cast(int[]) data;
613             ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag),
614                                             integer.ptr, cast(int)data.length);
615         }
616         
617         else static if(isFloatingPoint!T) {
618             auto flt = cast(float[]) data;    // simply passing "2.0" (or whatever) => data is a double
619             ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag),
620                                             flt.ptr, cast(int)data.length);
621         }
622         
623         if (ret == -1) hts_log_warning(__FUNCTION__, format("Couldn't add format (ignoring): %s", data));
624 
625     }
626 
627     /// add INFO or FORMAT key:value pairs to a record
628     /// add a single datapoint OR vector of values, OR, values to each sample (if tagType == FORMAT)
629     void add(string tagType, T)(const(char)[] tag, T data)
630     if((tagType == "INFO" || tagType == "FORMAT") &&
631         (isIntegral!T       || isIntegral!(ElementType!T)   ||
632         isFloatingPoint!T   || isFloatingPoint!(ElementType!T) ||
633         isSomeString!T      || isSomeString!(ElementType!T) ||
634         isBoolean!T         || isBoolean!(ElementType!T)))
635     {
636         int ret = -1;
637         int len;
638 
639         static if (!isDynamicArray!T) {
640             len = 1;
641             static if (isIntegral!T) auto d = data.to!int;
642             else static if (isFloatingPoint!T) auto d = data.to!float;
643             else static if (isSomeString!T) auto d = toStringz(data);
644             else static if (isBoolean!T) immutable int d = data ? 1 : 0; // if data == true, pass 1 to bcf_update_info_flag(n=); n=0 => clear flag
645             else static assert(0);
646 
647             const auto ptr = &d;
648         }
649         else static if (isDynamicArray!T) {
650             assert(data.length < int.max);
651             len = cast(int) data.length;
652             static if (isIntegral!(ElementType!T)) auto d = data.to!(int[]);
653             else static if (isFloatingPoint!(ElementType!T)) auto d = data.to!(float[]);
654             else static if (isSomeString!(ElementType!T)) {
655                 char[] d;
656                 foreach(s; data) {
657                     d ~= s ~ "\0";
658                 }
659                 if(d.length == 0) d ~= "\0";    // TODO replace with asserts on data length earlier
660             }
661             else static if (isBoolean!(ElementType!T)) {
662                 int[] d;
663                 foreach(b; data) {
664                     d ~= (b ? 1 : 0);
665                 }
666             }
667 
668             const auto ptr = d.ptr;
669         }
670         else static assert(0);
671 
672         static if (tagType == "INFO") {
673             static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int))
674                 ret = bcf_update_info_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
675             else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float))
676                 ret = bcf_update_info_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
677             else static if (isSomeString!T || isSomeString!(ElementType!T))
678                 ret = bcf_update_info_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr);
679             else static if (is(T == bool) || is(ElementType!T == bool))
680                 ret = bcf_update_info_flag(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
681             else static assert(0, "Type not recognized for INFO tag");
682         }
683         else static if (tagType == "FORMAT") {
684             static if (is(Unqual!T == int) || is(Unqual!(ElementType!T) == int))
685                 ret = bcf_update_format_int32(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
686             else static if (is(Unqual!T == float) || is(Unqual!(ElementType!T) == float))
687                 ret = bcf_update_format_float(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
688             else static if (isSomeString!T || isSomeString!(ElementType!T))
689                 ret = bcf_update_format_string(this.vcfheader.hdr, this.line, toStringz(tag), ptr, len);
690             else static assert(0, "Type not recognized for FORMAT tag");
691         }
692         else static assert(0);
693 
694         if (ret == -1)
695             hts_log_warning(__FUNCTION__, format("Couldn't add tag (ignoring): %s with value %s", tag, data));
696     }
697 
698     override string toString() const
699     {
700         kstring_t s;
701 
702         const int ret = vcf_format(this.vcfheader.hdr, this.line, &s);
703         if (ret)
704         {
705             hts_log_error(__FUNCTION__,
706                 format("vcf_format returned nonzero (%d) (likely EINVAL, invalid bcf1_t struct?)", ret));
707             return "[VCFRecord vcf_format parse_error]";
708         }
709 
710         return cast(string) s.s[0 .. s.l];
711     }
712 }
713 
714 /** Basic support for writing VCF, BCF files */
715 struct VCFWriter
716 {
717     // htslib data structures
718     vcfFile     *fp;    /// htsFile
719     //bcf_hdr_t   *hdr;   /// header
720     VCFHeader   *vcfhdr;    /// header wrapper -- no copies
721     bcf1_t*[]    rows;   /// individual records
722 
723     @disable this();
724     /// open file or network resources for writing
725     /// setup incl header allocation
726     /// if mode==w:
727     ///     bcf_hdr_init automatically sets version string (##fileformat=VCFv4.2)
728     ///     bcf_hdr_init automatically adds the PASS filter
729     this(string fn)
730     {
731         if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor");
732         this.fp = vcf_open(toStringz(fn), toStringz("w"c));
733         if (!this.fp) throw new Exception("Could not hts_open file");
734 
735         this.vcfhdr = new VCFHeader( bcf_hdr_init("w\0"c.ptr));
736         addFiledate();
737         bcf_hdr_sync(this.vcfhdr.hdr);
738 
739         /+
740         hts_log_trace(__FUNCTION__, "Displaying header at construction");
741         //     int bcf_hdr_format(const(bcf_hdr_t) *hdr, int is_bcf, kstring_t *str);
742         kstring_t ks;
743         bcf_hdr_format(this.vcfhdr.hdr, 0, &ks);
744         char[] hdr;
745         hdr.length = ks.l;
746         import core.stdc.string: memcpy;
747         memcpy(hdr.ptr, ks.s, ks.l);
748         import std.stdio: writeln;
749         writeln(hdr);
750         +/
751     }
752     /// setup and copy a header from another BCF/VCF as template
753     this(T)(string fn, T other)
754     if(is(T == VCFHeader*) || is(T == bcf_hdr_t*))
755     {
756         if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor");
757         this.fp = vcf_open(toStringz(fn), toStringz("w"c));
758         if (!this.fp) throw new Exception("Could not hts_open file");
759 
760         static if(is(T == VCFHeader*)) { this.vcfhdr      = new VCFHeader( bcf_hdr_dup(other.hdr) ); }
761         else static if(is(T == bcf_hdr_t*)) { this.vcfhdr = new VCFHeader( bcf_hdr_dup(other) ); }
762     }
763     /// dtor
764     ~this()
765     {
766         const ret = vcf_close(this.fp);
767         if (ret != 0) hts_log_error(__FUNCTION__, "couldn't close VCF after writing");
768 
769         // Deallocate header
770         //bcf_hdr_destroy(this.hdr);
771         // 2018-09-15: Do not deallocate header; will be free'd by VCFHeader dtor
772     }
773     invariant
774     {
775         assert(this.vcfhdr != null);
776     }
777 
778     VCFHeader* getHeader()
779     {
780         return this.vcfhdr;
781     }
782 
783     // TODO
784     /// copy header lines from a template without overwiting existing lines
785     void copyHeaderLines(bcf_hdr_t *other)
786     {
787         assert(this.vcfhdr != null);
788         assert(0);
789         //    bcf_hdr_t *bcf_hdr_merge(bcf_hdr_t *dst, const(bcf_hdr_t) *src);
790     }
791 
792     /// Add sample to this VCF
793     /// * int bcf_hdr_add_sample(bcf_hdr_t *hdr, const(char) *sample);
794     int addSample(string name)
795     in { assert(name != ""); }
796     do
797     {
798         assert(this.vcfhdr != null);
799 
800         bcf_hdr_add_sample(this.vcfhdr.hdr, toStringz(name));
801 
802         // AARRRRGGGHHHH
803         // https://github.com/samtools/htslib/issues/767
804         bcf_hdr_sync(this.vcfhdr.hdr);
805 
806         return 0;
807     }
808 
809     /// Add a new header line
810     int addHeaderLineKV(string key, string value)
811     {
812         // TODO check that key is not INFO, FILTER, FORMAT (or contig?)
813         string line = format("##%s=%s", key, value);
814 
815         return bcf_hdr_append(this.vcfhdr.hdr, toStringz(line));
816     }
817     /// Add a new header line -- must be formatted ##key=value
818     int addHeaderLineRaw(string line)
819     {
820         assert(this.vcfhdr != null);
821         //    int bcf_hdr_append(bcf_hdr_t *h, const(char) *line);
822         const auto ret = bcf_hdr_append(this.vcfhdr.hdr, toStringz(line));
823         bcf_hdr_sync(this.vcfhdr.hdr);
824         return ret;
825     }
826     /// Add a filedate= headerline, which is not called out specifically in  the spec,
827     /// but appears in the spec's example files. We could consider allowing a param here.
828     int addFiledate()
829     {
830         return addHeaderLineKV("filedate", (cast(Date) Clock.currTime()).toISOString );
831     }
832     
833     /** Add INFO (§1.2.2) or FORMAT (§1.2.4) tag
834 
835     The INFO tag describes row-specific keys used in the INFO column;
836     The FORMAT tag describes sample-specific keys used in the last, and optional, genotype column.
837 
838     Template parameter: string; must be INFO or FORMAT
839 
840     The first four parameters are required; NUMBER and TYPE have specific allowable values.
841     source and version are optional, but recommended (for INFO only).
842 
843     *   id:     ID tag
844     *   number: NUMBER tag; here a string because it can also take special values {A,R,G,.} (see §1.2.2)
845     *   type:   Integer, Float, Flag, Character, and String
846     *   description: Text description; will be double quoted
847     *   source:      Annotation source  (eg dbSNP)
848     *   version:     Annotation version (eg 142)
849     */
850     void addTag(string tagType, T)( string id,
851                                     T number,
852                                     string type,
853                                     string description,
854                                     string source="",
855                                     string _version="")
856     if((tagType == "INFO" || tagType == "FORMAT") && (isIntegral!T || isSomeString!T))
857     {
858         string line;    //  we'll suffix with \0, don't worry
859 
860         // check ID
861         if (id == "") {
862             hts_log_error(__FUNCTION__, "no ID");
863             return;
864         }
865 
866         // check Number is either a special code {A,R,G,.} or an integer
867         static if(isSomeString!T) {
868         if (number != "A" &&
869             number != "R" &&
870             number != "G" &&
871             number != ".") {
872                 // not a special ; check if integer
873                 try {
874                     number.to!int;  // don't need to store result, will use format/%s
875                 }
876                 catch (ConvException e) {
877                     hts_log_error(__FUNCTION__, "Number not A/R/G/. nor an integer");
878                     return;
879                 }
880         }
881         }
882 
883         // check Type
884         if (type != "Integer" &&
885             type != "Float" &&
886             type != "Flag" &&
887             type != "Character" &&
888             type != "String") {
889                 hts_log_error(__FUNCTION__, "unrecognized type");
890                 return;
891         }
892 
893         // check Description
894         if (description == "") hts_log_error(__FUNCTION__, "no description");
895 
896         // check Source and Version
897         if (source == "" && _version != "") hts_log_error(__FUNCTION__, "version wo source");
898 
899         // Format params
900         if (source != "" && _version != "")
901             line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\",Source=\"%s\",Version=\"%s\">\0",
902                             tagType, id, number, type, description, source, _version);
903         else if (source != "" && _version == "")
904             line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\",Source=\"%s\">\0",
905                             tagType, id, number, type, description, source);
906         else
907             line = format("##%s=<ID=%s,Number=%s,Type=%s,Description=\"%s\">\0",
908                             tagType, id, number, type, description);
909 
910         bcf_hdr_append(this.vcfhdr.hdr, line.ptr);
911     }
912 
913     /** Add FILTER tag (§1.2.3) */
914     void addTag(string tagType)(string id, string description)
915     if(tagType == "FILTER")
916     {
917         // check ID
918         if (id == "") {
919             hts_log_error(__FUNCTION__, "no ID");
920             return;
921         }
922         // check Description
923         if (description == "") hts_log_error(__FUNCTION__, "no description");
924 
925         string line = format("##FILTER=<ID=%s,Description=\"%s\">\0", id, description);
926         bcf_hdr_append(this.vcfhdr.hdr, line.ptr);
927     }
928 
929     /** Add FILTER tag (§1.2.3) */
930     deprecated void addFilterTag(string id, string description)
931     {
932         string filter = format("##FILTER=<ID=%s,Description=\"%s\">\0", id, description);
933         bcf_hdr_append(this.vcfhdr.hdr, filter.ptr);
934     }
935 
936     /** Add contig definition (§1.2.7) to header meta-info 
937     
938         other: "url=...,md5=...,etc."
939     */
940     auto addTag(string tagType)(const(char)[] id, const int length = 0, string other = "")
941     if(tagType == "contig" || tagType == "CONTIG")
942     {
943         const(char)[] contig = "##contig=<ID=" ~ id ~
944             (length > 0  ? ",length=" ~ length.to!string : "") ~
945             (other != "" ? "," ~ other : "") ~
946             ">\0";
947         
948         return bcf_hdr_append(this.vcfhdr.hdr, contig.ptr);
949     }
950     
951     /**
952         Add a record
953 
954         alleles:    comma-separated string, including ref allele
955         qual:       a float, but accepts any numeric
956     */
957     int addRecord(S, N)(S contig, int pos, S id, S alleles, N qual, S[] filters)
958     if ( (isSomeString!S || is(S == char*) ) && isNumeric!N)
959     {        
960         bcf1_t *line = new bcf1_t;
961 
962         line.rid = bcf_hdr_name2id(this.vcfhdr.hdr, toStringz(contig));
963         if (line.rid == -1) hts_log_error(__FUNCTION__, "contig not found");
964 
965         line.pos = pos;
966 
967         bcf_update_id(this.vcfhdr.hdr, line, (id == "" ? null : toStringz(id)) );  // TODO: could support >1 id with array as with the filters
968 
969         bcf_update_alleles_str(this.vcfhdr.hdr, line, toStringz(alleles));
970 
971         line.qual = qual;
972 
973         // Update filter(s); if/else blocks for speed
974         if(filters.length == 0)
975         {
976             int pass = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz("PASS"c));
977             bcf_update_filter(this.vcfhdr.hdr, line, &pass, 1);
978         }
979         else if(filters.length == 1)
980         {
981             int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(filters[0]));
982             if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", filters[0]) );
983             bcf_update_filter(this.vcfhdr.hdr, line, &fid, 1);
984         }
985         else    // TODO: factor out the check for -1 into a safe_update_filter or something
986         {
987             int[] filter_ids;
988             foreach(f; filters) {
989                 const int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(f));
990                 if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", f) );
991                 else filter_ids ~= fid;
992             }
993             bcf_update_filter(this.vcfhdr.hdr, line, filter_ids.ptr, cast(int)filter_ids.length );
994         }
995 
996         // Add a record
997         /+
998         // .. FORMAT
999         bcf_hdr_append(hdr, "##FORMAT=<ID=TF,Number=1,Type=Float,Description=\"Test Float\">");
1000         float[4] test;
1001         bcf_float_set_missing(test[0]);
1002         test[1] = 47.11f;
1003         bcf_float_set_vector_end(test[2]);
1004         writeln("pre update format float");
1005         bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("TF"), &test[0], 4);
1006         +/
1007         int tmpi = 1;
1008         bcf_update_info_int32(this.vcfhdr.hdr, line, toStringz("NS"c), &tmpi, 1);
1009 
1010         // Add the actual sample
1011         int[4] dp = [ 9000, 1, 2, 3];
1012         bcf_update_format(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1, BCF_HT_INT);
1013         //int dp = 9000;
1014         //bcf_update_format_int32(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1);
1015         //auto f = new float;
1016         //*f = 1.0;
1017         //bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("XF"c), f, 1);
1018 
1019         this.rows ~= line;
1020 
1021         return 0;
1022     }
1023 
1024     /// as expected
1025     int writeHeader()
1026     {
1027         return bcf_hdr_write(this.fp, this.vcfhdr.hdr);
1028     }
1029     /// as expected
1030     int writeRecord(ref VCFRecord r)
1031     {
1032         const ret = bcf_write(this.fp, this.vcfhdr.hdr, r.line);
1033         if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error");
1034         return ret;
1035     }
1036     /// as expected
1037     int writeRecord(bcf_hdr_t *hdr, bcf1_t *rec)
1038     {
1039         hts_log_warning(__FUNCTION__, "pre call");
1040         const ret = bcf_write(this.fp, hdr, rec);
1041         hts_log_warning(__FUNCTION__, "post call");
1042         if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error");
1043         return ret;
1044     }
1045 }
1046 
1047 /** Basic support for reading VCF, BCF files */
1048 struct VCFReader
1049 {
1050     // htslib data structures
1051     vcfFile     *fp;    /// htsFile
1052     //bcf_hdr_t   *hdr;   /// header
1053     VCFHeader   *vcfhdr;    /// header wrapper -- no copies
1054     bcf1_t* b;          /// record for use in iterator, will be recycled
1055 
1056     int MAX_UNPACK;     /// see dhtslib.htslib.vcf
1057 
1058     private static int refct;
1059 
1060     this(this)
1061     {
1062         this.refct++;
1063     }
1064     @disable this();
1065     /// read existing VCF file
1066     /// MAX_UNPACK: setting alternate value could speed reading
1067     this(string fn, int MAX_UNPACK = BCF_UN_ALL)
1068     {
1069         import dhtslib.htslib.hts : hts_set_threads;
1070 
1071         if (fn == "") throw new Exception("Empty filename passed to VCFReader constructor");
1072         this.fp = vcf_open(toStringz(fn), "r"c.ptr);
1073         if (!this.fp) throw new Exception("Could not hts_open file");
1074         
1075         hts_set_threads(this.fp, 1);    // extra decoding thread
1076 
1077         this.vcfhdr = new VCFHeader( bcf_hdr_read(this.fp));
1078 
1079         this.b = bcf_init1();
1080         this.b.max_unpack = MAX_UNPACK;
1081         this.MAX_UNPACK = MAX_UNPACK;
1082     }
1083     /// dtor
1084     ~this()
1085     {
1086         this.refct--;
1087 
1088         // block file close and bcf1_t free() with reference counting
1089         // to allow VCFReader to implement Range interface
1090         if(!this.refct) {
1091             const ret = vcf_close(this.fp);
1092             if (ret != 0) hts_log_error(__FUNCTION__, "couldn't close VCF after reading");
1093 
1094             // Deallocate header
1095             //bcf_hdr_destroy(this.hdr);
1096             // 2018-09-15: Do not deallocate header; will be free'd by VCFHeader dtor
1097 
1098             bcf_destroy1(this.b);
1099         }
1100     }
1101     invariant
1102     {
1103         assert(this.vcfhdr != null);
1104     }
1105 
1106     VCFHeader* getHeader()
1107     {
1108         return this.vcfhdr;
1109     }
1110     
1111     /** VCF version, e.g. VCFv4.2 */
1112     @property string vcfVersion() { return cast(string) fromStringz( bcf_hdr_get_version(this.vcfhdr.hdr) ).idup; }
1113 
1114     /++
1115         bcf_hrec_t *bcf_hdr_get_hrec(const(bcf_hdr_t) *hdr, int type, const(char) *key, const(char) *value,
1116                                                                                     const(char) *str_class);
1117     +/
1118     // TODO: check key for "", fail if empty
1119     // TODO: check value, str_class for "" and replace with NULL
1120     // TODO: Memory leak. We are never freeing the bcf_hrec_t, but, it escapes as pointer inside key/value string
1121     // TODO: handle structured and general lines
1122     string[string] getTagByKV(string tagType, T)(string key, string value, string str_class)
1123     if((tagType == "FILTER" || tagType == "INFO" || tagType == "FORMAT" && tagType == "contig") &&
1124         (isIntegral!T || isSomeString!T))
1125     {
1126         // hlt : header line type
1127         static if (tagType == "FILTER")     const int hlt = BCF_HL_FLT;
1128         else static if (tagType == "INFO")  const int hlt = BCF_HL_INFO; // @suppress(dscanner.suspicious.label_var_same_name)
1129         else static if (tagType == "FORMAT") const int hlt= BCF_HL_FMT; // @suppress(dscanner.suspicious.label_var_same_name)
1130         else static if (tagType == "contig") const int hlt= BCF_HL_CTG; // @suppress(dscanner.suspicious.label_var_same_name)
1131         else assert(0);
1132 
1133         bcf_hrec_t *hrec = bcf_hdr_get_hrec(this.vcfhdr.hdr, hlt,   toStringz(key),
1134                                                                     toStringz(value),
1135                                                                     toStringz(str_class));
1136 
1137         const int nkeys = hrec.nkeys;
1138         string[string] kv;
1139 
1140         foreach(int i; 0 .. nkeys) {
1141             string k = cast(string) fromStringz(hrec.keys[i]);
1142             string v = cast(string) fromStringz(hrec.vals[i]); 
1143             kv[k] = v;
1144         }
1145 
1146         return kv;
1147     }
1148 
1149     /// InputRange interface: iterate over all records
1150     @property bool empty()
1151     {
1152         //     int bcf_read(htsFile *fp, const(bcf_hdr_t) *h, bcf1_t *v);
1153         // documentation claims returns -1 on critical errors, 0 otherwise
1154         // however it looks like -1 is EOF and -2 is critical errors?
1155         immutable success = bcf_read(this.fp, this.vcfhdr.hdr, this.b);
1156         if (success >= 0) return false;
1157         else if (success == -1) {
1158             // EOF
1159             // see htslib my comments https://github.com/samtools/htslib/issues/246
1160             // and commit 9845bc9a947350d0f34e6ce69e79ab81b6339bd2
1161             return true;
1162         }
1163         else {
1164             hts_log_error(__FUNCTION__, "*** CRITICAL ERROR bcf_read < -1");
1165             // TODO: check b->errcode
1166             return true;
1167         }
1168     }
1169     /// ditto
1170     void popFront()
1171     {
1172         // noop? 
1173         // free this.b ?
1174         //bam_destroy1(this.b);
1175 
1176         // TODO: clear (not destroy) the bcf1_t for reuse?
1177     }
1178     /// ditto
1179     VCFRecord front()
1180     {
1181         // note that VCFRecord's constructor will be responsible for
1182         // * unpacking and
1183         // * destroying
1184         // its copy
1185         return new VCFRecord(this.vcfhdr, bcf_dup(this.b), this.MAX_UNPACK);
1186     }
1187 }
1188 
1189 ///
1190 debug(dhtslib_unittest)
1191 unittest
1192 {
1193     import std.exception: assertThrown;
1194     import std.stdio: writeln, writefln;
1195 
1196     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
1197 
1198 
1199     auto vw = VCFWriter("/dev/null");
1200 
1201     vw.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
1202     vw.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">");
1203     // ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
1204     vw.addTag!"INFO"("AF", "A", "Integer", "Number of Samples With Data");
1205     vw.addHeaderLineRaw("##contig=<ID=20,length=62435964,assembly=B36,md5=f126cdf8a6e0c7f379d618ff66beb2da,species=\"Homo sapiens\",taxonomy=x>"); // @suppress(dscanner.style.long_line)
1206     vw.addHeaderLineRaw("##FILTER=<ID=q10,Description=\"Quality below 10\">");
1207 
1208     // Exercise header
1209     assert(vw.vcfhdr.nsamples == 0);
1210     vw.addSample("NA12878");
1211     assert(vw.vcfhdr.nsamples == 1);
1212 
1213     auto r = new VCFRecord(vw.vcfhdr, bcf_init1());
1214     
1215     r.chrom = "20";
1216     assert(r.chrom == "20");
1217     assertThrown(r.chrom = "chr20");   // headerline chromosome is "20" not "chr20"
1218 
1219     r.pos = 999_999;
1220     assert(r.pos == 999_999);
1221     r.pos = 62_435_964 + 1;     // Exceeds contig length
1222 
1223     // Test ID field
1224     // note that in an empty freshly initialized bcf1_t, the ID field is "" rather than "."
1225     // Will consider adding initialization code
1226     //writefln("ID: %s", r.id);
1227     r.id = "";
1228     assert(r.id == ".");
1229     r.id = ".";
1230     assert(r.id == ".");
1231     r.id = "rs001";
1232     assert(r.id == "rs001");
1233     r.addID("rs999");
1234     assert(r.id == "rs001;rs999");
1235     r.addID("rs001");   // test duplicate checking
1236     assert(r.id == "rs001;rs999");
1237 
1238     // Test REF/ALT allele setters and getters
1239     // many overloads to test for good coverage
1240     // Test matrix: Set {zero, one, two, three} alleles * {set by string, set by array} = 8 test cases
1241     //  * also, there is setAlleles(ref, alt) and setAlleles(ref, alt1, ...) 
1242     //  * TODO: will also need to retreive alt alleles as array and string
1243 
1244     string[] alleles_array;
1245 
1246     // Zero alleles
1247     r.alleles("");
1248     const auto zs = r.allelesAsArray;
1249     const auto zsr = r.refAllele;
1250     assert(zs.length == 0);
1251     assert(zsr == "");
1252 
1253     r.alleles(alleles_array);
1254     const auto za = r.allelesAsArray;
1255     const auto zar = r.refAllele;
1256     assert(za.length == 0);
1257     assert(zar == "");
1258     
1259     // One allele
1260     r.alleles("C");
1261     const auto os = r.allelesAsArray;
1262     const auto osr = r.refAllele;
1263     assert(os.length == 1 && os[0] == "C");
1264     assert(osr == "C");
1265 
1266     r.alleles(["C"]);
1267     const auto oa = r.allelesAsArray;
1268     const auto oar = r.refAllele;
1269     assert(oa.length == 1 && oa[0] == "C");
1270     assert(oar == "C");
1271 
1272     // Two alleles
1273     r.alleles("C,T");
1274     const auto ts = r.allelesAsArray;
1275     const auto tsr= r.refAllele;
1276     assert(ts == ["C", "T"]);
1277     assert(tsr== "C");
1278 
1279     r.alleles(["C", "T"]);
1280     const auto ta = r.allelesAsArray;
1281     const auto tar= r.refAllele;
1282     assert(ta == ["C", "T"]);
1283     assert(tar== "C");
1284 
1285     const taaa = r.altAllelesAsArray;
1286     const taas = r.altAllelesAsString;
1287     assert(taaa == ["T"]);
1288     assert(taas == "T");
1289 
1290     // alternate setter for >= 2 alleles
1291     r.setAlleles("A", "G");
1292     assert(r.allelesAsArray == ["A", "G"]);
1293 
1294     // Three alleles
1295     r.alleles("A,C,T");
1296     assert(r.allelesAsArray == ["A", "C", "T"]);
1297     assert(r.refAllele == "A");
1298     assert(r.altAllelesAsString == "C,T");
1299 
1300     // alternate the alleles for testing purposes
1301     r.alleles(["G", "A", "C"]);
1302     assert(r.allelesAsArray == ["G", "A", "C"]);
1303     assert(r.refAllele == "G");
1304     assert(r.altAllelesAsString == "A,C");
1305 
1306     r.setAlleles("A", "C", "T");
1307     assert(r.allelesAsArray == ["A", "C", "T"]);
1308     assert(r.refAllele == "A");
1309     assert(r.altAllelesAsString == "C,T");
1310 
1311 
1312     // Test QUAL
1313     r.qual = 1.0;
1314     assert(r.qual == 1.0);
1315     // now test setting qual without unpacking
1316     // TODO: see https://forum.dlang.org/post/hebouvswxlslqhovzaia@forum.dlang.org, once resolved (or once factory function written),
1317     //  add template value param BCF_UN_STR
1318     auto rr = new VCFRecord(vw.vcfhdr, "20\t17330\t.\tT\tA\t3\t.\tNS=3;DP=11;AF=0.017\n"); // @suppress(dscanner.style.long_line)
1319     rr.qual = 3.0;
1320     assert(rr.qual == 3.0);
1321 
1322 
1323     // Test FILTER
1324     assert(r.hasFilter("PASS"));
1325     assert(r.hasFilter("."));
1326     // add q10 filter (is in header)
1327     r.filter = "q10";
1328     assert(r.filter == "q10");
1329     assert(!r.hasFilter("PASS"));   // if has another filter, no longer PASS (unless added explicitly)
1330     assert(!r.hasFilter("."));      // if has another filter, no longer has . (which is PASS, or no filter [spec conflict?])
1331 
1332     // add q30 filteR (not in header)
1333     r.filter = "q30";
1334     assert(r.filter == "q10");  // i.e., unchanged
1335 
1336     assert(r.hasFilter("q10"));
1337     assert(!r.hasFilter("q30"));
1338 
1339     r.removeAllFilters();
1340     assert(r.hasFilter("."));
1341     r.addFilter("q10");
1342     assert(r.hasFilter("q10"));
1343     r.removeFilter("q10");
1344     assert(r.hasFilter("PASS"));
1345 
1346 
1347     // Finally, print the records:
1348     writefln("VCF records via toString:\n%s%s", r, rr);
1349 
1350     writeln("\ndhtslib.vcf: all tests passed\n");
1351 }