1 module dhtslib.vcf.writer;
2 
3 import std.datetime;
4 import std.string: fromStringz, toStringz;
5 import std.traits: isArray, isDynamicArray, isBoolean, isIntegral, isFloatingPoint, isNumeric, isSomeString;
6 import std.conv: to, ConvException;
7 import std.format: format;
8 
9 import dhtslib.memory;
10 import dhtslib.vcf;
11 import htslib.vcf;
12 import htslib.hts_log;
13 
14 alias BCFWriter = VCFWriter;
15 
16 /** Basic support for writing VCF, BCF files */
17 struct VCFWriter
18 {
19     // htslib data structures
20     VcfFile     fp;    /// rc htsFile wrapper
21     VCFHeader   vcfhdr;    /// header wrapper
22     Bcf1[]    rows;   /// individual records
23 
24     @disable this();
25     /// open file or network resources for writing
26     /// setup incl header allocation
27     /// if mode==w:
28     ///     bcf_hdr_init automatically sets version string (##fileformat=VCFv4.2)
29     ///     bcf_hdr_init automatically adds the PASS filter
30     this(string fn)
31     {
32         if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor");
33         this.fp = VcfFile(vcf_open(toStringz(fn), toStringz("w"c)));
34         if (!this.fp) throw new Exception("Could not hts_open file");
35 
36         this.vcfhdr = VCFHeader( bcf_hdr_init("w\0"c.ptr));
37         addFiledate();
38         bcf_hdr_sync(this.vcfhdr.hdr);
39 
40         /+
41         hts_log_trace(__FUNCTION__, "Displaying header at construction");
42         //     int bcf_hdr_format(const(bcf_hdr_t) *hdr, int is_bcf, kstring_t *str);
43         kstring_t ks;
44         bcf_hdr_format(this.vcfhdr.hdr, 0, &ks);
45         char[] hdr;
46         hdr.length = ks.l;
47         import core.stdc.string: memcpy;
48         memcpy(hdr.ptr, ks.s, ks.l);
49         import std.stdio: writeln;
50         writeln(hdr);
51         +/
52     }
53     /// setup and copy a header from another BCF/VCF as template
54     this(T)(string fn, T other)
55     if(is(T == VCFHeader) || is(T == bcf_hdr_t*))
56     {
57         if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor");
58         this.fp = vcf_open(toStringz(fn), toStringz("w"c));
59         if (!this.fp) throw new Exception("Could not hts_open file");
60 
61         static if(is(T == VCFHeader*)) { this.vcfhdr      = VCFHeader( bcf_hdr_dup(other.hdr) ); }
62         else static if(is(T == bcf_hdr_t*)) { this.vcfhdr = VCFHeader( bcf_hdr_dup(other) ); }
63     }
64 
65     invariant
66     {
67         assert(this.vcfhdr.hdr != null);
68     }
69 
70     VCFHeader getHeader()
71     {
72         return this.vcfhdr;
73     }
74 
75     /// Add sample to this VCF
76     /// * int bcf_hdr_add_sample(bcf_hdr_t *hdr, const(char) *sample);
77     deprecated("use VCFHeader methods instead")
78     int addSample(string name)
79     in { assert(name != ""); }
80     do
81     {
82         return this.vcfhdr.addSample(name);
83     }
84 
85     deprecated("use VCFHeader methods instead")
86     /// Add a new header line
87     int addHeaderLineKV(string key, string value)
88     {
89         return this.vcfhdr.addHeaderLineKV(key, value);
90     }
91 
92     deprecated("use VCFHeader methods instead")
93     /// Add a new header line -- must be formatted ##key=value
94     int addHeaderLineRaw(string line)
95     {
96         return this.vcfhdr.addHeaderLineRaw(line);
97     }
98 
99     deprecated("use VCFHeader methods instead")
100     /// Add a filedate= headerline, which is not called out specifically in  the spec,
101     /// but appears in the spec's example files. We could consider allowing a param here.
102     int addFiledate()
103     {
104         return this.vcfhdr.addFiledate;
105     }
106     
107     /** Add INFO (§1.2.2) or FORMAT (§1.2.4) tag
108 
109     The INFO tag describes row-specific keys used in the INFO column;
110     The FORMAT tag describes sample-specific keys used in the last, and optional, genotype column.
111 
112     Template parameter: string; must be INFO or FORMAT
113 
114     The first four parameters are required; NUMBER and TYPE have specific allowable values.
115     source and version are optional, but recommended (for INFO only).
116 
117     *   id:     ID tag
118     *   number: NUMBER tag; here a string because it can also take special values {A,R,G,.} (see §1.2.2)
119     *   type:   Integer, Float, Flag, Character, and String
120     *   description: Text description; will be double quoted
121     *   source:      Annotation source  (eg dbSNP)
122     *   version:     Annotation version (eg 142)
123     */
124     deprecated("use VCFHeader methods instead")
125     void addTag(HeaderRecordType tagType)( string id,
126                                     HeaderLengths number,
127                                     HeaderTypes type,
128                                     string description,
129                                     string source="",
130                                     string _version="")
131     if(tagType == HeaderRecordType.Info || tagType == HeaderRecordType.Format)
132     {
133         this.vcfhdr.addHeaderLine!(tagType)(id, number, type, description, source, _version);
134     }
135 
136     /** Add FILTER tag (§1.2.3) */
137     deprecated("use VCFHeader methods instead")
138     void addTag(HeaderRecordType tagType)(string id, string description)
139     if(tagType == HeaderRecordType.Filter)
140     {
141         this.vcfhdr.addHeaderLine!(tagType)(id, description);
142     }
143 
144     /** Add FILTER tag (§1.2.3) */
145     deprecated("use VCFHeader methods instead")
146     void addFilterTag(string id, string description)
147     {
148         this.vcfhdr.addFilter(id, description);
149     }
150 
151     /** Add contig definition (§1.2.7) to header meta-info 
152     
153         other: "url=...,md5=...,etc."
154     */
155     deprecated("use VCFHeader methods instead")
156     auto addTag(HeaderRecordType tagType)(const(char)[] id, const int length = 0, string other = "")
157     if(tagType == HeaderRecordType.Contig)
158     {
159         return this.vcfhdr.addTag!(tagType)(id, length, other);
160     }
161     
162     /**
163         Add a record
164 
165         alleles:    comma-separated string, including ref allele
166         qual:       a float, but accepts any numeric
167     */
168     int addRecord(S, N)(S contig, int pos, S id, S alleles, N qual, S[] filters)
169     if ( (isSomeString!S || is(S == char*) ) && isNumeric!N)
170     {        
171         Bcf1 line = Bcf1(bcf_init1());
172 
173         line.rid = bcf_hdr_name2id(this.vcfhdr.hdr, toStringz(contig));
174         if (line.rid == -1) hts_log_error(__FUNCTION__, "contig not found");
175 
176         line.pos = pos;
177 
178         bcf_update_id(this.vcfhdr.hdr, line, (id == "" ? null : toStringz(id)) );  // TODO: could support >1 id with array as with the filters
179 
180         bcf_update_alleles_str(this.vcfhdr.hdr, line, toStringz(alleles));
181 
182         line.qual = qual;
183 
184         // Update filter(s); if/else blocks for speed
185         if(filters.length == 0)
186         {
187             int pass = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz("PASS"c));
188             bcf_update_filter(this.vcfhdr.hdr, line, &pass, 1);
189         }
190         else if(filters.length == 1)
191         {
192             int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(filters[0]));
193             if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", filters[0]) );
194             bcf_update_filter(this.vcfhdr.hdr, line, &fid, 1);
195         }
196         else    // TODO: factor out the check for -1 into a safe_update_filter or something
197         {
198             int[] filter_ids;
199             foreach(f; filters) {
200                 const int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(f));
201                 if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", f) );
202                 else filter_ids ~= fid;
203             }
204             bcf_update_filter(this.vcfhdr.hdr, line, filter_ids.ptr, cast(int)filter_ids.length );
205         }
206 
207         // Add a record
208         /+
209         // .. FORMAT
210         bcf_hdr_append(hdr, "##FORMAT=<ID=TF,Number=1,Type=Float,Description=\"Test Float\">");
211         float[4] test;
212         bcf_float_set_missing(test[0]);
213         test[1] = 47.11f;
214         bcf_float_set_vector_end(test[2]);
215         writeln("pre update format float");
216         bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("TF"), &test[0], 4);
217         +/
218         int tmpi = 1;
219         bcf_update_info_int32(this.vcfhdr.hdr, line, toStringz("NS"c), &tmpi, 1);
220 
221         // Add the actual sample
222         int[4] dp = [ 9000, 1, 2, 3];
223         bcf_update_format(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1, BCF_HT_INT);
224         //int dp = 9000;
225         //bcf_update_format_int32(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1);
226         //auto f = new float;
227         //*f = 1.0;
228         //bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("XF"c), f, 1);
229 
230         this.rows ~= line;
231 
232         return 0;
233     }
234 
235     /// as expected
236     int writeHeader()
237     {
238         return bcf_hdr_write(this.fp, this.vcfhdr.hdr);
239     }
240     /// as expected
241     int writeRecord(ref VCFRecord r)
242     {
243         const ret = bcf_write(this.fp, this.vcfhdr.hdr, r.line);
244         if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error");
245         return ret;
246     }
247     /// as expected
248     int writeRecord(bcf_hdr_t *hdr, bcf1_t *rec)
249     {
250         hts_log_warning(__FUNCTION__, "pre call");
251         const ret = bcf_write(this.fp, hdr, rec);
252         hts_log_warning(__FUNCTION__, "post call");
253         if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error");
254         return ret;
255     }
256 }
257 
258 ///
259 debug(dhtslib_unittest)
260 unittest
261 {
262     import std.exception: assertThrown;
263     import std.stdio: writeln, writefln;
264     import dhtslib.vcf.writer;
265 
266     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
267 
268 
269     auto vw = VCFWriter("/dev/null");
270 
271     vw.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">");
272     vw.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">");
273     // ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency">
274     vw.addTag!(HeaderRecordType.Info)("AF", HeaderLengths.OnePerAltAllele, HeaderTypes.Integer, "Number of Samples With Data");
275     vw.addTag!(HeaderRecordType.Filter)("filt","test");
276     vw.addFilterTag("filt2","test2");
277 
278     assert(vw.getHeader.getHeaderRecord(HeaderRecordType.Filter, "filt2").getDescription == "\"test2\"");
279     vw.writeHeader();
280 }