1 module dhtslib.vcf.writer; 2 3 import std.datetime; 4 import std.string: fromStringz, toStringz; 5 import std.traits: isArray, isDynamicArray, isBoolean, isIntegral, isFloatingPoint, isNumeric, isSomeString; 6 import std.conv: to, ConvException; 7 import std.format: format; 8 9 import dhtslib.memory; 10 import dhtslib.vcf; 11 import htslib.vcf; 12 import htslib.hts_log; 13 14 alias BCFWriter = VCFWriter; 15 16 /** Basic support for writing VCF, BCF files */ 17 struct VCFWriter 18 { 19 // htslib data structures 20 VcfFile fp; /// rc htsFile wrapper 21 VCFHeader vcfhdr; /// header wrapper 22 Bcf1[] rows; /// individual records 23 24 @disable this(); 25 /// open file or network resources for writing 26 /// setup incl header allocation 27 /// if mode==w: 28 /// bcf_hdr_init automatically sets version string (##fileformat=VCFv4.2) 29 /// bcf_hdr_init automatically adds the PASS filter 30 this(string fn) 31 { 32 if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor"); 33 this.fp = VcfFile(vcf_open(toStringz(fn), toStringz("w"c))); 34 if (!this.fp) throw new Exception("Could not hts_open file"); 35 36 this.vcfhdr = VCFHeader( bcf_hdr_init("w\0"c.ptr)); 37 addFiledate(); 38 bcf_hdr_sync(this.vcfhdr.hdr); 39 40 /+ 41 hts_log_trace(__FUNCTION__, "Displaying header at construction"); 42 // int bcf_hdr_format(const(bcf_hdr_t) *hdr, int is_bcf, kstring_t *str); 43 kstring_t ks; 44 bcf_hdr_format(this.vcfhdr.hdr, 0, &ks); 45 char[] hdr; 46 hdr.length = ks.l; 47 import core.stdc.string: memcpy; 48 memcpy(hdr.ptr, ks.s, ks.l); 49 import std.stdio: writeln; 50 writeln(hdr); 51 +/ 52 } 53 /// setup and copy a header from another BCF/VCF as template 54 this(T)(string fn, T other) 55 if(is(T == VCFHeader) || is(T == bcf_hdr_t*)) 56 { 57 if (fn == "") throw new Exception("Empty filename passed to VCFWriter constructor"); 58 this.fp = vcf_open(toStringz(fn), toStringz("w"c)); 59 if (!this.fp) throw new Exception("Could not hts_open file"); 60 61 static if(is(T == VCFHeader*)) { this.vcfhdr = VCFHeader( bcf_hdr_dup(other.hdr) ); } 62 else static if(is(T == bcf_hdr_t*)) { this.vcfhdr = VCFHeader( bcf_hdr_dup(other) ); } 63 } 64 65 invariant 66 { 67 assert(this.vcfhdr.hdr != null); 68 } 69 70 VCFHeader getHeader() 71 { 72 return this.vcfhdr; 73 } 74 75 /// Add sample to this VCF 76 /// * int bcf_hdr_add_sample(bcf_hdr_t *hdr, const(char) *sample); 77 deprecated("use VCFHeader methods instead") 78 int addSample(string name) 79 in { assert(name != ""); } 80 do 81 { 82 return this.vcfhdr.addSample(name); 83 } 84 85 deprecated("use VCFHeader methods instead") 86 /// Add a new header line 87 int addHeaderLineKV(string key, string value) 88 { 89 return this.vcfhdr.addHeaderLineKV(key, value); 90 } 91 92 deprecated("use VCFHeader methods instead") 93 /// Add a new header line -- must be formatted ##key=value 94 int addHeaderLineRaw(string line) 95 { 96 return this.vcfhdr.addHeaderLineRaw(line); 97 } 98 99 deprecated("use VCFHeader methods instead") 100 /// Add a filedate= headerline, which is not called out specifically in the spec, 101 /// but appears in the spec's example files. We could consider allowing a param here. 102 int addFiledate() 103 { 104 return this.vcfhdr.addFiledate; 105 } 106 107 /** Add INFO (§1.2.2) or FORMAT (§1.2.4) tag 108 109 The INFO tag describes row-specific keys used in the INFO column; 110 The FORMAT tag describes sample-specific keys used in the last, and optional, genotype column. 111 112 Template parameter: string; must be INFO or FORMAT 113 114 The first four parameters are required; NUMBER and TYPE have specific allowable values. 115 source and version are optional, but recommended (for INFO only). 116 117 * id: ID tag 118 * number: NUMBER tag; here a string because it can also take special values {A,R,G,.} (see §1.2.2) 119 * type: Integer, Float, Flag, Character, and String 120 * description: Text description; will be double quoted 121 * source: Annotation source (eg dbSNP) 122 * version: Annotation version (eg 142) 123 */ 124 deprecated("use VCFHeader methods instead") 125 void addTag(HeaderRecordType tagType)( string id, 126 HeaderLengths number, 127 HeaderTypes type, 128 string description, 129 string source="", 130 string _version="") 131 if(tagType == HeaderRecordType.Info || tagType == HeaderRecordType.Format) 132 { 133 this.vcfhdr.addHeaderLine!(tagType)(id, number, type, description, source, _version); 134 } 135 136 /** Add FILTER tag (§1.2.3) */ 137 deprecated("use VCFHeader methods instead") 138 void addTag(HeaderRecordType tagType)(string id, string description) 139 if(tagType == HeaderRecordType.Filter) 140 { 141 this.vcfhdr.addHeaderLine!(tagType)(id, description); 142 } 143 144 /** Add FILTER tag (§1.2.3) */ 145 deprecated("use VCFHeader methods instead") 146 void addFilterTag(string id, string description) 147 { 148 this.vcfhdr.addFilter(id, description); 149 } 150 151 /** Add contig definition (§1.2.7) to header meta-info 152 153 other: "url=...,md5=...,etc." 154 */ 155 deprecated("use VCFHeader methods instead") 156 auto addTag(HeaderRecordType tagType)(const(char)[] id, const int length = 0, string other = "") 157 if(tagType == HeaderRecordType.Contig) 158 { 159 return this.vcfhdr.addTag!(tagType)(id, length, other); 160 } 161 162 /** 163 Add a record 164 165 alleles: comma-separated string, including ref allele 166 qual: a float, but accepts any numeric 167 */ 168 int addRecord(S, N)(S contig, int pos, S id, S alleles, N qual, S[] filters) 169 if ( (isSomeString!S || is(S == char*) ) && isNumeric!N) 170 { 171 Bcf1 line = Bcf1(bcf_init1()); 172 173 line.rid = bcf_hdr_name2id(this.vcfhdr.hdr, toStringz(contig)); 174 if (line.rid == -1) hts_log_error(__FUNCTION__, "contig not found"); 175 176 line.pos = pos; 177 178 bcf_update_id(this.vcfhdr.hdr, line, (id == "" ? null : toStringz(id)) ); // TODO: could support >1 id with array as with the filters 179 180 bcf_update_alleles_str(this.vcfhdr.hdr, line, toStringz(alleles)); 181 182 line.qual = qual; 183 184 // Update filter(s); if/else blocks for speed 185 if(filters.length == 0) 186 { 187 int pass = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz("PASS"c)); 188 bcf_update_filter(this.vcfhdr.hdr, line, &pass, 1); 189 } 190 else if(filters.length == 1) 191 { 192 int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(filters[0])); 193 if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", filters[0]) ); 194 bcf_update_filter(this.vcfhdr.hdr, line, &fid, 1); 195 } 196 else // TODO: factor out the check for -1 into a safe_update_filter or something 197 { 198 int[] filter_ids; 199 foreach(f; filters) { 200 const int fid = bcf_hdr_id2int(this.vcfhdr.hdr, BCF_DT_ID, toStringz(f)); 201 if(fid == -1) hts_log_error(__FUNCTION__, format("filter not found (ignoring): ", f) ); 202 else filter_ids ~= fid; 203 } 204 bcf_update_filter(this.vcfhdr.hdr, line, filter_ids.ptr, cast(int)filter_ids.length ); 205 } 206 207 // Add a record 208 /+ 209 // .. FORMAT 210 bcf_hdr_append(hdr, "##FORMAT=<ID=TF,Number=1,Type=Float,Description=\"Test Float\">"); 211 float[4] test; 212 bcf_float_set_missing(test[0]); 213 test[1] = 47.11f; 214 bcf_float_set_vector_end(test[2]); 215 writeln("pre update format float"); 216 bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("TF"), &test[0], 4); 217 +/ 218 int tmpi = 1; 219 bcf_update_info_int32(this.vcfhdr.hdr, line, toStringz("NS"c), &tmpi, 1); 220 221 // Add the actual sample 222 int[4] dp = [ 9000, 1, 2, 3]; 223 bcf_update_format(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1, BCF_HT_INT); 224 //int dp = 9000; 225 //bcf_update_format_int32(this.vcfhdr.hdr, line, toStringz("DP"c), &dp, 1); 226 //auto f = new float; 227 //*f = 1.0; 228 //bcf_update_format_float(this.vcfhdr.hdr, line, toStringz("XF"c), f, 1); 229 230 this.rows ~= line; 231 232 return 0; 233 } 234 235 /// as expected 236 int writeHeader() 237 { 238 return bcf_hdr_write(this.fp, this.vcfhdr.hdr); 239 } 240 /// as expected 241 int writeRecord(ref VCFRecord r) 242 { 243 const ret = bcf_write(this.fp, this.vcfhdr.hdr, r.line); 244 if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error"); 245 return ret; 246 } 247 /// as expected 248 int writeRecord(bcf_hdr_t *hdr, bcf1_t *rec) 249 { 250 hts_log_warning(__FUNCTION__, "pre call"); 251 const ret = bcf_write(this.fp, hdr, rec); 252 hts_log_warning(__FUNCTION__, "post call"); 253 if (ret != 0) hts_log_error(__FUNCTION__, "bcf_write error"); 254 return ret; 255 } 256 } 257 258 /// 259 debug(dhtslib_unittest) 260 unittest 261 { 262 import std.exception: assertThrown; 263 import std.stdio: writeln, writefln; 264 import dhtslib.vcf.writer; 265 266 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 267 268 269 auto vw = VCFWriter("/dev/null"); 270 271 vw.addHeaderLineRaw("##INFO=<ID=NS,Number=1,Type=Integer,Description=\"Number of Samples With Data\">"); 272 vw.addHeaderLineKV("INFO", "<ID=DP,Number=1,Type=Integer,Description=\"Total Depth\">"); 273 // ##INFO=<ID=AF,Number=A,Type=Float,Description="Allele Frequency"> 274 vw.addTag!(HeaderRecordType.Info)("AF", HeaderLengths.OnePerAltAllele, HeaderTypes.Integer, "Number of Samples With Data"); 275 vw.addTag!(HeaderRecordType.Filter)("filt","test"); 276 vw.addFilterTag("filt2","test2"); 277 278 assert(vw.getHeader.getHeaderRecord(HeaderRecordType.Filter, "filt2").getDescription == "\"test2\""); 279 vw.writeHeader(); 280 }