1 /** 2 3 This module provides a struct that encapsulates a SAMHeader 4 5 `SAMHeader` encapsulates and owns a `sam_hdr_t*`, 6 and provides convenience functions to read and write header records. 7 8 */ 9 module dhtslib.sam.header; 10 11 import htslib.sam; 12 import htslib.kstring; 13 import dhtslib.memory; 14 15 import core.stdc.stdlib : free; 16 import core.stdc.string : memcpy; 17 18 import std.conv : to; 19 import std.string : toStringz, fromStringz; 20 import std.traits : isSomeString; 21 22 /// SAM specifications Section 1.3 23 /// Each header line begins with the character '@' followed by one of the 24 /// two-letter header record type codes defined in this section. 25 enum RecordType : immutable(char)[2] 26 { 27 HD = "HD", 28 SQ = "SQ", 29 RG = "RG", 30 PG = "PG", 31 CO = "CO", 32 } 33 34 /** SAMHeader encapsulates `sam_hdr_t*` 35 and provides convenience wrappers to manipulate the header metadata/records. 36 */ 37 struct SAMHeader 38 { 39 BamHdr h; /// rc bam_hdr_t * wrapper 40 41 /// Construct from existing pointer 42 this(bam_hdr_t * h) 43 { 44 this.h = BamHdr(h); 45 } 46 47 bool isNull(){ 48 return this.h is null; 49 } 50 51 /// Copy a SAMHeader 52 auto dup(){ 53 return SAMHeader(sam_hdr_dup(this.h)); 54 } 55 56 /* contig info */ 57 /// number of reference sequences; from bam_hdr_t 58 @property int nTargets() const 59 { 60 return this.h.n_targets; 61 } 62 63 /// length of specific reference sequence by number (`tid`) 64 uint targetLength(int target) const 65 in (target >= 0) 66 in (target < this.nTargets) 67 { 68 return this.h.target_len[target]; 69 } 70 71 /// lengths of the reference sequences 72 @property uint[] targetLengths() const 73 { 74 return this.h.target_len[0 .. this.nTargets].dup; 75 } 76 77 /// names of the reference sequences 78 @property string[] targetNames() const 79 { 80 string[] names; 81 names.length = this.nTargets; 82 foreach (i; 0 .. this.nTargets) 83 { 84 names[i] = fromStringz(this.h.target_name[i]).idup; 85 } 86 return names; 87 } 88 89 /// reference contig name to integer id 90 int targetId(string name) 91 { 92 return sam_hdr_name2tid(this.h, toStringz(name)); 93 } 94 95 /// reference contig name to integer id 96 const(char)[] targetName(int tid) 97 { 98 return fromStringz(sam_hdr_tid2name(this.h, tid)); 99 } 100 101 /* Array-like indexing */ 102 103 /// 'in' membership operator. 104 /// usage: RecordType.SQ in hdr; => <bool> 105 bool opBinaryRight(string op)(RecordType lhs) 106 if (op == "in") 107 { 108 if (numRecords(lhs)) return true; 109 return false; 110 } 111 112 /// For position-based lookups of key, 113 /// e.g. a sample-name lookup in Pysam is ["RG"][0]["SM"] , 114 /// while in dhtslib: 115 /// [RecordType.RG, 0, "SN"] 116 const(char)[] opIndex(RecordType rt, size_t pos, const(char)[] key) 117 { 118 return this.valueByPos(rt, pos, key); 119 } 120 121 /// number of records (lines) of type e.g. SQ, RG, etc. 122 size_t numRecords(RecordType rt) 123 { 124 return sam_hdr_count_lines(this.h, rt.ptr); 125 } 126 127 /* ==== Line level methods ==== */ 128 129 /// add multiple \n-terminated full SAM header records, eg "@SQ\tSN:foo\tLN:100" 130 /// (single line or last of multiple does not require terminal \n) 131 auto addLines(const(char)[][] lines) 132 { 133 if (lines.length == 0) 134 return 0; // (sam_hdr_add_lines also returns 0 for nothing added) 135 else if (lines.length == 1) 136 return sam_hdr_add_lines(this.h, lines[0].ptr, lines[0].length); 137 else { 138 char[] buf; 139 foreach(line; lines) 140 buf ~= line ~ '\n'; 141 return sam_hdr_add_lines(this.h, buf.ptr, buf.length); 142 } 143 } 144 145 /// Add a single line to an existing header 146 auto addLine(T...)(RecordType type, T kvargs) 147 if(kvargs.length > 0 && isSomeString!(T[0])) 148 { 149 static assert (kvargs.length %2 == 0); // K-V pairs => even number of variadic args 150 /* 151 // NOTE: both (runtime) type safe variadic params, and compile-time variadic templates 152 // use dynamic arrays, which cannot be passed to C variadic functions no matter what. 153 // complicating this, we also need to convert to char*. The below won't work period; 154 // the analogous variadic template won't work either without the mixin foolishness below. 155 const(char)*[] varargs; 156 varargs.length = kvargs.length + 1; 157 for(int i=0; i < kvargs.length; i++) 158 varargs[i] = kvargs[i].ptr; 159 varargs[$-1] = null; // last vararg param null signals to sam_hdr_add_line end of list 160 161 return sam_hdr_add_line(this.h, type.ptr, varargs.ptr); 162 */ 163 string varargMagic(size_t len) 164 { 165 string args = "sam_hdr_add_line(this.h, type.ptr, "; 166 for(int i=0; i<len; i++) 167 args ~= "toStringz(kvargs[" ~ i.to!string ~ "]), "; 168 args ~= "null)"; 169 return args; 170 } 171 172 // if mixin result is "toStringz(kvargs[0], ..." error is: 173 // Error: Using the result of a comma expression is not allowed 174 //return sam_hdr_add_line(this.h, type.ptr, mixin(varargMagic(kvargs.length)) ); 175 return mixin(varargMagic(kvargs.length)); 176 } 177 178 /// Return a complete line of formatted text for a given type and ID, 179 /// or if no ID, first line matching type. 180 /// 181 /// Parameters: 182 /// * type - enum 183 /// * id_key - may be empty, in which case the first line matching type is returned 184 /// * id_val - may be empty IFF id_key empty; otherwise must be value for key 185 const(char)[] lineById(RecordType type, string id_key = "", string id_val = "") 186 in (id_key.length == 0 ? id_val.length == 0 : id_val.length > 0) 187 { 188 189 kstring_t ks_line; 190 191 // looking at samtools header.c sam_hrecs_Find_type_id (called by sam_hdr_find_line_id), 192 // passing non-null terminated two-char char* appears safe 193 auto res = sam_hdr_find_line_id(this.h, type.ptr, 194 id_key == "" ? null : id_key.ptr, 195 id_val == "" ? null : id_val.ptr, 196 &ks_line); 197 198 // 0: success, -1: no match found, -2: error 199 if (res < 0) 200 return ""; 201 202 char[] line; 203 line.length = ks_line.l; 204 memcpy(line.ptr, ks_line.s, ks_line.l); 205 free(ks_line.s); 206 return line; 207 } 208 209 /* 210 int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type, 211 int pos, kstring_t *ks); 212 */ 213 214 /* int sam_hdr_remove_line_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value); */ 215 216 /* int sam_hdr_remove_line_pos(sam_hdr_t *h, const char *type, int position); */ 217 218 /* int sam_hdr_update_line(sam_hdr_t *h, const char *type, 219 const char *ID_key, const char *ID_value, ...); */ 220 221 /* int sam_hdr_remove_except(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value); */ 222 223 /* int sam_hdr_remove_lines(sam_hdr_t *h, const char *type, const char *id, void *rh); */ 224 225 /+ 226 int sam_hdr_count_lines(sam_hdr_t *h, const char *type); 227 int sam_hdr_line_index(sam_hdr_t *bh, const char *type, const char *key); 228 const char *sam_hdr_line_name(sam_hdr_t *bh, const char *type, int pos); 229 +/ 230 231 //// //// int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value, const char *key, kstring_t *ks); 232 233 234 /// Return the value associated with a key for a header line identified by position 235 const(char)[] valueByPos(RecordType type, size_t pos, const(char)[] key) 236 in (pos <= int.max) 237 in (key.length > 0) 238 { 239 kstring_t ks; 240 auto res = sam_hdr_find_tag_pos(this.h, type.ptr, cast(int)pos, toStringz(key), &ks); 241 242 // 0: success, -1: tag DNE, -2: error 243 if (res < 0) 244 return ""; 245 246 char[] ret; 247 ret.length = ks.l; 248 memcpy(ret.ptr, ks.s, ks.l); 249 free(ks.s); 250 return ret; 251 } 252 } 253 /// Example 254 debug (dhtslib_unittest) unittest 255 { 256 import std; 257 258 auto h = sam_hdr_init(); 259 auto hdr = SAMHeader(h); 260 261 assert(!(RecordType.RG in hdr)); 262 263 //sam_hdr_add_line(h, RecordType.RG.ptr, "ID".ptr, "001".ptr, "SM".ptr, "sample1".ptr, null); 264 hdr.addLine(RecordType.RG, "ID", "001", "SM", "sample1"); 265 266 assert(RecordType.RG in hdr); 267 268 auto line = hdr.lineById(RecordType.RG, "ID", "001"); 269 assert(line == "@RG ID:001 SM:sample1"); 270 271 auto val = hdr.valueByPos(RecordType.RG, 0, "SM"); 272 assert(val == "sample1"); 273 assert(hdr[RecordType.RG, 0, "SM"] == "sample1"); 274 assert(hdr[RecordType.RG, 0, "XX"] == ""); 275 }