1 /** 2 3 This module provides a struct that encapsulates a SAMHeader 4 5 `SAMHeader` encapsulates and owns a `sam_hdr_t*`, 6 and provides convenience functions to read and write header records. 7 8 */ 9 module dhtslib.sam.header; 10 11 import htslib.sam; 12 import htslib.kstring; 13 import dhtslib.memory; 14 15 import core.stdc.stdlib : free; 16 import core.stdc.string : memcpy; 17 18 import std.conv : to; 19 import std.string : toStringz, fromStringz; 20 import std.traits : isSomeString; 21 22 /// SAM specifications Section 1.3 23 /// Each header line begins with the character '@' followed by one of the 24 /// two-letter header record type codes defined in this section. 25 enum RecordType : immutable(char)[2] 26 { 27 HD = "HD", 28 SQ = "SQ", 29 RG = "RG", 30 PG = "PG", 31 CO = "CO", 32 } 33 34 /** SAMHeader encapsulates `sam_hdr_t*` 35 and provides convenience wrappers to manipulate the header metadata/records. 36 */ 37 struct SAMHeader 38 { 39 BamHdr h; /// rc bam_hdr_t * wrapper 40 41 /// Construct from existing pointer 42 this(bam_hdr_t * h) 43 { 44 this.h = BamHdr(h); 45 } 46 47 /// Explicit postblit to avoid 48 /// https://github.com/blachlylab/dhtslib/issues/122 49 this(this) 50 { 51 this.h = h; 52 } 53 54 bool isNull(){ 55 return this.h is null; 56 } 57 58 /// Copy a SAMHeader 59 auto dup(){ 60 return SAMHeader(sam_hdr_dup(this.h)); 61 } 62 63 /* contig info */ 64 /// number of reference sequences; from bam_hdr_t 65 @property int nTargets() const 66 { 67 return this.h.n_targets; 68 } 69 70 /// length of specific reference sequence by number (`tid`) 71 uint targetLength(int target) const 72 in (target >= 0) 73 in (target < this.nTargets) 74 { 75 return this.h.target_len[target]; 76 } 77 78 /// lengths of the reference sequences 79 @property uint[] targetLengths() const 80 { 81 return this.h.target_len[0 .. this.nTargets].dup; 82 } 83 84 /// names of the reference sequences 85 @property string[] targetNames() const 86 { 87 string[] names; 88 names.length = this.nTargets; 89 foreach (i; 0 .. this.nTargets) 90 { 91 names[i] = fromStringz(this.h.target_name[i]).idup; 92 } 93 return names; 94 } 95 96 /// reference contig name to integer id 97 int targetId(string name) 98 { 99 return sam_hdr_name2tid(this.h, toStringz(name)); 100 } 101 102 /// reference contig name to integer id 103 const(char)[] targetName(int tid) 104 { 105 return fromStringz(sam_hdr_tid2name(this.h, tid)); 106 } 107 108 /* Array-like indexing */ 109 110 /// 'in' membership operator. 111 /// usage: RecordType.SQ in hdr; => <bool> 112 bool opBinaryRight(string op)(RecordType lhs) 113 if (op == "in") 114 { 115 if (numRecords(lhs)) return true; 116 return false; 117 } 118 119 /// For position-based lookups of key, 120 /// e.g. a sample-name lookup in Pysam is ["RG"][0]["SM"] , 121 /// while in dhtslib: 122 /// [RecordType.RG, 0, "SN"] 123 const(char)[] opIndex(RecordType rt, size_t pos, const(char)[] key) 124 { 125 return this.valueByPos(rt, pos, key); 126 } 127 128 /// number of records (lines) of type e.g. SQ, RG, etc. 129 size_t numRecords(RecordType rt) 130 { 131 return sam_hdr_count_lines(this.h, rt.ptr); 132 } 133 134 /* ==== Line level methods ==== */ 135 136 /// add multiple \n-terminated full SAM header records, eg "@SQ\tSN:foo\tLN:100" 137 /// (single line or last of multiple does not require terminal \n) 138 auto addLines(const(char)[][] lines) 139 { 140 if (lines.length == 0) 141 return 0; // (sam_hdr_add_lines also returns 0 for nothing added) 142 else if (lines.length == 1) 143 return sam_hdr_add_lines(this.h, lines[0].ptr, lines[0].length); 144 else { 145 char[] buf; 146 foreach(line; lines) 147 buf ~= line ~ '\n'; 148 return sam_hdr_add_lines(this.h, buf.ptr, buf.length); 149 } 150 } 151 152 /// Add a single line to an existing header 153 auto addLine(T...)(RecordType type, T kvargs) 154 if(kvargs.length > 0 && isSomeString!(T[0])) 155 { 156 static assert (kvargs.length %2 == 0); // K-V pairs => even number of variadic args 157 /* 158 // NOTE: both (runtime) type safe variadic params, and compile-time variadic templates 159 // use dynamic arrays, which cannot be passed to C variadic functions no matter what. 160 // complicating this, we also need to convert to char*. The below won't work period; 161 // the analogous variadic template won't work either without the mixin foolishness below. 162 const(char)*[] varargs; 163 varargs.length = kvargs.length + 1; 164 for(int i=0; i < kvargs.length; i++) 165 varargs[i] = kvargs[i].ptr; 166 varargs[$-1] = null; // last vararg param null signals to sam_hdr_add_line end of list 167 168 return sam_hdr_add_line(this.h, type.ptr, varargs.ptr); 169 */ 170 string varargMagic(size_t len) 171 { 172 string args = "sam_hdr_add_line(this.h, type.ptr, "; 173 for(int i=0; i<len; i++) 174 args ~= "toStringz(kvargs[" ~ i.to!string ~ "]), "; 175 args ~= "null)"; 176 return args; 177 } 178 179 // if mixin result is "toStringz(kvargs[0], ..." error is: 180 // Error: Using the result of a comma expression is not allowed 181 //return sam_hdr_add_line(this.h, type.ptr, mixin(varargMagic(kvargs.length)) ); 182 return mixin(varargMagic(kvargs.length)); 183 } 184 185 /// Return a complete line of formatted text for a given type and ID, 186 /// or if no ID, first line matching type. 187 /// 188 /// Parameters: 189 /// * type - enum 190 /// * id_key - may be empty, in which case the first line matching type is returned 191 /// * id_val - may be empty IFF id_key empty; otherwise must be value for key 192 const(char)[] lineById(RecordType type, string id_key = "", string id_val = "") 193 in (id_key.length == 0 ? id_val.length == 0 : id_val.length > 0) 194 { 195 196 kstring_t ks_line; 197 198 // looking at samtools header.c sam_hrecs_Find_type_id (called by sam_hdr_find_line_id), 199 // passing non-null terminated two-char char* appears safe 200 auto res = sam_hdr_find_line_id(this.h, type.ptr, 201 id_key == "" ? null : id_key.ptr, 202 id_val == "" ? null : id_val.ptr, 203 &ks_line); 204 205 // 0: success, -1: no match found, -2: error 206 if (res < 0) 207 return ""; 208 209 char[] line; 210 line.length = ks_line.l; 211 memcpy(line.ptr, ks_line.s, ks_line.l); 212 free(ks_line.s); 213 return line; 214 } 215 216 /* 217 int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type, 218 int pos, kstring_t *ks); 219 */ 220 221 /* int sam_hdr_remove_line_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value); */ 222 223 /* int sam_hdr_remove_line_pos(sam_hdr_t *h, const char *type, int position); */ 224 225 /* int sam_hdr_update_line(sam_hdr_t *h, const char *type, 226 const char *ID_key, const char *ID_value, ...); */ 227 228 /* int sam_hdr_remove_except(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value); */ 229 230 /* int sam_hdr_remove_lines(sam_hdr_t *h, const char *type, const char *id, void *rh); */ 231 232 /+ 233 int sam_hdr_count_lines(sam_hdr_t *h, const char *type); 234 int sam_hdr_line_index(sam_hdr_t *bh, const char *type, const char *key); 235 const char *sam_hdr_line_name(sam_hdr_t *bh, const char *type, int pos); 236 +/ 237 238 //// //// int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value, const char *key, kstring_t *ks); 239 240 241 /// Return the value associated with a key for a header line identified by position 242 const(char)[] valueByPos(RecordType type, size_t pos, const(char)[] key) 243 in (pos <= int.max) 244 in (key.length > 0) 245 { 246 kstring_t ks; 247 auto res = sam_hdr_find_tag_pos(this.h, type.ptr, cast(int)pos, toStringz(key), &ks); 248 249 // 0: success, -1: tag DNE, -2: error 250 if (res < 0) 251 return ""; 252 253 char[] ret; 254 ret.length = ks.l; 255 memcpy(ret.ptr, ks.s, ks.l); 256 free(ks.s); 257 return ret; 258 } 259 } 260 /// Example 261 debug (dhtslib_unittest) unittest 262 { 263 import std; 264 265 auto h = sam_hdr_init(); 266 auto hdr = SAMHeader(h); 267 268 assert(!(RecordType.RG in hdr)); 269 270 //sam_hdr_add_line(h, RecordType.RG.ptr, "ID".ptr, "001".ptr, "SM".ptr, "sample1".ptr, null); 271 hdr.addLine(RecordType.RG, "ID", "001", "SM", "sample1"); 272 273 assert(RecordType.RG in hdr); 274 275 auto line = hdr.lineById(RecordType.RG, "ID", "001"); 276 assert(line == "@RG ID:001 SM:sample1"); 277 278 auto val = hdr.valueByPos(RecordType.RG, 0, "SM"); 279 assert(val == "sample1"); 280 assert(hdr[RecordType.RG, 0, "SM"] == "sample1"); 281 assert(hdr[RecordType.RG, 0, "XX"] == ""); 282 }