module dhtslib.sam.header;

import htslib.sam;
import htslib.kstring;

import core.stdc.stdlib : free;
import core.stdc.string : memcpy;

import std.conv : to;
import std.string : toStringz;
import std.traits : isSomeString;

/// SAM specifications Section 1.3
/// Each header line begins with the character '@' followed by one of the
/// two-letter header record type codes defined in this section.
enum RecordType : immutable(char)[2]
{
    HD = "HD",
    SQ = "SQ",
    RG = "RG",
    PG = "PG",
    CO = "CO",
}

/// Thin wrapper around an htslib `sam_hdr_t*` offering D-style access to
/// SAM header records.
struct SAMHeader
{
    private sam_hdr_t* h;

    /// Wrap an existing header pointer. The pointer is stored as-is.
    this(sam_hdr_t* h)
    {
        this.h = h;
    }
    // no destructor: this struct never frees the wrapped pointer

    /* Array-like indexing */

    /// 'in' membership operator.
    /// usage: RecordType.SQ in hdr; => <bool>
    ///
    /// Returns: true when at least one record of type `lhs` is present.
    bool opBinaryRight(string op)(RecordType lhs)
    if (op == "in")
    {
        return numRecords(lhs) > 0;
    }

    /// For position-based lookups of key,
    /// e.g. a sample-name lookup in Pysam is ["RG"][0]["SM"] ,
    /// while in dhtslib:
    /// hdr[RecordType.RG, 0, "SM"]
    ///
    /// Returns: the value for `key`, or "" when the lookup fails
    /// (see `valueByPos`).
    const(char)[] opIndex(RecordType rt, size_t pos, const(char)[] key)
    {
        return this.valueByPos(rt, pos, key);
    }

    /// number of records (lines) of type e.g. SQ, RG, etc.
    ///
    /// Returns: the record count; 0 when htslib reports an error.
    size_t numRecords(RecordType rt)
    {
        immutable count = sam_hdr_count_lines(this.h, rt.ptr);

        // htslib signals failure with a negative value; without this guard
        // the implicit int -> size_t conversion would turn -1 into size_t.max
        // and make `RecordType.X in hdr` report true on error.
        return count < 0 ? 0 : cast(size_t) count;
    }

    /* ==== Line level methods ==== */

    /// add multiple \n-terminated full SAM header records, eg "@SQ\tSN:foo\tLN:100"
    /// (passed line does not require \n)
    ///
    /// Returns: the htslib status code; 0 when `lines` is empty (nothing to add).
    auto addLines(const(char)[] lines)
    {
        // htslib treats a length of 0 as "call strlen on the pointer", which
        // would read past the end of a D slice that is not NUL-terminated.
        if (lines.length == 0)
            return 0;

        return sam_hdr_add_lines(this.h, lines.ptr, lines.length);
    }

    /// Add a single line to an existing header
    ///
    /// Parameters:
    /// * type   - record type of the new line
    /// * kvargs - alternating key, value strings (an even number of them)
    ///
    /// Returns: the result of `sam_hdr_add_line`.
    auto addLine(T...)(RecordType type, T kvargs)
    if(kvargs.length > 0 && isSomeString!(T[0]))
    {
        // K-V pairs => even number of variadic args
        static assert (kvargs.length % 2 == 0,
            "addLine expects key/value pairs (an even number of arguments)");
        /*
        // NOTE: both (runtime) type safe variadic params, and compile-time variadic templates
        // use dynamic arrays, which cannot be passed to C variadic functions no matter what.
        // complicating this, we also need to convert to char*. The below won't work period;
        // the analogous variadic template won't work either without the mixin foolishness below.
        const(char)*[] varargs;
        varargs.length = kvargs.length + 1;
        for(int i=0; i < kvargs.length; i++)
            varargs[i] = kvargs[i].ptr;
        varargs[$-1] = null;    // last vararg param null signals to sam_hdr_add_line end of list

        return sam_hdr_add_line(this.h, type.ptr, varargs.ptr);
        */

        // Builds, at compile time, the text of the full C variadic call:
        //   sam_hdr_add_line(this.h, type.ptr, toStringz(kvargs[0]), ..., null)
        string varargMagic(size_t len)
        {
            string args = "sam_hdr_add_line(this.h, type.ptr, ";
            foreach (i; 0 .. len)
                args ~= "toStringz(kvargs[" ~ i.to!string ~ "]), ";
            args ~= "null)";    // trailing null terminates the C vararg list
            return args;
        }

        // if mixin result is "toStringz(kvargs[0], ..." error is:
        // Error: Using the result of a comma expression is not allowed
        //return sam_hdr_add_line(this.h, type.ptr, mixin(varargMagic(kvargs.length)) );
        return mixin(varargMagic(kvargs.length));
    }

    /// Return a complete line of formatted text for a given type and ID,
    /// or if no ID, first line matching type.
    ///
    /// Parameters:
    /// * type - enum
    /// * id_key - may be empty, in which case the first line matching type is returned
    /// * id_val - may be empty IFF id_key empty; otherwise must be value for key
    ///
    /// Returns: a GC-owned copy of the line, or "" when no line matched or
    /// htslib reported an error.
    const(char)[] lineById(RecordType type, string id_key = "", string id_val = "")
    in (id_key.length == 0 ? id_val.length == 0 : id_val.length > 0)
    {
        kstring_t ks_line;

        // looking at samtools header.c sam_hrecs_find_type_id (called by sam_hdr_find_line_id),
        // passing a non-null terminated two-char char* for `type` appears safe.
        // The ID key/value are handed over as C strings, so they are
        // NUL-terminated here: a D slice's .ptr carries no such guarantee.
        auto res = sam_hdr_find_line_id(this.h, type.ptr,
                                        id_key.length == 0 ? null : toStringz(id_key),
                                        id_val.length == 0 ? null : toStringz(id_val),
                                        &ks_line);

        // Release whatever htslib allocated on every path, including errors
        // (free(null) is a no-op).
        scope (exit) free(ks_line.s);

        // 0: success, -1: no match found, -2: error
        if (res < 0)
            return "";

        // Copy out of the C buffer into GC memory before it is freed.
        return ks_line.s[0 .. ks_line.l].dup;
    }

    /*
    int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type,
                          int pos, kstring_t *ks);
    */

    /* int sam_hdr_remove_line_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value); */

    /* int sam_hdr_remove_line_pos(sam_hdr_t *h, const char *type, int position); */

    /* int sam_hdr_update_line(sam_hdr_t *h, const char *type,
        const char *ID_key, const char *ID_value, ...); */

    /* int sam_hdr_remove_except(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value); */

    /* int sam_hdr_remove_lines(sam_hdr_t *h, const char *type, const char *id, void *rh); */

    /+
    int sam_hdr_count_lines(sam_hdr_t *h, const char *type);
    int sam_hdr_line_index(sam_hdr_t *bh, const char *type, const char *key);
    const char *sam_hdr_line_name(sam_hdr_t *bh, const char *type, int pos);
    +/

    //// //// int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value, const char *key, kstring_t *ks);


    /// Return the value associated with a key for a header line identified by position
    ///
    /// Parameters:
    /// * type - record type to search
    /// * pos  - zero-based index among the lines of that type (must fit in an int)
    /// * key  - tag whose value is wanted (must be non-empty)
    ///
    /// Returns: a GC-owned copy of the value, or "" when the tag does not
    /// exist or htslib reported an error.
    const(char)[] valueByPos(RecordType type, size_t pos, const(char)[] key)
    in (pos <= int.max)
    in (key.length > 0)
    {
        kstring_t ks;
        auto res = sam_hdr_find_tag_pos(this.h, type.ptr, cast(int)pos, toStringz(key), &ks);

        // Release whatever htslib allocated on every path, including errors
        // (free(null) is a no-op).
        scope (exit) free(ks.s);

        // 0: success, -1: tag DNE, -2: error
        if (res < 0)
            return "";

        // Copy out of the C buffer into GC memory before it is freed.
        return ks.s[0 .. ks.l].dup;
    }
}

unittest
{
    import std;

    auto h = sam_hdr_init();
    auto hdr = SAMHeader(h);

    assert(!(RecordType.RG in hdr));

    //sam_hdr_add_line(h, RecordType.RG.ptr, "ID".ptr, "001".ptr, "SM".ptr, "sample1".ptr, null);
    hdr.addLine(RecordType.RG, "ID", "001", "SM", "sample1");

    assert(RecordType.RG in hdr);
    assert(hdr.numRecords(RecordType.RG) == 1);

    // SAM header fields are TAB-separated
    auto line = hdr.lineById(RecordType.RG, "ID", "001");
    assert(line == "@RG\tID:001\tSM:sample1");

    // an ID value that is a slice of a longer string (no NUL after it) must still match
    string padded = "001XYZ";
    assert(hdr.lineById(RecordType.RG, "ID", padded[0 .. 3]) == "@RG\tID:001\tSM:sample1");

    auto val = hdr.valueByPos(RecordType.RG, 0, "SM");
    assert(val == "sample1");
    assert(hdr[RecordType.RG, 0, "SM"] == "sample1");
    assert(hdr[RecordType.RG, 0, "XX"] == "");
}