1 module dhtslib.sam.header;
2 
3 import htslib.sam;
4 import htslib.kstring;
5 
6 import core.stdc.stdlib : free;
7 import core.stdc..string : memcpy;
8 
9 import std.conv : to;
10 import std..string : toStringz;
11 import std.traits : isSomeString;
12 
/// Header record type codes, per the SAM specification, Section 1.3.
///
/// A header line starts with the character '@', immediately followed by one
/// of the two-letter codes listed here. Every member is a fixed-size,
/// two-character immutable array, so `.ptr` on a value points at exactly
/// those two characters.
enum RecordType : immutable(char)[2]
{
    /// code for "@HD" lines
    HD = "HD",

    /// code for "@SQ" lines
    SQ = "SQ",

    /// code for "@RG" lines
    RG = "RG",

    /// code for "@PG" lines
    PG = "PG",

    /// code for "@CO" lines
    CO = "CO",
}
24 
/// Lightweight wrapper around an htslib `sam_hdr_t*` offering D-style access
/// to SAM header records.
///
/// The wrapped pointer is stored as given. This struct has no destructor and
/// never releases the header; whoever created the `sam_hdr_t` remains
/// responsible for destroying it.
struct SAMHeader
{
    private sam_hdr_t* h;

    /// Wrap an existing htslib header pointer (stored as-is, not copied).
    this(sam_hdr_t* h)
    {
        this.h = h;
    }
    // no destructor

    /* Array-like indexing */

    /// 'in' membership operator.
    /// usage: RecordType.SQ in hdr; => <bool>
    ///
    /// Returns: true iff at least one record of the given type is present.
    /// An htslib error while counting is reported as "not present".
    bool opBinaryRight(string op)(RecordType lhs)
    if (op == "in")
    {
        return numRecords(lhs) > 0;
    }

    /// For position-based lookups of key,
    /// e.g. a sample-name lookup in Pysam is ["RG"][0]["SM"] ,
    /// while in dhtslib:
    /// [RecordType.RG, 0, "SM"]
    ///
    /// Forwards to `valueByPos`; see there for contracts and return value.
    const(char)[] opIndex(RecordType rt, size_t pos, const(char)[] key)
    {
        return this.valueByPos(rt, pos, key);
    }

    /// number of records (lines) of type e.g. SQ, RG, etc.
    ///
    /// Returns: the record count, or 0 when htslib reports an error
    /// (negative return value from `sam_hdr_count_lines`).
    size_t numRecords(RecordType rt)
    {
        // The C call returns an int that is negative on failure. Converting
        // that directly to size_t would turn an error into a huge count.
        immutable count = sam_hdr_count_lines(this.h, rt.ptr);
        return count > 0 ? cast(size_t) count : 0;
    }

    /* ==== Line level methods ==== */

    /// add multiple \n-terminated full SAM header records, eg "@SQ\tSN:foo\tLN:100"
    /// (passed line does not require \n)
    ///
    /// Returns: the htslib status code (0 on success, -1 on failure).
    /// Empty input adds nothing and returns 0.
    auto addLines(const(char)[] lines)
    {
        // htslib treats len == 0 as "length unknown, lines must be
        // NUL-terminated". A D slice carries no such guarantee, so never
        // forward a zero length to the C side.
        if (lines.length == 0)
            return 0;

        return sam_hdr_add_lines(this.h, lines.ptr, lines.length);
    }

    /// Add a single line to an existing header
    ///
    /// Params:
    ///     type   = record type of the new line
    ///     kvargs = alternating key, value strings ("ID", "001", "SM", "x", ...);
    ///              an even, non-zero number of arguments is required
    ///
    /// Returns: the result of `sam_hdr_add_line`.
    auto addLine(T...)(RecordType type, T kvargs)
    if(kvargs.length > 0 && isSomeString!(T[0]))
    {
        static assert (kvargs.length %2 == 0,
                       "addLine expects key/value pairs (an even number of string arguments)");

        // NOTE: neither type-safe variadic parameters nor a dynamic array of
        // char* can be handed to a C variadic function, and every argument
        // additionally needs converting to a NUL-terminated char*. The call
        // is therefore generated as source text at compile time:
        //   sam_hdr_add_line(this.h, type.ptr, toStringz(kvargs[0]), ..., null)
        // The trailing null tells sam_hdr_add_line where the list ends.
        string varargMagic(size_t len)
        {
            string call = "sam_hdr_add_line(this.h, type.ptr, ";
            foreach (i; 0 .. len)
                call ~= "toStringz(kvargs[" ~ i.to!string ~ "]), ";
            call ~= "null)";
            return call;
        }

        // The whole call must be mixed in; mixing in only the argument list
        // fails with "Using the result of a comma expression is not allowed".
        return mixin(varargMagic(kvargs.length));
    }

    /// Return a complete line of formatted text for a given type and ID,
    /// or if no ID, first line matching type.
    ///
    /// Parameters:
    ///     * type      - enum
    ///     * id_key    - may be empty, in which case the first line matching type is returned
    ///     * id_val    - may be empty IFF id_key empty; otherwise must be value for key
    ///
    /// Returns: a GC-allocated copy of the line, or "" when nothing matched
    /// or htslib reported an error.
    const(char)[] lineById(RecordType type, string id_key = "", string id_val = "")
    in (id_key.length == 0 ? id_val.length == 0 : id_val.length > 0)
    {
        kstring_t ks_line;
        // htslib malloc's the kstring buffer; release it on every exit path
        // (free(null) is a no-op when nothing was allocated).
        scope (exit) free(ks_line.s);

        // looking at samtools header.c sam_hrecs_Find_type_id (called by sam_hdr_find_line_id),
        // passing non-null terminated two-char char* appears safe for `type`.
        // Key and value are ordinary C strings though, so they must be
        // NUL-terminated: an arbitrary D string's .ptr gives no such guarantee.
        auto res = sam_hdr_find_line_id(this.h, type.ptr,
                                        id_key.length == 0 ? null : toStringz(id_key),
                                        id_val.length == 0 ? null : toStringz(id_val),
                                        &ks_line);

        // 0: success, -1: no match found, -2: error
        if (res < 0)
            return "";

        // Copy into GC memory before the scope guard frees the C buffer.
        return ks_line.s[0 .. ks_line.l].dup;
    }

    /* htslib header functions not wrapped yet:

    int sam_hdr_find_line_pos(sam_hdr_t *h, const char *type,
                              int pos, kstring_t *ks);

    int sam_hdr_remove_line_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value);

    int sam_hdr_remove_line_pos(sam_hdr_t *h, const char *type, int position);

    int sam_hdr_update_line(sam_hdr_t *h, const char *type,
        const char *ID_key, const char *ID_value, ...);

    int sam_hdr_remove_except(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value);

    int sam_hdr_remove_lines(sam_hdr_t *h, const char *type, const char *id, void *rh);

    int sam_hdr_count_lines(sam_hdr_t *h, const char *type);
    int sam_hdr_line_index(sam_hdr_t *bh, const char *type, const char *key);
    const char *sam_hdr_line_name(sam_hdr_t *bh, const char *type, int pos);

    int sam_hdr_find_tag_id(sam_hdr_t *h, const char *type, const char *ID_key, const char *ID_value, const char *key, kstring_t *ks);
    */

    /// Return the value associated with a key for a header line identified by position
    ///
    /// Params:
    ///     type = record type to search
    ///     pos  = zero-based index among the lines of that type (must fit in an int)
    ///     key  = tag name, e.g. "SM"; must not be empty
    ///
    /// Returns: a GC-allocated copy of the value, or "" when the tag does not
    /// exist or htslib reported an error.
    const(char)[] valueByPos(RecordType type, size_t pos, const(char)[] key)
    in (pos <= int.max)
    in (key.length > 0)
    {
        kstring_t ks;
        // release the htslib-allocated buffer on every exit path
        scope (exit) free(ks.s);

        auto res = sam_hdr_find_tag_pos(this.h, type.ptr, cast(int)pos, toStringz(key), &ks);

        // 0: success, -1: tag DNE, -2: error
        if (res < 0)
            return "";

        // Copy into GC memory before the scope guard frees the C buffer.
        return ks.s[0 .. ks.l].dup;
    }
}
178 
// Exercises SAMHeader against a freshly created, empty htslib header:
// membership test, adding a line, lookup by ID and lookup by position.
unittest
{
    auto h = sam_hdr_init();
    assert(h !is null);
    // SAMHeader has no destructor and does not own the pointer, so the test
    // must release the header itself.
    scope (exit) sam_hdr_destroy(h);

    auto hdr = SAMHeader(h);

    // empty header: no read-group records yet
    assert(!(RecordType.RG in hdr));
    assert(hdr.numRecords(RecordType.RG) == 0);

    //sam_hdr_add_line(h, RecordType.RG.ptr, "ID".ptr, "001".ptr, "SM".ptr, "sample1".ptr, null);
    assert(hdr.addLine(RecordType.RG, "ID", "001", "SM", "sample1") == 0);

    assert(RecordType.RG in hdr);
    assert(hdr.numRecords(RecordType.RG) == 1);

    // lookup of the complete line by its ID
    auto line = hdr.lineById(RecordType.RG, "ID", "001");
    assert(line == "@RG\tID:001\tSM:sample1");

    // an ID that is not present yields the empty string
    assert(hdr.lineById(RecordType.RG, "ID", "no-such-id") == "");

    // lookup of a single tag by record position, directly and via opIndex
    auto val = hdr.valueByPos(RecordType.RG, 0, "SM");
    assert(val == "sample1");
    assert(hdr[RecordType.RG, 0, "SM"] == "sample1");

    // a missing tag yields the empty string
    assert(hdr[RecordType.RG, 0, "XX"] == "");
}