1 module dhtslib.tagvalue;
2 
3 import std.stdio;
4 import std.meta : AliasSeq, staticIndexOf;
5 import std.string : fromStringz;
6 import dhtslib.htslib.sam : bam_aux_get, bam1_t, bam_aux2i;
7 import dhtslib.htslib.hts_log;
8 import std.conv : to;
9 
/// Scalar D types that a BAM aux tag value can be read as, in the same order
/// as the type characters stored in TypeChars.
alias Types = AliasSeq!(byte, ubyte, short, ushort, int, uint, float, string, char);

/// Position of T within Types (-1 when T is not one of them).
template TypeIndex(T)
{
    enum TypeIndex = staticIndexOf!(T, Types);
}

/// BAM type character for each entry of Types, index for index.
/// See https://samtools.github.io/hts-specs/SAMv1.pdf sec 1.5
char[Types.length] TypeChars = "cCsSiIfZA";
14 
15 /**
16 This represents a tag value from a bam record
It is intended to conform to the BAM specification.
18 It stores only a pointer to the tag and from there
19 can be parsed into any of the tag types but only if
20 the tag matches that type.
21 c byte
22 C ubyte
23 s short
24 S ushort
25 i int
26 I uint
27 f float
28 Bc array of type byte
29 Z char array
30 H hex?
31 
32 Memory layout
33 pipes delimit byte boundaries in an array
34 8/9 are example values
35 2 is a count of the array
36 the ubyte * starts at the type char
37 c | 8|
38 s |  | 8|
39 i |  |  |  | 8|
40 B |i |  |  |  | 2|  |  |  | 8|  |  |  | 9|
41 
42 
43 Alias seq allows us to have an enum of types.
44 https://forum.dlang.org/post/kmdjfzpugudmwfrdgson@forum.dlang.org
45 Thanks Paul!
46 
Usage: auto t = TagValue(b, "XX") where b is a bam1_t* BAM record and XX is the two-character tag
48 */
struct TagValue
{
    /// Pointer to the tag's type character as returned by bam_aux_get;
    /// null when the lookup returned nothing (see `exists`).
    private ubyte* data;

    /** Constructor

    Looks the tag up with bam_aux_get and keeps the returned pointer.
    In debug builds a warning is logged when the lookup returns null.

    Usage: auto t = TagValue(b, "XX") where b is bam1_t* BAM record and XX is tag
    */
    this(bam1_t* b, char[2] tag)
    {
        data = bam_aux_get(b, tag);
        debug
        {
            if (data == null)
                hts_log_warning(__FUNCTION__, (tag ~ " doesn't exist for this record").idup);
        }
    }

    /// check if empty/exists/null
    /// Returns: true when the stored tag pointer is non-null.
    @property
    bool exists() const
    {
        return this.data !is null;
    }

    /// Convert tag value to a string.
    /// Reads the NUL-terminated bytes that follow the type character and
    /// returns a copy. The tag type is not verified; use `check!string` first.
    string to(T : string)()
    {
        assert(this.data !is null);
        return fromStringz(cast(char*)&data[1]).idup;
    }
    /// Convert tag value to scalar type T.
    /// Reinterprets the T.sizeof bytes that follow the type character as a T.
    /// The tag type is not verified; use `check!T` first.
    T to(T)()
    {
        assert(this.data !is null);
        return *cast(T*) data[1 .. T.sizeof + 1].ptr;
    }
    /// Convert tag value to an array of T.
    /// Layout read here: data[0] type char, data[1] element type char,
    /// data[2 .. 6] element count, data[6 ..] elements.
    /// The returned slice points directly at the tag's bytes (no copy is made).
    /// The tag type is not verified; use `check!(T[])` first.
    T[] to(T : T[])()
    {
        assert(this.data !is null);
        int n = *cast(int*) data[2 .. 6].ptr;
        return (cast(T*)(data[6 .. T.sizeof + 6].ptr))[0 .. n];
    }
    /// Check if tag type is type T
    /// Returns: true when the tag's type character equals the one TypeChars
    /// lists for T.
    bool check(T)()
    {
        assert(this.data !is null);
        return TypeChars[TypeIndex!T] == cast(char) data[0];
    }
    /// Check if tag type is type T (string specialization)
    bool check(T : string)()
    {
        assert(this.data !is null);
        return TypeChars[TypeIndex!T] == cast(char) data[0];
    }
    /// Check if tag type is an array ('B') whose element type is T
    bool check(T : T[])()
    {
        assert(this.data !is null);
        return (cast(char) data[0] == 'B') && (TypeChars[TypeIndex!T] == cast(char) data[1]);
    }
    /// Convert tag value to string
    /// Returns: a copy of the value for 'Z' tags; "" for a missing tag or
    /// any other tag type.
    string toString() const
    {
        if (data !is null && cast(char) data[0] == 'Z')
        {
            return fromStringz(cast(char*)&data[1]).idup;
        }
        return "";
    }
    /// Convert tag value to integer
    /// Returns: the value widened to long for the integer tag types
    /// (c, C, s, S, i, I); long.min for every other tag type.
    long toInt()
    {
        assert(this.data !is null);
        switch (cast(char) data[0])
        {
        case 'c':
            return cast(long)(to!byte);
        case 'C':
            return cast(long)(to!ubyte);
        case 's':
            return cast(long)(to!short);
        case 'S':
            return cast(long)(to!ushort);
        case 'i':
            return cast(long)(to!int);
        case 'I':
            return cast(long)(to!uint);
        default:
            return long.min;
        }
    }
    /// Convert tag value to integer array
    /// Returns: a newly converted long[] for 'B' tags with an integer element
    /// type (c, C, s, S, i, I); an empty array for non-array tags and for
    /// arrays of any other element type.
    long[] toIntArray()
    {
        assert(this.data !is null);
        // Only 'B' tags carry an element-type character at data[1]. For any
        // other tag that byte is payload and must not be read as a type.
        if (cast(char) data[0] != 'B')
            return [];
        switch (cast(char) data[1])
        {
        case 'c':
            return (to!(byte[]).to!(long[]));
        case 'C':
            return (to!(ubyte[]).to!(long[]));
        case 's':
            return (to!(short[]).to!(long[]));
        case 'S':
            return (to!(ushort[]).to!(long[]));
        case 'i':
            return (to!(int[]).to!(long[]));
        case 'I':
            return (to!(uint[]).to!(long[]));
        default:
            return [];
        }
    }
    /// Convert tag value to float array
    /// Returns: the elements of a 'B' tag whose element type is 'f', as a
    /// slice over the tag's bytes; an empty array for any other tag, so that
    /// integer data is never reinterpreted as floats.
    float[] toFloatArray()
    {
        assert(this.data !is null);
        if (cast(char) data[0] != 'B' || cast(char) data[1] != 'f')
            return [];
        return to!(float[]);
    }
}
172 
debug (dhtslib_unittest) unittest
{
    // Exercises TagValue against hand-built tag buffers, without a BAM file.
    TagValue v;

    // Array tag: 'B', element type, 4-byte count, then the elements.
    // The elements are written as 2-byte values (low byte set, high byte
    // left zero), so the element type is 'S' (ushort) to match what is read.
    ubyte[12] arrayTag;
    arrayTag[0] = cast(ubyte) 'B';
    arrayTag[1] = cast(ubyte) 'S';
    *cast(int*) arrayTag[2 .. 6].ptr = 3; // element count
    arrayTag[6] = 1;
    arrayTag[8] = 2;
    arrayTag[10] = 3;
    v.data = arrayTag.ptr;
    writeln("testing array");
    assert(v.to!(ushort[]) == [1, 2, 3]);

    // Scalar tag: 'i' followed by a 4-byte integer.
    ubyte[5] intTag;
    intTag[0] = cast(ubyte) 'i';
    *cast(int*) intTag[1 .. 5].ptr = 3;
    v.data = intTag.ptr;
    writeln("testing int");
    assert(v.to!int == 3);
}
193 
debug (dhtslib_unittest) unittest
{
    // Reads the first two records of htslib's auxf#values.sam test file and
    // checks typed access, toInt/toIntArray widening and type checking.
    import dhtslib.sam; // @suppress(dscanner.suspicious.local_imports)
    import dhtslib.htslib.hts_log : hts_log_info;
    import std.path : buildPath, dirName;

    hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
    hts_log_info(__FUNCTION__, "Testing tagvalue");
    hts_log_info(__FUNCTION__, "Loading test file");
    // Three directory levels above this source file, then htslib/test.
    auto samPath = buildPath(dirName(dirName(dirName(__FILE__))), "htslib",
            "test", "auxf#values.sam");
    auto bam = SAMFile(samPath, 0);
    hts_log_info(__FUNCTION__, "Getting read 1");
    auto records = bam.all_records(); // @suppress(dscanner.suspicious.unmodified)
    auto rec = records.front;

    hts_log_info(__FUNCTION__, "Testing string");
    assert(rec["RG"].to!string == "ID");

    hts_log_info(__FUNCTION__, "Testing char");
    assert(rec["A!"].to!char == '!');
    assert(rec["Ac"].to!char == 'c');
    assert(rec["AC"].to!char == 'C');

    hts_log_info(__FUNCTION__, "Testing int");
    // Exact-width reads of the unsigned tags.
    assert(rec["I0"].to!ubyte == 0);
    assert(rec["I1"].to!ubyte == 1);
    assert(rec["I2"].to!ubyte == 127);
    assert(rec["I3"].to!ubyte == 128);
    assert(rec["I4"].to!ubyte == 255);
    assert(rec["I5"].to!ushort == 256);
    assert(rec["I6"].to!ushort == 32_767);
    assert(rec["I7"].to!ushort == 32_768);
    assert(rec["I8"].to!ushort == 65_535);
    assert(rec["I9"].to!uint == 65_536);
    assert(rec["IA"].to!uint == 2_147_483_647);
    // Exact-width reads of the signed tags.
    assert(rec["i1"].to!byte == -1);
    assert(rec["i2"].to!byte == -127);
    assert(rec["i3"].to!byte == -128);
    assert(rec["i4"].to!short == -255);
    assert(rec["i5"].to!short == -256);
    assert(rec["i6"].to!short == -32_767);
    assert(rec["i7"].to!short == -32_768);
    assert(rec["i8"].to!int == -65_535);
    assert(rec["i9"].to!int == -65_536);
    assert(rec["iA"].to!int == -2_147_483_647);
    assert(rec["iB"].to!int == -2_147_483_648);
    // Same tags through the width-agnostic toInt.
    assert(rec["I0"].toInt == 0);
    assert(rec["I1"].toInt == 1);
    assert(rec["I2"].toInt == 127);
    assert(rec["I3"].toInt == 128);
    assert(rec["I4"].toInt == 255);
    assert(rec["I5"].toInt == 256);
    assert(rec["I6"].toInt == 32_767);
    assert(rec["I7"].toInt == 32_768);
    assert(rec["I8"].toInt == 65_535);
    assert(rec["I9"].toInt == 65_536);
    assert(rec["IA"].toInt == 2_147_483_647);
    assert(rec["i1"].toInt == -1);
    assert(rec["i2"].toInt == -127);
    assert(rec["i3"].toInt == -128);
    assert(rec["i4"].toInt == -255);
    assert(rec["i5"].toInt == -256);
    assert(rec["i6"].toInt == -32_767);
    assert(rec["i7"].toInt == -32_768);
    assert(rec["i8"].toInt == -65_535);
    assert(rec["i9"].toInt == -65_536);
    assert(rec["iA"].toInt == -2_147_483_647);
    assert(rec["iB"].toInt == -2_147_483_648);

    hts_log_info(__FUNCTION__, "Testing float");
    assert(rec["F0"].to!float == -1.0);
    assert(rec["F1"].to!float == 0.0);
    assert(rec["F2"].to!float == 1.0);

    hts_log_info(__FUNCTION__, "Running tag checking");
    assert(rec["I0"].check!ubyte);
    assert(rec["I5"].check!ushort);
    assert(rec["I9"].check!uint);
    assert(rec["i1"].check!byte);
    assert(rec["i4"].check!short);
    assert(rec["i8"].check!int);
    assert(rec["F0"].check!float);

    // The second record holds the array-valued tags.
    records.popFront;
    rec = records.front;

    hts_log_info(__FUNCTION__, "Testing arrays");
    assert(rec["Bs"].to!(short[]) == [-32_768, -32_767, 0, 32_767]);
    assert(rec["Bi"].to!(int[]) == [
            -2_147_483_648, -2_147_483_647, 0, 2_147_483_647
            ]);
    assert(rec["BS"].to!(ushort[]) == [0, 32_767, 32_768, 65_535]);
    assert(rec["BI"].to!(uint[]) == [
            0, 2_147_483_647, 2_147_483_648, 4_294_967_295
            ]);
    writeln(rec["Bs"].toIntArray);
    assert(rec["Bs"].toIntArray == [-32_768, -32_767, 0, 32_767]);
    assert(rec["Bi"].toIntArray == [
            -2_147_483_648, -2_147_483_647, 0, 2_147_483_647
            ]);
    assert(rec["BS"].toIntArray == [0, 32_767, 32_768, 65_535]);
    assert(rec["BI"].toIntArray == [
            0, 2_147_483_647, 2_147_483_648, 4_294_967_295
            ]);

    hts_log_info(__FUNCTION__, "Running tag checking");
    assert(rec["Bs"].check!(short[]));
    assert(rec["Bi"].check!(int[]));
    assert(rec["BS"].check!(ushort[]));
    assert(rec["BI"].check!(uint[]));
}