module dhtslib.tagvalue;

import std.stdio;
import std.meta : AliasSeq, staticIndexOf;
import std.string : fromStringz;
import dhtslib.htslib.sam : bam_aux_get, bam1_t, bam_aux2i;
import dhtslib.htslib.hts_log;
import std.conv : to;

/// D types a tag value can be converted to; order matches `TypeChars`
alias Types = AliasSeq!(byte, ubyte, short, ushort, int, uint, float, string, char);
/// Index of `T` within `Types` (`staticIndexOf` yields -1 when `T` is not listed)
enum TypeIndex(T) = staticIndexOf!(T, Types);
/// See https://samtools.github.io/hts-specs/SAMv1.pdf sec 1.5
char[9] TypeChars = ['c', 'C', 's', 'S', 'i', 'I', 'f', 'Z', 'A'];

/**
This represents a tag value from a BAM record.
It follows the encoding described by the BAM specification.
It stores only a pointer to the tag and from there
can be parsed into any of the tag types, but only if
the tag matches that type.

    c   byte
    C   ubyte
    s   short
    S   ushort
    i   int
    I   uint
    f   float
    A   char
    Bc  array of type byte (second char is the element type)
    Z   char array (NUL-terminated string)
    H   hex? (not handled by this struct)

Memory layout:
pipes delimit byte boundaries in an array,
8/9 are example values,
2 is the count of the array,
the ubyte* starts at the type char.

    c | 8|
    s |  | 8|
    i |  |  |  | 8|
    B |i |  |  |  | 2|  |  |  | 8|  |  |  | 9|

Alias seq allows us to have an enum of types.
https://forum.dlang.org/post/kmdjfzpugudmwfrdgson@forum.dlang.org
Thanks Paul!

Usage: auto t = TagValue(b, "XX") where b is bam1_t* BAM record and XX is tag
*/
struct TagValue
{
    /// Points at the tag's type character; null when the tag was not found
    private ubyte* data;

    /** Constructor

    Looks the tag up in the record with `bam_aux_get`; in debug builds a
    warning is logged when the lookup yields null.

    Usage: auto t = TagValue(b, "XX") where b is bam1_t* BAM record and XX is tag
    */
    this(bam1_t* b, char[2] tag)
    {
        data = bam_aux_get(b, tag);
        debug
        {
            if (data == null)
                hts_log_warning(__FUNCTION__, (tag ~ " doesn't exist for this record").idup);
        }
    }

    /// check if empty/exists/null
    @property
    bool exists() const
    {
        return this.data !is null;
    }

    /// Convert tag value to a string (copies the NUL-terminated bytes after the type char)
    string to(T : string)()
    {
        assert(this.data !is null);
        return fromStringz(cast(char*)&data[1]).idup;
    }

    /// Convert tag value to scalar type T (reads T.sizeof bytes after the type char)
    T to(T)()
    {
        assert(this.data !is null);
        return *cast(T*) data[1 .. T.sizeof + 1].ptr;
    }

    /**
    Convert tag value to an array of T.

    Layout: data[0] is 'B', data[1] the element type char, data[2 .. 6] the
    element count, and the elements start at data[6].
    The returned slice refers to the tag's own memory; it is not a copy.
    */
    T[] to(T : T[])()
    {
        assert(this.data !is null);
        // the BAM spec stores the element count as an unsigned 32-bit integer
        immutable uint count = *cast(uint*)(data + 2);
        return (cast(T*)(data + 6))[0 .. count];
    }

    /// Check if tag type is type T
    bool check(T)()
    {
        assert(this.data !is null);
        return TypeChars[TypeIndex!T] == cast(char) data[0];
    }

    /// Check if tag type is type T
    /// (separate overload so that `string` is not matched by the array overload)
    bool check(T : string)()
    {
        assert(this.data !is null);
        return TypeChars[TypeIndex!T] == cast(char) data[0];
    }

    /// Check if tag is an array ('B') whose element type is T
    bool check(T : T[])()
    {
        assert(this.data !is null);
        return (cast(char) data[0] == 'B') && (TypeChars[TypeIndex!T] == cast(char) data[1]);
    }

    /// Convert tag value to string; returns "" when the tag is missing or not of type 'Z'
    string toString() const
    {
        if (data !is null && cast(char) data[0] == 'Z')
        {
            return fromStringz(cast(char*)&data[1]).idup;
        }
        return "";
    }

    /// Convert tag value to integer; returns long.min when the tag is not an integer type
    long toInt()
    {
        assert(this.data !is null);
        switch (cast(char) data[0])
        {
        case 'c':
            return cast(long)(to!byte);
        case 'C':
            return cast(long)(to!ubyte);
        case 's':
            return cast(long)(to!short);
        case 'S':
            return cast(long)(to!ushort);
        case 'i':
            return cast(long)(to!int);
        case 'I':
            return cast(long)(to!uint);
        default:
            return long.min;
        }
    }

    /// Convert tag value to integer array; returns [] when the tag is not
    /// an array ('B') or its element type is not an integer type
    long[] toIntArray()
    {
        assert(this.data !is null);
        // data[1] is only an element-type char for 'B' tags; for any other
        // tag it is payload and must not be interpreted as a subtype
        if (cast(char) data[0] != 'B')
            return [];
        switch (cast(char) data[1])
        {
        case 'c':
            return (to!(byte[]).to!(long[]));
        case 'C':
            return (to!(ubyte[]).to!(long[]));
        case 's':
            return (to!(short[]).to!(long[]));
        case 'S':
            return (to!(ushort[]).to!(long[]));
        case 'i':
            return (to!(int[]).to!(long[]));
        case 'I':
            return (to!(uint[]).to!(long[]));
        default:
            return [];
        }
    }

    /// Convert tag value to float array; returns [] when the tag is not
    /// an array ('B') with element type 'f'
    float[] toFloatArray()
    {
        assert(this.data !is null);
        if (cast(char) data[0] != 'B' || cast(char) data[1] != 'f')
            return [];
        return to!(float[]);
    }
}

debug (dhtslib_unittest) unittest
{
    TagValue v;
    // hand-built 'B' tag: type char, element type char, 4-byte count, 3 ushorts
    ubyte[12] testdata;
    testdata[0] = cast(ubyte) 'B';
    testdata[1] = cast(ubyte) 'S';
    *cast(int*) testdata[2 .. 6].ptr = 3;
    testdata[6] = 1;
    testdata[8] = 2;
    testdata[10] = 3;
    v.data = testdata.ptr;
    writeln("testing array");
    assert(v.to!(ushort[]) == [1, 2, 3]);
    assert(v.check!(ushort[]));
    assert(v.toIntArray == [1, 2, 3]);
    // an integer array is not a float array
    assert(v.toFloatArray.length == 0);

    // hand-built scalar 'i' tag: type char followed by a 4-byte int
    ubyte[5] testdata2;
    testdata2[0] = cast(ubyte) 'i';
    *cast(int*) testdata2[1 .. 5].ptr = 3;
    v.data = testdata2.ptr;
    writeln("testing int");
    assert(v.to!int == 3);
    // scalar tags must not be read as arrays
    assert(v.toIntArray.length == 0);
    assert(v.toFloatArray.length == 0);
}

debug (dhtslib_unittest) unittest
{
    import dhtslib.sam; // @suppress(dscanner.suspicious.local_imports)
    import dhtslib.htslib.hts_log : hts_log_info;
    import std.path : buildPath, dirName;

    hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
    hts_log_info(__FUNCTION__, "Testing tagvalue");
    hts_log_info(__FUNCTION__, "Loading test file");
    auto bam = SAMFile(buildPath(dirName(dirName(dirName(__FILE__))), "htslib",
            "test", "auxf#values.sam"), 0);
    hts_log_info(__FUNCTION__, "Getting read 1");
    auto readrange = bam.all_records(); // @suppress(dscanner.suspicious.unmodified)
    auto read = readrange.front;
    hts_log_info(__FUNCTION__, "Testing string");
    assert(read["RG"].to!string == "ID");
    hts_log_info(__FUNCTION__, "Testing char");
    assert(read["A!"].to!char == '!');
    assert(read["Ac"].to!char == 'c');
    assert(read["AC"].to!char == 'C');
    hts_log_info(__FUNCTION__, "Testing int");
    assert(read["I0"].to!ubyte == 0);
    assert(read["I1"].to!ubyte == 1);
    assert(read["I2"].to!ubyte == 127);
    assert(read["I3"].to!ubyte == 128);
    assert(read["I4"].to!ubyte == 255);
    assert(read["I5"].to!ushort == 256);
    assert(read["I6"].to!ushort == 32_767);
    assert(read["I7"].to!ushort == 32_768);
    assert(read["I8"].to!ushort == 65_535);
    assert(read["I9"].to!uint == 65_536);
    assert(read["IA"].to!uint == 2_147_483_647);
    assert(read["i1"].to!byte == -1);
    assert(read["i2"].to!byte == -127);
    assert(read["i3"].to!byte == -128);
    assert(read["i4"].to!short == -255);
    assert(read["i5"].to!short == -256);
    assert(read["i6"].to!short == -32_767);
    assert(read["i7"].to!short == -32_768);
    assert(read["i8"].to!int == -65_535);
    assert(read["i9"].to!int == -65_536);
    assert(read["iA"].to!int == -2_147_483_647);
    assert(read["iB"].to!int == -2_147_483_648);
    assert(read["I0"].toInt == 0);
    assert(read["I1"].toInt == 1);
    assert(read["I2"].toInt == 127);
    assert(read["I3"].toInt == 128);
    assert(read["I4"].toInt == 255);
    assert(read["I5"].toInt == 256);
    assert(read["I6"].toInt == 32_767);
    assert(read["I7"].toInt == 32_768);
    assert(read["I8"].toInt == 65_535);
    assert(read["I9"].toInt == 65_536);
    assert(read["IA"].toInt == 2_147_483_647);
    assert(read["i1"].toInt == -1);
    assert(read["i2"].toInt == -127);
    assert(read["i3"].toInt == -128);
    assert(read["i4"].toInt == -255);
    assert(read["i5"].toInt == -256);
    assert(read["i6"].toInt == -32_767);
    assert(read["i7"].toInt == -32_768);
    assert(read["i8"].toInt == -65_535);
    assert(read["i9"].toInt == -65_536);
    assert(read["iA"].toInt == -2_147_483_647);
    assert(read["iB"].toInt == -2_147_483_648);
    hts_log_info(__FUNCTION__, "Testing float");
    assert(read["F0"].to!float == -1.0);
    assert(read["F1"].to!float == 0.0);
    assert(read["F2"].to!float == 1.0);
    hts_log_info(__FUNCTION__, "Running tag checking");
    assert(read["I0"].check!ubyte == true);
    assert(read["I5"].check!ushort == true);
    assert(read["I9"].check!uint == true);
    assert(read["i1"].check!byte == true);
    assert(read["i4"].check!short == true);
    assert(read["i8"].check!int == true);
    assert(read["F0"].check!float == true);
    readrange.popFront;
    read = readrange.front;
    hts_log_info(__FUNCTION__, "Testing arrays");
    assert(read["Bs"].to!(short[]) == [-32_768, -32_767, 0, 32_767]);
    assert(read["Bi"].to!(int[]) == [
            -2_147_483_648, -2_147_483_647, 0, 2_147_483_647
            ]);
    assert(read["BS"].to!(ushort[]) == [0, 32_767, 32_768, 65_535]);
    assert(read["BI"].to!(uint[]) == [
            0, 2_147_483_647, 2_147_483_648, 4_294_967_295
            ]);
    writeln(read["Bs"].toIntArray);
    assert(read["Bs"].toIntArray == [-32_768, -32_767, 0, 32_767]);
    assert(read["Bi"].toIntArray == [
            -2_147_483_648, -2_147_483_647, 0, 2_147_483_647
            ]);
    assert(read["BS"].toIntArray == [0, 32_767, 32_768, 65_535]);
    assert(read["BI"].toIntArray == [
            0, 2_147_483_647, 2_147_483_648, 4_294_967_295
            ]);
    hts_log_info(__FUNCTION__, "Running tag checking");
    assert(read["Bs"].check!(short[]) == true);
    assert(read["Bi"].check!(int[]) == true);
    assert(read["BS"].check!(ushort[]) == true);
    assert(read["BI"].check!(uint[]) == true);
}