/**
Module provides a parser for SAM/BAM record auxiliary tags.

Reference: https://samtools.github.io/hts-specs/SAMtags.pdf
*/
module dhtslib.sam.tagvalue;

import std.stdio;
import std.meta : AliasSeq, staticIndexOf;
import std.string : fromStringz;
import htslib.sam : bam_aux_get, bam1_t, bam_aux2i;
import htslib.hts_log;
import std.conv : to;
import std.exception : enforce, assertThrown;
import std.math : approxEqual;
import dhtslib.memory;

/// D types that a tag value can be converted to; the order matches `TypeChars`.
alias Types = AliasSeq!(byte, ubyte, short, ushort, int, uint, float, string, char);
/// Position of `T` within `Types`, or -1 when `T` is not a supported tag type.
enum TypeIndex(T) = staticIndexOf!(T, Types);
/// Type characters corresponding, position by position, to `Types`.
/// See https://samtools.github.io/hts-specs/SAMv1.pdf sec 1.5
char[9] TypeChars = ['c', 'C', 's', 'S', 'i', 'I', 'f', 'Z', 'A'];

/**

This represents a SAM/BAM record tag value, as outlined in the SAM specs §1.5.

The struct itself stores only a pointer to the tag (plus the record it was
looked up in), and has member functions to parse into any of the tag types.
Every conversion enforces that the stored type character matches the
requested type and throws otherwise.

Primary Types:
    A   Printable character
    i   Signed integer (see specs §1.5 footnote on size)
    f   Single-precision float
    Z   Printable string, including space
    H   Byte array in the Hex format (network byte order / big-endian) //unknown if still supported
    B   Integer or numeric array

Byte-array (B) types:
    c   byte
    C   ubyte
    s   short
    S   ushort
    i   int32
    I   uint32
    f   float (spec does not indicate precision)

Memory layout
    pipes delimit byte boundaries in an array
    8/9 are example values
    2 is a count of the array
    the ubyte * starts at the type char
    c | 8|
    s |  | 8|
    i |  |  |  | 8|
    B |i |  |  |  | 2|  |  |  | 8|  |  |  | 9|


Alias seq allows us to have an enum of types.
https://forum.dlang.org/post/kmdjfzpugudmwfrdgson@forum.dlang.org
Thanks Paul!

Usage: auto t = TagValue(b, "XX") where b is a Bam1 BAM record and XX is tag
*/
struct TagValue
{
    /// Pointer to the tag's type character; null when the tag was not found.
    private ubyte* data;

    /// The record the tag was looked up in.
    private Bam1 b;

    /** Constructor

    Looks the tag up with `bam_aux_get` and stores the resulting pointer.

    Usage: auto t = TagValue(b, "XX") where b is a Bam1 BAM record and XX is tag
    */
    this(Bam1 b, char[2] tag)
    {
        this.b = b;
        data = bam_aux_get(b, tag);
    }

    /// check if empty/exists/null
    @property
    bool exists()
    {
        return this.data !is null;
    }

    /* Tag type checking */

    /// Check if tag type is type T (scalar types).
    /// Throws: Exception if the tag doesn't exist.
    bool check(T)()
    {
        static assert(TypeIndex!T != -1, T.stringof ~ " is not a supported tag type");
        enforce(this.exists, "Tag doesn't exist");
        return TypeChars[TypeIndex!T] == cast(char) data[0];
    }

    /// Check if tag type is a string ('Z').
    /// This overload exists so that `string` is not picked up by the array overload below.
    /// Throws: Exception if the tag doesn't exist.
    bool check(T : string)()
    {
        static assert(TypeIndex!T != -1, T.stringof ~ " is not a supported tag type");
        enforce(this.exists, "Tag doesn't exist");
        return TypeChars[TypeIndex!T] == cast(char) data[0];
    }

    /// Check if tag is an array ('B') whose element type is T.
    /// Note: inside this template `T` names the element type, not the array type.
    /// Throws: Exception if the tag doesn't exist.
    bool check(T : T[])()
    {
        static assert(TypeIndex!T != -1, T.stringof ~ " is not a supported tag array element type");
        enforce(this.exists, "Tag doesn't exist");
        return (cast(char) data[0] == 'B') && (TypeChars[TypeIndex!T] == cast(char) data[1]);
    }

    /// Check if tag is an array ('B') of any element type.
    /// Throws: Exception if the tag doesn't exist.
    bool checkArray()
    {
        enforce(this.exists, "Tag doesn't exist");
        return cast(char) data[0] == 'B';
    }

    /// Check if tag is a hex byte array ('H').
    /// Throws: Exception if the tag doesn't exist.
    bool checkHexByteArray()
    {
        enforce(this.exists, "Tag doesn't exist");
        return cast(char) data[0] == 'H';
    }

    /* Tag conversion */

    /// Convert tag value to D string (tag types 'Z' and 'H').
    /// The NUL-terminated bytes following the type character are copied.
    string to(T : string)()
    {
        enforce(this.check!string || this.checkHexByteArray, "Tag is not type Z or H");
        return fromStringz(cast(char*)&data[1]).idup;
    }

    /// Convert tag value to D type.
    /// The value is read directly from the bytes following the type character.
    T to(T)()
    {
        enforce(this.check!T, "Tag is not type " ~ T.stringof);
        return *cast(T*) data[1 .. T.sizeof + 1].ptr;
    }

    /// Convert array tag value to D array.
    /// The returned slice refers to the tag's own bytes; it is not a copy.
    T[] to(T : T[])()
    {
        // `T` is the element type here, so append "[]" to name the requested type.
        enforce(this.check!(T[]), "Tag is not type " ~ T.stringof ~ "[]");
        // Layout: 'B' | element type char | 4-byte element count | elements...
        // Read the count as unsigned so a large value cannot sign-extend
        // into a nonsensical slice length.
        immutable uint n = *cast(uint*)&data[2];
        return (cast(T*)&data[6])[0 .. n];
    }

    /// Convert any tag value to string
    /// Throws: Exception if the tag doesn't exist or its type character is not recognised.
    string toString()
    {
        enforce(this.exists, "Tag doesn't exist");
        switch (cast(char) data[0])
        {
        case 'A':
            // Printable character tags were previously rejected as malformed.
            return to!char.to!string;
        case 'c':
            return to!byte.to!string;
        case 'C':
            return to!ubyte.to!string;
        case 's':
            return to!short.to!string;
        case 'S':
            return to!ushort.to!string;
        case 'i':
            return to!int.to!string;
        case 'I':
            return to!uint.to!string;
        case 'f':
            return to!float.to!string;
        case 'Z':
        case 'H':
            return to!string;
        case 'B':
            // Arrays carry their element type in the second byte.
            switch (cast(char) data[1])
            {
            case 'c':
                return to!(byte[]).to!string;
            case 'C':
                return to!(ubyte[]).to!string;
            case 's':
                return to!(short[]).to!string;
            case 'S':
                return to!(ushort[]).to!string;
            case 'i':
                return to!(int[]).to!string;
            case 'I':
                return to!(uint[]).to!string;
            case 'f':
                return to!(float[]).to!string;
            default:
                throw new Exception("Array Tag malformed");
            }
        default:
            throw new Exception("Tag malformed");
        }
    }

    /// Convert tag value to integer
    /// Throws: Exception if the tag doesn't exist or is not one of the integer types.
    long toInt()
    {
        enforce(this.exists, "Tag doesn't exist");
        switch (cast(char) data[0])
        {
        case 'c':
            return cast(long)(to!byte);
        case 'C':
            return cast(long)(to!ubyte);
        case 's':
            return cast(long)(to!short);
        case 'S':
            return cast(long)(to!ushort);
        case 'i':
            return cast(long)(to!int);
        case 'I':
            return cast(long)(to!uint);
        default:
            throw new Exception("Tag is not numeric or is malformed");
        }
    }

    /// Convert tag value to integer array
    /// Throws: Exception if the tag doesn't exist, is not an array,
    /// or its element type is not one of the integer types.
    long[] toIntArray()
    {
        enforce(this.exists, "Tag doesn't exist");
        enforce(this.checkArray, "Tag is not a numeric array");
        switch (cast(char) data[1])
        {
        case 'c':
            return (to!(byte[]).to!(long[]));
        case 'C':
            return (to!(ubyte[]).to!(long[]));
        case 's':
            return (to!(short[]).to!(long[]));
        case 'S':
            return (to!(ushort[]).to!(long[]));
        case 'i':
            return (to!(int[]).to!(long[]));
        case 'I':
            return (to!(uint[]).to!(long[]));
        default:
            throw new Exception("Tag is malformed");
        }
    }

    /// Convert tag value to float array
    /// Throws: Exception if the tag doesn't exist or is not a float array.
    float[] toFloatArray()
    {
        enforce(this.exists, "Tag doesn't exist");
        enforce(this.checkArray, "Tag is not an array");
        enforce(this.check!(float[]), "Tag is not a float array");
        return to!(float[]);
    }
}

debug (dhtslib_unittest) unittest
{
    TagValue v;
    assert(!v.exists);
    ubyte[12] testdata;
    assertThrown(v.toIntArray);
    assertThrown(v.toInt);
    assertThrown(v.toString);
    testdata[0] = cast(ubyte) 'B';
    testdata[1] = cast(ubyte) 'S';
    *cast(int*) testdata[2 .. 6].ptr = 3;
    testdata[6] = 1;
    testdata[8] = 2;
    testdata[10] = 3;
    v.data = testdata.ptr;
    writeln("testing array");
    assert(v.to!(ushort[]) == [1, 2, 3]);
    ubyte[5] testdata2;
    testdata2[0] = cast(ubyte) 'i';
    *cast(int*) testdata2[1 .. 5].ptr = 3;
    v.data = testdata2.ptr;
    writeln("testing int");
    assert(v.to!int == 3);
    // Regression: character ('A') tags must be convertible by toString.
    ubyte[2] testdata3;
    testdata3[0] = cast(ubyte) 'A';
    testdata3[1] = cast(ubyte) 'x';
    v.data = testdata3.ptr;
    writeln("testing char");
    assert(v.to!char == 'x');
    assert(v.toString == "x");
}

debug (dhtslib_unittest) unittest
{
    import dhtslib.sam; // @suppress(dscanner.suspicious.local_imports)
    import htslib.hts_log : hts_log_info;
    import std.path : buildPath, dirName;

    hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
    hts_log_info(__FUNCTION__, "Testing tagvalue");
    hts_log_info(__FUNCTION__, "Loading test file");
    auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))), "htslib",
            "test", "auxf#values.sam"), 0);

    hts_log_info(__FUNCTION__, "Getting read 1");
    auto readrange = bam.allRecords(); // @suppress(dscanner.suspicious.unmodified)
    assert(readrange.empty == false);
    auto read = readrange.front;

    hts_log_info(__FUNCTION__, "Testing string");
    assert(read["RG"].to!string == "ID");

    hts_log_info(__FUNCTION__, "Testing char");
    assert(read["A!"].to!char == '!');
    assert(read["Ac"].to!char == 'c');
    assert(read["AC"].to!char == 'C');

    hts_log_info(__FUNCTION__, "Testing integral checks");
    assert(read["I0"].check!ubyte);
    assert(read["I1"].check!ubyte);
    assert(read["I2"].check!ubyte);
    assert(read["I3"].check!ubyte);
    assert(read["I4"].check!ubyte);
    assert(read["I5"].check!ushort);
    assert(read["I6"].check!ushort);
    assert(read["I7"].check!ushort);
    assert(read["I8"].check!ushort);
    assert(read["I9"].check!uint);
    assert(read["IA"].check!uint);
    assert(read["i1"].check!byte);
    assert(read["i2"].check!byte);
    assert(read["i3"].check!byte);
    assert(read["i4"].check!short);
    assert(read["i5"].check!short);
    assert(read["i6"].check!short);
    assert(read["i7"].check!short);
    assert(read["i8"].check!int);
    assert(read["i9"].check!int);
    assert(read["iA"].check!int);
    assert(read["iB"].check!int);

    hts_log_info(__FUNCTION__, "Testing integral conversion");
    assert(read["I0"].to!ubyte == 0);
    assert(read["I1"].to!ubyte == 1);
    assert(read["I2"].to!ubyte == 127);
    assert(read["I3"].to!ubyte == 128);
    assert(read["I4"].to!ubyte == 255);
    assert(read["I5"].to!ushort == 256);
    assert(read["I6"].to!ushort == 32_767);
    assert(read["I7"].to!ushort == 32_768);
    assert(read["I8"].to!ushort == 65_535);
    assert(read["I9"].to!uint == 65_536);
    assert(read["IA"].to!uint == 2_147_483_647);
    assert(read["i1"].to!byte == -1);
    assert(read["i2"].to!byte == -127);
    assert(read["i3"].to!byte == -128);
    assert(read["i4"].to!short == -255);
    assert(read["i5"].to!short == -256);
    assert(read["i6"].to!short == -32_767);
    assert(read["i7"].to!short == -32_768);
    assert(read["i8"].to!int == -65_535);
    assert(read["i9"].to!int == -65_536);
    assert(read["iA"].to!int == -2_147_483_647);
    assert(read["iB"].to!int == -2_147_483_648);

    hts_log_info(__FUNCTION__, "Testing integral toString");
    assert(read["I0"].toString == "0");
    assert(read["I1"].toString == "1");
    assert(read["I2"].toString == "127");
    assert(read["I3"].toString == "128");
    assert(read["I4"].toString == "255");
    assert(read["I5"].toString == "256");
    assert(read["I6"].toString == "32767");
    assert(read["I7"].toString == "32768");
    assert(read["I8"].toString == "65535");
    assert(read["I9"].toString == "65536");
    assert(read["IA"].toString == "2147483647");
    assert(read["i1"].toString == "-1");
    assert(read["i2"].toString == "-127");
    assert(read["i3"].toString == "-128");
    assert(read["i4"].toString == "-255");
    assert(read["i5"].toString == "-256");
    assert(read["i6"].toString == "-32767");
    assert(read["i7"].toString == "-32768");
    assert(read["i8"].toString == "-65535");
    assert(read["i9"].toString == "-65536");
    assert(read["iA"].toString == "-2147483647");
    assert(read["iB"].toString == "-2147483648");

    hts_log_info(__FUNCTION__, "Testing integral toInt");
    assert(read["I0"].toInt == 0);
    assert(read["I1"].toInt == 1);
    assert(read["I2"].toInt == 127);
    assert(read["I3"].toInt == 128);
    assert(read["I4"].toInt == 255);
    assert(read["I5"].toInt == 256);
    assert(read["I6"].toInt == 32_767);
    assert(read["I7"].toInt == 32_768);
    assert(read["I8"].toInt == 65_535);
    assert(read["I9"].toInt == 65_536);
    assert(read["IA"].toInt == 2_147_483_647);
    assert(read["i1"].toInt == -1);
    assert(read["i2"].toInt == -127);
    assert(read["i3"].toInt == -128);
    assert(read["i4"].toInt == -255);
    assert(read["i5"].toInt == -256);
    assert(read["i6"].toInt == -32_767);
    assert(read["i7"].toInt == -32_768);
    assert(read["i8"].toInt == -65_535);
    assert(read["i9"].toInt == -65_536);
    assert(read["iA"].toInt == -2_147_483_647);
    assert(read["iB"].toInt == -2_147_483_648);

    hts_log_info(__FUNCTION__, "Testing float checks");

    assert(read["F0"].check!float);
    assert(read["F1"].check!float);
    assert(read["F2"].check!float);

    hts_log_info(__FUNCTION__, "Testing float conversion");
    assert(read["F0"].to!float == -1.0);
    assert(read["F1"].to!float == 0.0);
    assert(read["F2"].to!float == 1.0);

    hts_log_info(__FUNCTION__, "Testing float toString");

    assert(approxEqual(read["F0"].toString.to!float, -1.0));
    assert(approxEqual(read["F1"].toString.to!float, 0.0));
    assert(approxEqual(read["F2"].toString.to!float, 1.0));

    hts_log_info(__FUNCTION__, "Running tag checking");
    assert(read["I0"].check!ubyte == true);
    assert(read["I5"].check!ushort == true);
    assert(read["I9"].check!uint == true);
    assert(read["i1"].check!byte == true);
    assert(read["i4"].check!short == true);
    assert(read["i8"].check!int == true);
    assert(read["F0"].check!float == true);
    readrange.popFront;
    read = readrange.front;
    hts_log_info(__FUNCTION__, "Testing arrays");
    assert(read["Bs"].to!(short[]) == [-32_768, -32_767, 0, 32_767]);
    assert(read["Bi"].to!(int[]) == [
            -2_147_483_648, -2_147_483_647, 0, 2_147_483_647
            ]);
    assert(read["BS"].to!(ushort[]) == [0, 32_767, 32_768, 65_535]);
    assert(read["BI"].to!(uint[]) == [
            0, 2_147_483_647, 2_147_483_648, 4_294_967_295
            ]);

    hts_log_info(__FUNCTION__, "Testing array toString");
    assert(read["Bs"].toString == "[-32768, -32767, 0, 32767]");
    assert(read["Bi"].toString == "[-2147483648, -2147483647, 0, 2147483647]");
    assert(read["BS"].toString == "[0, 32767, 32768, 65535]");
    assert(read["BI"].toString == "[0, 2147483647, 2147483648, 4294967295]");

    writeln(read["Bs"].toIntArray);
    assert(read["Bs"].toIntArray == [-32_768, -32_767, 0, 32_767]);
    assert(read["Bi"].toIntArray == [
            -2_147_483_648, -2_147_483_647, 0, 2_147_483_647
            ]);
    assert(read["BS"].toIntArray == [0, 32_767, 32_768, 65_535]);
    assert(read["BI"].toIntArray == [
            0, 2_147_483_647, 2_147_483_648, 4_294_967_295
            ]);
    hts_log_info(__FUNCTION__, "Running tag checking");
    assert(read["Bs"].check!(short[]) == true);
    assert(read["Bi"].check!(int[]) == true);
    assert(read["BS"].check!(ushort[]) == true);
    assert(read["BI"].check!(uint[]) == true);

    hts_log_info(__FUNCTION__, "Testing float Array");
    float[] arr = [10.0,11.0,12.1];
    read["fA"] = arr;
    assert(read["fA"].to!(float[]) == arr);
    assert(read["fA"].toFloatArray == arr);
    assert(read["fA"].toString == "[10, 11, 12.1]");

    hts_log_info(__FUNCTION__, "Testing byte Array");
    byte[] arr2 = [10, -10];
    read["cA"] = arr2;
    assert(read["cA"].to!(byte[]) == arr2);
    assert(read["cA"].toIntArray == arr2.to!(long[]));
    assert(read["cA"].toString == "[10, -10]");

    hts_log_info(__FUNCTION__, "Testing ubyte Array");
    ubyte[] arr3 = [10, 11];
    read["CA"] = arr3;
    assert(read["CA"].to!(ubyte[]) == arr3);
    assert(read["CA"].toIntArray == arr3.to!(long[]));
    assert(read["CA"].toString == "[10, 11]");

}