/**
Module provides a parser for SAM/BAM record auxiliary tags.

Reference: https://samtools.github.io/hts-specs/SAMtags.pdf
*/
module dhtslib.sam.tagvalue;

import std.stdio;
import std.meta : AliasSeq, staticIndexOf;
import std.string : fromStringz;
import htslib.sam : bam_aux_get, bam1_t, bam_aux2i;
import htslib.hts_log;
import std.conv : to;
import std.exception : enforce, assertThrown;
import std.math : approxEqual;
import dhtslib.memory;

/// D types that a scalar tag value can be parsed into.
/// The order matches `TypeChars` below, so `TypeIndex!T` indexes both.
alias Types = AliasSeq!(byte, ubyte, short, ushort, int, uint, float, string, char);
/// Position of `T` within `Types` (-1 if `T` is not a supported type).
enum TypeIndex(T) = staticIndexOf!(T, Types);
/// See https://samtools.github.io/hts-specs/SAMv1.pdf sec 1.5
char[9] TypeChars = ['c', 'C', 's', 'S', 'i', 'I', 'f', 'Z', 'A'];

/**

This represents a SAM/BAM record tag value, as outlined in the SAM specs §1.5.

The struct itself stores only a pointer to the tag, and has member functions
to parse into any of the tag types (but only if the tag matches that type) (TODO: is this true?)

Primary Types:
A   Printable character
i   Signed integer (see specs §1.5 footnote on size)
f   Single-precision float
Z   Printable string, including space
H   Byte array in the Hex format (network byte order / big-endian) //unknown if still supported
B   Integer or numeric array

Byte-array (B) types:
c   byte
C   ubyte
s   short
S   ushort
i   int32
I   uint32
f   float (spec does not indicate precision)

Memory layout
pipes delimit byte boundaries in an array
8/9 are example values
2 is a count of the array
the ubyte * starts at the type char
c | 8|
s |  | 8|
i |  |  |  | 8|
B |i |  |  |  | 2|  |  |  | 8|  |  |  | 9|


Alias seq allows us to have an enum of types.
https://forum.dlang.org/post/kmdjfzpugudmwfrdgson@forum.dlang.org
Thanks Paul!

Usage: auto t = TagValue(b, "XX") where b is a Bam1 BAM record and XX is tag
*/
struct TagValue
{

    /// Pointer to the tag's type character; the encoded value follows it.
    /// Null when the tag was not found.
    private ubyte* data;

    /// The record the tag was looked up in; stored alongside `data`.
    private Bam1 b;

    /** Constructor

    Looks up `tag` in record `b` with `bam_aux_get` and stores the result.

    Usage: auto t = TagValue(b, "XX") where b is a Bam1 BAM record and XX is tag
    */
    this(Bam1 b, char[2] tag)
    {
        this.b = b;
        data = bam_aux_get(b, tag);
    }

    /// Explicit postblit to avoid
    /// https://github.com/blachlylab/dhtslib/issues/122
    this(this)
    {
        this.data = data;
        this.b = b;
    }

    /// check if empty/exists/null
    /// Returns: true if the tag pointer is non-null
    @property
    bool exists()
    {
        return this.data !is null;
    }

    /* Tag type checking */

    /// Check if tag type is type T
    /// Throws: Exception if the tag does not exist
    bool check(T)()
    {
        enforce(this.exists,"Tag doesn't exist");
        return TypeChars[TypeIndex!T] == cast(char) data[0];
    }
    /// Check if tag type is a string (type char 'Z')
    /// Throws: Exception if the tag does not exist
    bool check(T : string)()
    {
        enforce(this.exists,"Tag doesn't exist");
        return TypeChars[TypeIndex!T] == cast(char) data[0];
    }
    /// Check if tag is an array ('B') whose element type is T.
    /// Note: the `T : T[]` specialization binds T to the element type,
    /// so `check!(int[])` compares the array subtype char against 'i'.
    /// Throws: Exception if the tag does not exist
    bool check(T : T[])()
    {
        enforce(this.exists,"Tag doesn't exist");
        return (cast(char) data[0] == 'B') && (TypeChars[TypeIndex!T] == cast(char) data[1]);
    }

    /// Check if tag is a numeric array (type char 'B'), of any element type
    /// Throws: Exception if the tag does not exist
    bool checkArray()
    {
        enforce(this.exists,"Tag doesn't exist");
        return cast(char) data[0] == 'B';
    }

    /// Check if tag is a hex byte array (type char 'H')
    /// Throws: Exception if the tag does not exist
    bool checkHexByteArray()
    {
        enforce(this.exists,"Tag doesn't exist");
        return cast(char) data[0] == 'H';
    }

    /* Tag conversion */

    /// Convert tag value to D string
    /// Accepts both 'Z' and 'H' tags; the bytes after the type char are
    /// read as a NUL-terminated string and copied.
    /// Throws: Exception if the tag does not exist or is not type Z or H
    string to(T : string)()
    {
        enforce(this.check!string || this.checkHexByteArray,"Tag is not type Z or H");
        return fromStringz(cast(char*)&data[1]).idup;
    }
    /// Convert tag value to D type
    /// Reads T.sizeof bytes immediately following the type char.
    /// Throws: Exception if the tag does not exist or is not of type T
    T to(T)()
    {
        enforce(this.check!T,"Tag is not type " ~ T.stringof);
        return *cast(T*)(data + 1);
    }
    /// Convert array tag value D array
    /// The returned slice points into the tag data; it is not a copy.
    /// Throws: Exception if the tag does not exist or is not an array of T
    T[] to(T : T[])()
    {
        // T is the element type here, so spell out the array type in the message
        enforce(this.check!(T[]),"Tag is not type " ~ T.stringof ~ "[]");
        // layout: 'B' | subtype char | 32-bit element count | elements...
        // the count is unsigned per the spec; reading it as a signed int
        // would turn a large count into a negative slice bound
        uint n = *cast(uint*)(data + 2);
        return (cast(T*)(data + 6))[0 .. n];
    }


    /// Convert any tag value to string
    /// Throws: Exception if the tag does not exist or its type char
    /// (or array subtype char) is not recognized
    string toString()
    {
        enforce(this.exists,"Tag doesn't exist");
        switch (cast(char) data[0])
        {
        case 'A':
            // printable character tag
            return to!char.to!string;
        case 'c':
            return to!byte.to!string;
        case 'C':
            return to!ubyte.to!string;
        case 's':
            return to!short.to!string;
        case 'S':
            return to!ushort.to!string;
        case 'i':
            return to!int.to!string;
        case 'I':
            return to!uint.to!string;
        case 'f':
            return to!float.to!string;
        case 'Z':
        case 'H':
            return to!string;
        case 'B':
            // dispatch on the array's element type char
            switch (cast(char) data[1])
            {
            case 'c':
                return to!(byte[]).to!string;
            case 'C':
                return to!(ubyte[]).to!string;
            case 's':
                return to!(short[]).to!string;
            case 'S':
                return to!(ushort[]).to!string;
            case 'i':
                return to!(int[]).to!string;
            case 'I':
                return to!(uint[]).to!string;
            case 'f':
                return to!(float[]).to!string;
            default:
                throw new Exception("Array Tag malformed");
            }
        default:
            throw new Exception("Tag malformed");
        }
    }
    /// Convert tag value to integer
    /// Any of the integer tag types (c, C, s, S, i, I) is widened to long.
    /// Throws: Exception if the tag does not exist or is not an integer type
    long toInt()
    {
        enforce(this.exists,"Tag doesn't exist");
        switch (cast(char) data[0])
        {
        case 'c':
            return cast(long)(to!byte);
        case 'C':
            return cast(long)(to!ubyte);
        case 's':
            return cast(long)(to!short);
        case 'S':
            return cast(long)(to!ushort);
        case 'i':
            return cast(long)(to!int);
        case 'I':
            return cast(long)(to!uint);
        default:
            throw new Exception("Tag is not numeric or is malformed");
        }
    }
    /// Convert tag value to integer array
    /// Any integer array subtype (c, C, s, S, i, I) is converted to long[].
    /// Throws: Exception if the tag does not exist, is not an array,
    /// or is not an integer array
    long[] toIntArray()
    {
        enforce(this.exists,"Tag doesn't exist");
        enforce(this.checkArray,"Tag is not a numeric array");
        switch (cast(char) data[1])
        {
        case 'c':
            return (to!(byte[]).to!(long[]));
        case 'C':
            return (to!(ubyte[]).to!(long[]));
        case 's':
            return (to!(short[]).to!(long[]));
        case 'S':
            return (to!(ushort[]).to!(long[]));
        case 'i':
            return (to!(int[]).to!(long[]));
        case 'I':
            return (to!(uint[]).to!(long[]));
        default:
            throw new Exception("Tag is malformed");
        }
    }
    /// Convert tag value to float array
    /// Throws: Exception if the tag does not exist, is not an array,
    /// or is not a float array
    float[] toFloatArray()
    {
        enforce(this.exists,"Tag doesn't exist");
        enforce(this.checkArray,"Tag is not an array");
        enforce(this.check!(float[]),"Tag is not a float array");
        return to!(float[]);
    }
}

debug (dhtslib_unittest) unittest
{
    TagValue v;
    assert(!v.exists);
    ubyte[12] testdata;
    assertThrown(v.toIntArray);
    assertThrown(v.toInt);
    assertThrown(v.toString);
    testdata[0] = cast(ubyte) 'B';
    testdata[1] = cast(ubyte) 'S';
    *cast(int*) testdata[2 .. 6].ptr = 3;
    testdata[6] = 1;
    testdata[8] = 2;
    testdata[10] = 3;
    v.data = testdata.ptr;
    writeln("testing array");
    assert(v.to!(ushort[]) == [1, 2, 3]);
    ubyte[5] testdata2;
    testdata2[0] = cast(ubyte) 'i';
    *cast(int*) testdata2[1 .. 5].ptr = 3;
    v.data = testdata2.ptr;
    writeln("testing int");
    assert(v.to!int == 3);
    // printable character ('A') tags must be convertible with toString too
    ubyte[2] testdata3;
    testdata3[0] = cast(ubyte) 'A';
    testdata3[1] = cast(ubyte) '!';
    v.data = testdata3.ptr;
    writeln("testing char");
    assert(v.to!char == '!');
    assert(v.toString == "!");
}

debug (dhtslib_unittest) unittest
{
    import dhtslib.sam; // @suppress(dscanner.suspicious.local_imports)
    import htslib.hts_log : hts_log_info;
    import std.path : buildPath, dirName;

    hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
    hts_log_info(__FUNCTION__, "Testing tagvalue");
    hts_log_info(__FUNCTION__, "Loading test file");
    auto bam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))), "htslib",
            "test", "auxf#values.sam"), 0);

    hts_log_info(__FUNCTION__, "Getting read 1");
    auto readrange = bam.allRecords(); // @suppress(dscanner.suspicious.unmodified)
    assert(readrange.empty == false);
    auto read = readrange.front;

    hts_log_info(__FUNCTION__, "Testing string");
    assert(read["RG"].to!string == "ID");

    hts_log_info(__FUNCTION__, "Testing char");
    assert(read["A!"].to!char == '!');
    assert(read["Ac"].to!char == 'c');
    assert(read["AC"].to!char == 'C');

    hts_log_info(__FUNCTION__, "Testing integral checks");
    assert(read["I0"].check!ubyte);
    assert(read["I1"].check!ubyte);
    assert(read["I2"].check!ubyte);
    assert(read["I3"].check!ubyte);
    assert(read["I4"].check!ubyte);
    assert(read["I5"].check!ushort);
    assert(read["I6"].check!ushort);
    assert(read["I7"].check!ushort);
    assert(read["I8"].check!ushort);
    assert(read["I9"].check!uint);
    assert(read["IA"].check!uint);
    assert(read["i1"].check!byte);
    assert(read["i2"].check!byte);
    assert(read["i3"].check!byte);
    assert(read["i4"].check!short);
    assert(read["i5"].check!short);
    assert(read["i6"].check!short);
    assert(read["i7"].check!short);
    assert(read["i8"].check!int);
    assert(read["i9"].check!int);
    assert(read["iA"].check!int);
    assert(read["iB"].check!int);

    hts_log_info(__FUNCTION__, "Testing integral conversion");
    assert(read["I0"].to!ubyte == 0);
    assert(read["I1"].to!ubyte == 1);
    assert(read["I2"].to!ubyte == 127);
    assert(read["I3"].to!ubyte == 128);
    assert(read["I4"].to!ubyte == 255);
    assert(read["I5"].to!ushort == 256);
    assert(read["I6"].to!ushort == 32_767);
    assert(read["I7"].to!ushort == 32_768);
    assert(read["I8"].to!ushort == 65_535);
    assert(read["I9"].to!uint == 65_536);
    assert(read["IA"].to!uint == 2_147_483_647);
    assert(read["i1"].to!byte == -1);
    assert(read["i2"].to!byte == -127);
    assert(read["i3"].to!byte == -128);
    assert(read["i4"].to!short == -255);
    assert(read["i5"].to!short == -256);
    assert(read["i6"].to!short == -32_767);
    assert(read["i7"].to!short == -32_768);
    assert(read["i8"].to!int == -65_535);
    assert(read["i9"].to!int == -65_536);
    assert(read["iA"].to!int == -2_147_483_647);
    assert(read["iB"].to!int == -2_147_483_648);

    hts_log_info(__FUNCTION__, "Testing integral toString");
    assert(read["I0"].toString == "0");
    assert(read["I1"].toString == "1");
    assert(read["I2"].toString == "127");
    assert(read["I3"].toString == "128");
    assert(read["I4"].toString == "255");
    assert(read["I5"].toString == "256");
    assert(read["I6"].toString == "32767");
    assert(read["I7"].toString == "32768");
    assert(read["I8"].toString == "65535");
    assert(read["I9"].toString == "65536");
    assert(read["IA"].toString == "2147483647");
    assert(read["i1"].toString == "-1");
    assert(read["i2"].toString == "-127");
    assert(read["i3"].toString == "-128");
    assert(read["i4"].toString == "-255");
    assert(read["i5"].toString == "-256");
    assert(read["i6"].toString == "-32767");
    assert(read["i7"].toString == "-32768");
    assert(read["i8"].toString == "-65535");
    assert(read["i9"].toString == "-65536");
    assert(read["iA"].toString == "-2147483647");
    assert(read["iB"].toString == "-2147483648");

    hts_log_info(__FUNCTION__, "Testing integral toInt");
    assert(read["I0"].toInt == 0);
    assert(read["I1"].toInt == 1);
    assert(read["I2"].toInt == 127);
    assert(read["I3"].toInt == 128);
    assert(read["I4"].toInt == 255);
    assert(read["I5"].toInt == 256);
    assert(read["I6"].toInt == 32_767);
    assert(read["I7"].toInt == 32_768);
    assert(read["I8"].toInt == 65_535);
    assert(read["I9"].toInt == 65_536);
    assert(read["IA"].toInt == 2_147_483_647);
    assert(read["i1"].toInt == -1);
    assert(read["i2"].toInt == -127);
    assert(read["i3"].toInt == -128);
    assert(read["i4"].toInt == -255);
    assert(read["i5"].toInt == -256);
    assert(read["i6"].toInt == -32_767);
    assert(read["i7"].toInt == -32_768);
    assert(read["i8"].toInt == -65_535);
    assert(read["i9"].toInt == -65_536);
    assert(read["iA"].toInt == -2_147_483_647);
    assert(read["iB"].toInt == -2_147_483_648);

    hts_log_info(__FUNCTION__, "Testing float checks");

    assert(read["F0"].check!float);
    assert(read["F1"].check!float);
    assert(read["F2"].check!float);

    hts_log_info(__FUNCTION__, "Testing float conversion");
    assert(read["F0"].to!float == -1.0);
    assert(read["F1"].to!float == 0.0);
    assert(read["F2"].to!float == 1.0);

    hts_log_info(__FUNCTION__, "Testing float toString");

    assert(approxEqual(read["F0"].toString.to!float, -1.0));
    assert(approxEqual(read["F1"].toString.to!float, 0.0));
    assert(approxEqual(read["F2"].toString.to!float, 1.0));

    hts_log_info(__FUNCTION__, "Running tag checking");
    assert(read["I0"].check!ubyte == true);
    assert(read["I5"].check!ushort == true);
    assert(read["I9"].check!uint == true);
    assert(read["i1"].check!byte == true);
    assert(read["i4"].check!short == true);
    assert(read["i8"].check!int == true);
    assert(read["F0"].check!float == true);
    readrange.popFront;
    read = readrange.front;
    hts_log_info(__FUNCTION__, "Testing arrays");
    assert(read["Bs"].to!(short[]) == [-32_768, -32_767, 0, 32_767]);
    assert(read["Bi"].to!(int[]) == [
            -2_147_483_648, -2_147_483_647, 0, 2_147_483_647
            ]);
    assert(read["BS"].to!(ushort[]) == [0, 32_767, 32_768, 65_535]);
    assert(read["BI"].to!(uint[]) == [
            0, 2_147_483_647, 2_147_483_648, 4_294_967_295
            ]);

    hts_log_info(__FUNCTION__, "Testing array toString");
    assert(read["Bs"].toString == "[-32768, -32767, 0, 32767]");
    assert(read["Bi"].toString == "[-2147483648, -2147483647, 0, 2147483647]");
    assert(read["BS"].toString == "[0, 32767, 32768, 65535]");
    assert(read["BI"].toString == "[0, 2147483647, 2147483648, 4294967295]");

    writeln(read["Bs"].toIntArray);
    assert(read["Bs"].toIntArray == [-32_768, -32_767, 0, 32_767]);
    assert(read["Bi"].toIntArray == [
            -2_147_483_648, -2_147_483_647, 0, 2_147_483_647
            ]);
    assert(read["BS"].toIntArray == [0, 32_767, 32_768, 65_535]);
    assert(read["BI"].toIntArray == [
            0, 2_147_483_647, 2_147_483_648, 4_294_967_295
            ]);
    hts_log_info(__FUNCTION__, "Running tag checking");
    assert(read["Bs"].check!(short[]) == true);
    assert(read["Bi"].check!(int[]) == true);
    assert(read["BS"].check!(ushort[]) == true);
    assert(read["BI"].check!(uint[]) == true);

    hts_log_info(__FUNCTION__, "Testing float Array");
    float[] arr = [10.0,11.0,12.1];
    read["fA"] = arr;
    assert(read["fA"].to!(float[]) == arr);
    assert(read["fA"].toFloatArray == arr);
    assert(read["fA"].toString == "[10, 11, 12.1]");

    hts_log_info(__FUNCTION__, "Testing byte Array");
    byte[] arr2 = [10, -10];
    read["cA"] = arr2;
    assert(read["cA"].to!(byte[]) == arr2);
    assert(read["cA"].toIntArray == arr2.to!(long[]));
    assert(read["cA"].toString == "[10, -10]");

    hts_log_info(__FUNCTION__, "Testing ubyte Array");
    ubyte[] arr3 = [10, 11];
    read["CA"] = arr3;
    assert(read["CA"].to!(ubyte[]) == arr3);
    assert(read["CA"].toIntArray == arr3.to!(long[]));
    assert(read["CA"].toString == "[10, 11]");

}