1 /** Coordinates and Coordinate Systems 2 3 Interval include `start` and `end`, but no reference sequence id (chr, contig, etc.) 4 5 The `Coordinate` type is templated on `CoordSystem` enum, so that the actual coordinate 6 system in use -- and by this, we mean zero- or one-based, and half-open vs. closed -- 7 is encoded within the type itself. e.g. `Coordinate!(CoordSystem.zbho)(0, 100)` 8 9 In this way, dhtslib functions that take type `Coordinate` can enforce safety checks, 10 or optionally, interconversions, to avoid off-by-one errors when dealing with different 11 systems, as is common in different HTS/NGS file formats. 12 13 In general, zero-based half-open (ZBHO) coordinates are easy to compute on, whereas 14 many data sources intended to be read and understood by humans (including NCBI) are 15 one-based closed systems. You may enjoy reading the following articles: 16 17 https://www.biostars.org/p/84686/ 18 https://www.biostars.org/p/6373/ 19 http://genome.ucsc.edu/blog/the-ucsc-genome-browser-coordinate-counting-systems/ 20 http://genomewiki.ucsc.edu/index.php/Coordinate_Transforms 21 22 Zero-based, half open: BED, BAM 23 Zero-based, closed: HTSlib function faidx_fetch_seq , strangely 24 One-based, half open: ? 25 One-based, closed: GFF3, SAM (text; not BAM), VCF (text; not BCF) 26 */ 27 28 /* 29 30 Translation table for my reference: 31 zbho zbc obho obc 32 zbho - 0,-1 +1,+1 +1, 0 33 zbc 0,+1 - +1,+2 +1,+1 34 obho -1,-1 -1,-2 - 0,-1 35 obc -1,0 -1,-1 0,+1 - 36 37 */ 38 39 module dhtslib.coordinates; 40 import std.traits : isIntegral; 41 import std.conv : to; 42 43 /// Represents 0-based vs 1-based coordinate types 44 enum Basis 45 { 46 zero = 0, 47 one 48 } 49 50 /// Represents whether a coordinate set's end 51 /// coordinate is open or closed 52 enum End 53 { 54 open = 0, 55 closed 56 } 57 58 /// Coordinate type for a single position with in a Coordinate set, 59 /// where the type itself encodes the coordinate system details 60 /// (zero or one-based) 61 struct Coordinate(Basis bs) 62 { 63 @safe @nogc nothrow pure: 64 /// Coordinate value 65 long pos; 66 alias pos this; 67 68 invariant 69 { 70 assert(pos >= 0); 71 // in one based systems, pos cannot be zero 72 static if(bs == Basis.one) 73 { 74 assert(pos != 0); 75 } 76 } 77 78 /// Convert coordinate to another based system 79 auto to(Basis tobs)() 80 { 81 // return same Base type 82 static if (bs == tobs) return Coordinate!bs(pos); 83 // one to zero base, decrement 84 else static if (tobs == Basis.zero) return Coordinate!bs(pos - 1); 85 // zero to one base, increment 86 else static if (tobs == Basis.one) return Coordinate!bs(pos + 1); 87 else static assert(0, "Coordinate Type error"); 88 } 89 90 /// Convert coordinate to another based system using shortcuts 91 auto to(T: ZB)() 92 { 93 return this.to!(Basis.zero); 94 } 95 96 auto to(T: OB)() 97 { 98 return this.to!(Basis.one); 99 } 100 101 /// make a new coordinate with a value of this.pos + off 102 /// this.pos + off 103 auto offset(T)(T off) 104 if(isIntegral!T) 105 { 106 return Coordinate!(bs)(cast(long)(this.pos + off)); 107 } 108 109 /// opbinary + or - to use offset 110 auto opBinary(string op, T)(T off) 111 if ((op == "+" || op == "-") && isIntegral!T) 112 { 113 static if(op == "+") 114 return this.offset(off); 115 else static if(op == "-") 116 return this.offset(-off); 117 else static assert(0,"Operator "~op~" not implemented"); 118 } 119 } 120 121 alias ZB = Coordinate!(Basis.zero); 122 alias OB = Coordinate!(Basis.one); 123 124 alias ZeroBased = Coordinate!(Basis.zero); 125 alias OneBased = Coordinate!(Basis.one); 126 127 /// template to convert Basis, End enum combination to 128 /// respective CoordinateSystem enum 129 template getCoordinateSystem(Basis bs, End es){ 130 static if(bs == Basis.zero){ 131 static if(es == End.open) 132 enum CoordSystem getCoordinateSystem = CoordSystem.zbho; 133 else static if(es == End.closed) 134 enum CoordSystem getCoordinateSystem = CoordSystem.zbc; 135 } 136 else static if(bs == Basis.one){ 137 static if(es == End.open) 138 enum CoordSystem getCoordinateSystem = CoordSystem.obho; 139 else static if(es == End.closed) 140 enum CoordSystem getCoordinateSystem = CoordSystem.obc; 141 } 142 } 143 144 // just sanity checking 145 static assert(getCoordinateSystem!(Basis.zero, End.open) == CoordSystem.zbho); 146 static assert(getCoordinateSystem!(Basis.zero, End.closed) == CoordSystem.zbc); 147 static assert(getCoordinateSystem!(Basis.one, End.open) == CoordSystem.obho); 148 static assert(getCoordinateSystem!(Basis.one, End.closed) == CoordSystem.obc); 149 150 /// template to convert CoordinateSystem enum to 151 /// respective Basis enum 152 template coordinateSystemToBasis(CoordSystem cs){ 153 static if(cs == CoordSystem.zbho){ 154 enum Basis coordinateSystemToBasis = Basis.zero; 155 } 156 else static if(cs == CoordSystem.zbc){ 157 enum Basis coordinateSystemToBasis = Basis.zero; 158 } 159 else static if(cs == CoordSystem.obho){ 160 enum Basis coordinateSystemToBasis = Basis.one; 161 } 162 else static if(cs == CoordSystem.obc){ 163 enum Basis coordinateSystemToBasis = Basis.one; 164 } 165 } 166 167 // just sanity checking 168 static assert(coordinateSystemToBasis!(CoordSystem.zbho) == Basis.zero); 169 static assert(coordinateSystemToBasis!(CoordSystem.zbc) == Basis.zero); 170 static assert(coordinateSystemToBasis!(CoordSystem.obho) == Basis.one); 171 static assert(coordinateSystemToBasis!(CoordSystem.obc) == Basis.one); 172 173 /// template to convert CoordinateSystem enum to 174 /// respective End enum 175 template coordinateSystemToEnd(CoordSystem cs){ 176 static if(cs == CoordSystem.zbho){ 177 enum End coordinateSystemToEnd = End.open; 178 } 179 else static if(cs == CoordSystem.zbc){ 180 enum End coordinateSystemToEnd = End.closed; 181 } 182 else static if(cs == CoordSystem.obho){ 183 enum End coordinateSystemToEnd = End.open; 184 } 185 else static if(cs == CoordSystem.obc){ 186 enum End coordinateSystemToEnd = End.closed; 187 } 188 } 189 190 // just sanity checking 191 static assert(coordinateSystemToEnd!(CoordSystem.zbho) == End.open); 192 static assert(coordinateSystemToEnd!(CoordSystem.zbc) == End.closed); 193 static assert(coordinateSystemToEnd!(CoordSystem.obho) == End.open); 194 static assert(coordinateSystemToEnd!(CoordSystem.obc) == End.closed); 195 196 /// Coordsytem types 197 enum CoordSystem 198 { 199 zbho = 0, /// zero-based, half-open (BED, BAM) 200 zbc, /// zero-based, closed (HTSlib.faidx_fetch_seq) 201 obho, /// one-based, half-open 202 obc /// one-based, closed (GFF3, VCF [not BCF], SAM [not BAM]) 203 } 204 205 /// Labels for each CoordSystem Type 206 enum CoordSystemLabels = __traits(allMembers, CoordSystem); 207 208 /// The (start, end) coordinates within a coordinate system, 209 /// where the type itself encodes the coordinate system details 210 /// (zero or one-based; half-open vs. closed) 211 struct Interval(CoordSystem cs) 212 { 213 @safe nothrow pure: 214 215 /// alias Basis and End enums for this Coordsystem type 216 alias basetype = coordinateSystemToBasis!cs; 217 alias endtype = coordinateSystemToEnd!cs; 218 219 /// Starting coordinate 220 Coordinate!basetype start; 221 /// Ending coordinate 222 Coordinate!basetype end; 223 224 /// long constructor 225 this(long start, long end) @nogc 226 { 227 this.start.pos = start; 228 this.end.pos = end; 229 } 230 231 invariant 232 { 233 // In half-open systems, ensure start strictly less than end, 234 // unless the zero-length range is truly desired, in which case use (0,0) 235 static if (cs == CoordSystem.zbho || cs == CoordSystem.obho) 236 assert(this.start < this.end || (this.start == 0 && this.end == 0)); 237 else 238 assert(this.start <= this.end); 239 } 240 241 /// Return the size of the interval spanned by start and end 242 long size() @nogc 243 { 244 static if (cs == CoordSystem.zbho || cs == CoordSystem.obho) 245 return end - start; 246 else static if (cs == CoordSystem.zbc || cs == CoordSystem.obc) 247 return end - start + 1; 248 else 249 static assert(0, "Coordinate Type error"); 250 } 251 252 /// Convert coordinates to another coordinate system 253 auto to(CoordSystem tocs)() @nogc 254 { 255 256 /// alias Basis and End enums for the tocs Coordsystem type 257 alias tobasetype = coordinateSystemToBasis!tocs; 258 alias toendtype = coordinateSystemToEnd!tocs; 259 260 // new coordinates 261 auto newStart = this.start; 262 auto newEnd = this.end; 263 264 // return same CoordinateSystem type 265 static if (cs == tocs) 266 return Interval!(tocs)(newStart, newEnd); 267 else{ 268 // convert coordinates to new base type 269 newStart = newStart.to!tobasetype; 270 newEnd = newEnd.to!tobasetype; 271 272 // if going between end types 273 static if (endtype != toendtype){ 274 // open to closed end, decrement end 275 // closed to open end, increment end 276 static if(toendtype == End.closed){ 277 newEnd--; 278 }else{ 279 newEnd++; 280 } 281 } 282 return Interval!(tocs)(newStart, newEnd); 283 } 284 } 285 286 /// Convert coordinate to another based system using shortcuts 287 auto to(T: ZBHO)() @nogc 288 { 289 return this.to!(CoordSystem.zbho); 290 } 291 292 /// Convert coordinate to another based system using shortcuts 293 auto to(T: OBHO)() @nogc 294 { 295 return this.to!(CoordSystem.obho); 296 } 297 298 /// Convert coordinate to another based system using shortcuts 299 auto to(T: ZBC)() @nogc 300 { 301 return this.to!(CoordSystem.zbc); 302 } 303 304 /// Convert coordinate to another based system using shortcuts 305 auto to(T: OBC)() @nogc 306 { 307 return this.to!(CoordSystem.obc); 308 } 309 310 /// make a new coordinate pair with a value of 311 /// this.start + off and this.end + off 312 auto offset(T)(T off) @nogc 313 if(isIntegral!T) 314 { 315 return Interval!(cs)(cast(long)(this.start + off), cast(long)(this.end + off)); 316 } 317 318 /// intersection of two regions 319 auto intersectImpl(Interval!cs other) @nogc 320 { 321 if(!this.isOverlap(other)){ 322 return Interval!(cs).init; 323 } 324 return Interval!(cs)( 325 this.getMaxStart(other), 326 this.getMinEnd(other) 327 ); 328 } 329 330 /// union of two regions 331 /// specifically computes the convex hull of the two intervals 332 /// if Intervals don't overlap returns the init Interval 333 /// 334 /// https://en.wikipedia.org/wiki/Convex_hull 335 /// 336 /// Thanks Arne! 337 /// https://forum.dlang.org/post/duzdclxvvrifoastdztv@forum.dlang.org 338 auto unionImpl(Interval!cs other) @nogc 339 { 340 if(!this.isOverlap(other)){ 341 return Interval!(cs).init; 342 } 343 return Interval!(cs)( 344 this.getMinStart(other), 345 this.getMaxEnd(other) 346 ); 347 } 348 349 /// does this interval overlap 350 /// with the other interval? 351 auto isOverlap(Interval!cs other) @nogc 352 { 353 static if(endtype == End.closed){ 354 return this.getMaxStart(other) <= this.getMinEnd(other); 355 }else{ 356 return this.getMaxStart(other) < this.getMinEnd(other); 357 } 358 } 359 360 /// get minimum start value between this interval 361 /// and other interval 362 auto getMinStart(Interval!cs other) @nogc 363 { 364 return this.start < other.start ? this.start : other.start; 365 } 366 367 /// get maximum start value between this interval 368 /// and other interval 369 auto getMaxStart(Interval!cs other) @nogc 370 { 371 return this.start > other.start ? this.start : other.start; 372 } 373 374 /// get minimum end value between this interval 375 /// and other interval 376 auto getMinEnd(Interval!cs other) @nogc 377 { 378 return this.end < other.end ? this.end : other.end; 379 } 380 381 /// get maximum end value between this interval 382 /// and other interval 383 auto getMaxEnd(Interval!cs other) @nogc 384 { 385 return this.end > other.end ? this.end : other.end; 386 } 387 388 /// Set operators for interval intersection (|) and union (&). 389 /// If the intervals are non-overlapping, an empty interval is returned i.e ZBHO.init 390 /// union is not a true union but returns the convex hull. 391 auto opBinary(string op)(Interval!cs other) @nogc 392 if(op == "|" || op == "&") 393 { 394 static if(op == "|") return unionImpl(other); 395 else static if(op == "&") return intersectImpl(other); 396 else static assert(0,"Operator "~op~" not implemented"); 397 } 398 399 /// opbinary + or - to use offset 400 auto opBinary(string op, T)(T off) @nogc 401 if ((op == "+" || op == "-") && isIntegral!T) 402 { 403 static if(op == "+") 404 return this.offset(off); 405 else static if(op == "-") 406 return this.offset(-off); 407 else static assert(0,"Operator "~op~" not implemented"); 408 } 409 410 /// Get string representation for printing 411 string toString() const{ 412 return "[" ~ CoordSystemLabels[cs] ~ "] " ~ this.start.pos.to!string ~ "-" ~ this.end.pos.to!string; 413 } 414 } 415 416 alias ZBHO = Interval!(CoordSystem.zbho); 417 alias OBHO = Interval!(CoordSystem.obho); 418 alias ZBC = Interval!(CoordSystem.zbc); 419 alias OBC = Interval!(CoordSystem.obc); 420 421 alias ZeroBasedHalfOpen = Interval!(CoordSystem.zbho); 422 alias OneBasedHalfOpen = Interval!(CoordSystem.obho); 423 alias ZeroBasedClosed = Interval!(CoordSystem.zbc); 424 alias OneBasedClosed = Interval!(CoordSystem.obc); 425 426 427 debug(dhtslib_unittest) @safe @nogc nothrow pure unittest 428 { 429 auto c0 = Interval!(CoordSystem.zbho)(0, 100); 430 assert(c0.size == 100); 431 432 auto c1 = c0.to!(CoordSystem.zbc); 433 auto c2 = c0.to!(CoordSystem.obc); 434 auto c3 = c0.to!(CoordSystem.obho); 435 auto c4 = c0.to!(CoordSystem.zbho); 436 437 assert(c1 == ZBC(0, 99)); 438 assert(c2 == OBC(1, 100)); 439 assert(c3 == OBHO(1, 101)); 440 assert(c4 == ZBHO(0, 100)); 441 // ... 442 } 443 444 debug(dhtslib_unittest) @safe @nogc nothrow pure unittest 445 { 446 ZB coord = ZB(1); 447 assert(coord + 1 == ZB(2)); 448 assert(coord - 1 == ZB(0)); 449 450 ZBHO coords = ZBHO(1, 2); 451 assert(coords + 1 == ZBHO(2, 3)); 452 assert(coords - 1 == ZBHO(0, 1)); 453 // ... 454 } 455 456 debug(dhtslib_unittest) @safe @nogc nothrow pure unittest 457 { 458 ZBHO coords = ZBHO(1, 5); 459 assert((coords & ZBHO(2, 6)) == ZBHO(2, 5)); 460 assert((coords | ZBHO(3, 10)) == ZBHO(1, 10)); 461 assert((coords | ZBHO(6, 10)) == ZBHO(0, 0)); 462 // ... 463 } 464 465 466 debug(dhtslib_unittest) @safe @nogc nothrow pure unittest 467 { 468 auto c0 = Interval!(CoordSystem.zbho)(0, 100); 469 assert(c0.size == 100); 470 471 auto c1 = c0.to!(CoordSystem.zbc); 472 auto c2 = c0.to!(CoordSystem.obc); 473 auto c3 = c0.to!(CoordSystem.obho); 474 auto c4 = c0.to!(CoordSystem.zbho); 475 476 assert(c1 == ZBC(0, 99)); 477 assert(c2 == OBC(1, 100)); 478 assert(c3 == OBHO(1, 101)); 479 assert(c4 == ZBHO(0, 100)); 480 } 481 482 debug(dhtslib_unittest) @safe @nogc nothrow pure unittest 483 { 484 auto c0 = ZBHO(0, 100); 485 assert(c0.size == 100); 486 487 auto c1 = c0.to!ZBC; 488 auto c2 = c0.to!OBC; 489 auto c3 = c0.to!OBHO; 490 auto c4 = c0.to!ZBHO; 491 492 assert(c1 == ZBC(0, 99)); 493 assert(c2 == OBC(1, 100)); 494 assert(c3 == OBHO(1, 101)); 495 assert(c4 == ZBHO(0, 100)); 496 } 497 498 debug(dhtslib_unittest) @safe @nogc nothrow pure unittest 499 { 500 auto c0 = Interval!(CoordSystem.obho)(1, 101); 501 assert(c0.size == 100); 502 503 auto c1 = c0.to!(CoordSystem.zbc); 504 auto c2 = c0.to!(CoordSystem.obc); 505 auto c3 = c0.to!(CoordSystem.obho); 506 auto c4 = c0.to!(CoordSystem.zbho); 507 508 assert(c1 == ZBC(0, 99)); 509 assert(c2 == OBC(1, 100)); 510 assert(c3 == OBHO(1, 101)); 511 assert(c4 == ZBHO(0, 100)); 512 // ... 513 } 514 515 debug(dhtslib_unittest) @safe @nogc nothrow pure unittest 516 { 517 ZBHO c0 = ZBHO(0, 100); 518 assert(c0.size == 100); 519 520 auto c1 = c0.offset(50); 521 assert(c1 == ZBHO(50, 150)); 522 assert((c0 & c1) == ZBHO(50, 100)); 523 assert((c0 | c1) == ZBHO(0, 150)); 524 525 c1 = c0.offset(99); 526 assert(c1 == ZBHO(99, 199)); 527 528 assert((c0 & c1) == ZBHO(99, 100)); 529 assert((c0 | c1) == ZBHO(0, 199)); 530 } 531 532 debug(dhtslib_unittest) unittest 533 { 534 OBC c0 = OBC(1, 100); 535 assert(c0.size == 100); 536 537 auto c1 = c0.offset(50); 538 assert(c1 == OBC(51, 150)); 539 540 assert((c0 & c1) == OBC(51, 100)); 541 assert((c0 | c1) == OBC(1, 150)); 542 543 c1 = c0.offset(99); 544 assert(c1 == OBC(100, 199)); 545 import std.stdio; 546 writeln((c0 & c1)); 547 assert((c0 & c1) == OBC(100, 100)); 548 assert((c0 | c1) == OBC(1, 199)); 549 } 550 551 /// 552 // debug(dhtslib_unittest) unittest 553 // { 554 // import dhtslib.sam; 555 // import htslib.hts_log; 556 // import std.path : buildPath, dirName; 557 // import std.string : fromStringz; 558 // import std.array : array; 559 560 // hts_set_log_level(htsLogLevel.HTS_LOG_WARNING); 561 // hts_log_info(__FUNCTION__, "Testing SAMFile & SAMRecord"); 562 // hts_log_info(__FUNCTION__, "Loading test file"); 563 // auto sam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","auxf#values.sam"), 0); 564 565 // auto reg = getIntervalFromString("sheila:1-10",sam.header); 566 // assert(reg.tid == 0); 567 // assert(reg.interval == ZBHO(0,11)); 568 // }