1 module dhtslib.tabix; 2 3 import std.stdio; 4 import std.string; 5 import core.stdc.stdlib : malloc, free; 6 7 import dhtslib.htslib.hts; 8 import dhtslib.htslib.kstring; 9 import dhtslib.htslib.tbx; 10 //import dhtslib.htslib.regidx; 11 12 /** Encapsulates a position-sorted record-oriented NGS flat file, 13 * indexed with Tabix, including BED, GFF3, VCF. 14 * 15 * region(string r) returns an InputRange that iterates through rows of the file intersecting or contained within the requested range 16 */ 17 struct TabixIndexedFile { 18 19 htsFile *fp; /// pointer to htsFile struct 20 tbx_t *tbx; /// pointer to tabix handle 21 22 string header; /// NGS flat file's header (if any; e.g. BED may not have one) 23 24 /// Initialize with a complete file path name to the tabix-indexed file 25 /// The tabix index (.tbi) must already exist alongside 26 this(const(char)[] fn, const(char)[] fntbi = "") 27 { 28 debug(dhtslib_debug) { writeln("TabixIndexedFile ctor"); } 29 this.fp = hts_open( toStringz(fn), "r"); 30 if ( !this.fp ) { 31 writefln("Could not read %s\n", fn); 32 throw new Exception("Couldn't read file"); 33 } 34 //enum htsExactFormat format = hts_get_format(fp)->format; 35 if(fntbi!="") this.tbx = tbx_index_load2( toStringz(fn), toStringz(fntbi) ); 36 else this.tbx = tbx_index_load( toStringz(fn) ); 37 if (!this.tbx) { 38 writefln("Could not load .tbi index of %s\n", fn ); 39 throw new Exception("Couldn't load tabix index file"); 40 } 41 42 loadHeader(); 43 } 44 ~this() 45 { 46 debug(dhtslib_debug) { writeln("TabixIndexedFile dtor"); } 47 tbx_destroy(this.tbx); 48 49 if ( hts_close(this.fp) ) writefln("hts_close returned non-zero status: %s\n", fromStringz(this.fp.fn)); 50 } 51 52 private void loadHeader() 53 { 54 kstring_t str; 55 scope(exit) { free(str.s); } 56 57 while ( hts_getline(this.fp, '\n', &str) >= 0 ) 58 { 59 if ( !str.l || str.s[0] != this.tbx.conf.meta_char ) break; 60 this.header ~= fromStringz(str.s) ~ '\n'; 61 } 62 63 debug(dhtslib_debug) { writeln("end loadHeader"); } 64 } 65 66 /// tbx.d: const(char **) tbx_seqnames(tbx_t *tbx, int *n); // free the array but not the values 67 @property string[] sequenceNames() 68 { 69 // TODO: Check for memory leaks (free the array after copy into sequence_names) 70 int nseqs; 71 72 string[] sequence_names; 73 74 const(char **) seqnames = tbx_seqnames(this.tbx, &nseqs); 75 76 for(int i; i<nseqs; i++) { 77 sequence_names ~= cast(string) fromStringz(seqnames[i]); 78 } 79 80 return sequence_names; 81 } 82 83 /** region(r) 84 * returns an InputRange that iterates through rows of the file intersecting or contained within the requested range 85 */ 86 auto region(const(char)[] r) 87 { 88 struct Region { 89 90 /** TODO: determine how thread-(un)safe this is (i.e., using a potentially shared *fp and *tbx) */ 91 private htsFile *fp; 92 private tbx_t *tbx; 93 94 private hts_itr_t *itr; 95 private string next; 96 97 // necessary because the alternative strategy of preloading the first row 98 // leads to problems when Struct inst is blitted -> 99 // re-iterating always returns first row only (since *itr is expended 100 // but first row was preloaded in this.next) 101 private bool active; 102 103 this(htsFile *fp, tbx_t *tbx, const(char)[] r) 104 { 105 this.fp = fp; 106 this.tbx= tbx; 107 108 this.itr = tbx_itr_querys(tbx, toStringz(r) ); 109 debug(dhtslib_debug) { writeln("Region ctor // this.itr: ", this.itr); } 110 if (this.itr) { 111 // Load the first record 112 //this.popFront(); // correction, do not load the first record 113 } 114 else { 115 // TODO handle error 116 throw new Exception("could not allocate this.itr"); 117 } 118 } 119 ~this() 120 { 121 debug(dhtslib_debug) { writeln("Region dtor // this.itr: ", this.itr); } 122 //tbx_itr_destroy(itr); 123 //free(this.kstr.s); 124 } 125 126 // had to remove "const" property from empty() due to manipulation of this.active 127 @property bool empty() { 128 129 if (!this.active) { 130 // this is the first call to empty() (and use of the range) 131 // Let's make it active and attempt to load the first record, if one exists 132 this.active = true; 133 this.popFront(); 134 } 135 136 if (!this.next) return true; 137 else return false; 138 } 139 140 @property string front() const { 141 return this.next; 142 } 143 144 void popFront() { 145 // closure over fp and tbx? (i.e. potentially unsafe?) 146 147 // Get next entry 148 kstring_t kstr; 149 immutable res = tbx_itr_next(this.fp, this.tbx, this.itr, &kstr); 150 if (res < 0) { 151 // we are done 152 this.next = null; 153 } else { 154 // Otherwise load into next 155 this.next = fromStringz(kstr.s).idup; 156 free(kstr.s); 157 } 158 } 159 } 160 161 return Region(this.fp, this.tbx, r); 162 } 163 164 }