1 /** 2 Module providing a wrapper, `TabixIndexedFile` over a line-oriented NGS flat file, 3 such as BED, GFF3, VCF that has been indexed with tabix. 4 5 The wrapper provides a list of reference sequence names, as well as iterator over 6 all rows falling within a sequence range, e.g. "chr1:1000-2000" 7 */ 8 9 module dhtslib.tabix; 10 11 import std.stdio; 12 import std..string; 13 import core.stdc.stdlib : malloc, free; 14 15 import htslib.hts; 16 import htslib.kstring; 17 import htslib.tbx; 18 //import htslib.regidx; 19 20 /** Encapsulates a position-sorted record-oriented NGS flat file, 21 * indexed with Tabix, including BED, GFF3, VCF. 22 * 23 * region(string r) returns an InputRange that iterates through rows of the file intersecting or contained within the requested range 24 */ 25 struct TabixIndexedFile { 26 27 htsFile *fp; /// pointer to htsFile struct 28 tbx_t *tbx; /// pointer to tabix handle 29 30 string header; /// NGS flat file's header (if any; e.g. BED may not have one) 31 32 /// Initialize with a complete file path name to the tabix-indexed file 33 /// The tabix index (.tbi) must already exist alongside 34 this(const(char)[] fn, const(char)[] fntbi = "") 35 { 36 debug(dhtslib_debug) { writeln("TabixIndexedFile ctor"); } 37 this.fp = hts_open( toStringz(fn), "r"); 38 if ( !this.fp ) { 39 writefln("Could not read %s\n", fn); 40 throw new Exception("Couldn't read file"); 41 } 42 //enum htsExactFormat format = hts_get_format(fp)->format; 43 if(fntbi!="") this.tbx = tbx_index_load2( toStringz(fn), toStringz(fntbi) ); 44 else this.tbx = tbx_index_load( toStringz(fn) ); 45 if (!this.tbx) { 46 writefln("Could not load .tbi index of %s\n", fn ); 47 throw new Exception("Couldn't load tabix index file"); 48 } 49 50 loadHeader(); 51 } 52 ~this() 53 { 54 debug(dhtslib_debug) { writeln("TabixIndexedFile dtor"); } 55 tbx_destroy(this.tbx); 56 57 if ( hts_close(this.fp) ) writefln("hts_close returned non-zero status: %s\n", fromStringz(this.fp.fn)); 58 } 59 60 private void loadHeader() 61 { 62 kstring_t str; 63 scope(exit) { free(str.s); } 64 65 while ( hts_getline(this.fp, '\n', &str) >= 0 ) 66 { 67 if ( !str.l || str.s[0] != this.tbx.conf.meta_char ) break; 68 this.header ~= fromStringz(str.s) ~ '\n'; 69 } 70 71 debug(dhtslib_debug) { writeln("end loadHeader"); } 72 } 73 74 /// tbx.d: const(char **) tbx_seqnames(tbx_t *tbx, int *n); // free the array but not the values 75 @property string[] sequenceNames() 76 { 77 // TODO: Check for memory leaks (free the array after copy into sequence_names) 78 int nseqs; 79 80 string[] sequence_names; 81 82 const(char **) seqnames = tbx_seqnames(this.tbx, &nseqs); 83 84 for(int i; i<nseqs; i++) { 85 sequence_names ~= cast(string) fromStringz(seqnames[i]); 86 } 87 88 return sequence_names; 89 } 90 91 /** region(r) 92 * returns an InputRange that iterates through rows of the file intersecting or contained within the requested range 93 */ 94 auto region(const(char)[] r) 95 { 96 struct Region { 97 98 /** TODO: determine how thread-(un)safe this is (i.e., using a potentially shared *fp and *tbx) */ 99 private htsFile *fp; 100 private tbx_t *tbx; 101 102 private hts_itr_t *itr; 103 private string next; 104 105 // necessary because the alternative strategy of preloading the first row 106 // leads to problems when Struct inst is blitted -> 107 // re-iterating always returns first row only (since *itr is expended 108 // but first row was preloaded in this.next) 109 private bool active; 110 111 this(htsFile *fp, tbx_t *tbx, const(char)[] r) 112 { 113 this.fp = fp; 114 this.tbx= tbx; 115 116 this.itr = tbx_itr_querys(tbx, toStringz(r) ); 117 debug(dhtslib_debug) { writeln("Region ctor // this.itr: ", this.itr); } 118 if (this.itr) { 119 // Load the first record 120 //this.popFront(); // correction, do not load the first record 121 } 122 else { 123 // TODO handle error 124 throw new Exception("could not allocate this.itr"); 125 } 126 } 127 ~this() 128 { 129 debug(dhtslib_debug) { writeln("Region dtor // this.itr: ", this.itr); } 130 //tbx_itr_destroy(itr); 131 //free(this.kstr.s); 132 } 133 134 // had to remove "const" property from empty() due to manipulation of this.active 135 @property bool empty() { 136 137 if (!this.active) { 138 // this is the first call to empty() (and use of the range) 139 // Let's make it active and attempt to load the first record, if one exists 140 this.active = true; 141 this.popFront(); 142 } 143 144 if (!this.next) return true; 145 else return false; 146 } 147 148 @property string front() const { 149 return this.next; 150 } 151 152 void popFront() { 153 // closure over fp and tbx? (i.e. potentially unsafe?) 154 155 // Get next entry 156 kstring_t kstr; 157 immutable res = tbx_itr_next(this.fp, this.tbx, this.itr, &kstr); 158 if (res < 0) { 159 // we are done 160 this.next = null; 161 } else { 162 // Otherwise load into next 163 this.next = fromStringz(kstr.s).idup; 164 free(kstr.s); 165 } 166 } 167 } 168 169 return Region(this.fp, this.tbx, r); 170 } 171 172 }