1 module dhtslib.tabix;
2 
3 import std.stdio;
4 import std.string;
5 import core.stdc.stdlib : malloc, free;
6 
7 import dhtslib.htslib.hts;
8 import dhtslib.htslib.kstring;
9 import dhtslib.htslib.tbx;
10 //import dhtslib.htslib.regidx;
11 
12 /** Encapsulates a position-sorted record-oriented NGS flat file,
13  *  indexed with Tabix, including BED, GFF3, VCF.
14  *
15  *  region(string r) returns an InputRange that iterates through rows of the file intersecting or contained within the requested range 
16  */
17 struct TabixIndexedFile {
18 
19     htsFile *fp;    /// pointer to htsFile struct
20     tbx_t   *tbx;   /// pointer to tabix handle
21 
22     string header;  /// NGS flat file's header (if any; e.g. BED may not have one)
23 
24     /// Initialize with a complete file path name to the tabix-indexed file
25     /// The tabix index (.tbi) must already exist alongside
26     this(const(char)[] fn, const(char)[] fntbi = "")
27     {
28         debug(dhtslib_debug) { writeln("TabixIndexedFile ctor"); }
29         this.fp = hts_open( toStringz(fn), "r");
30         if ( !this.fp ) {
31             writefln("Could not read %s\n", fn);
32             throw new Exception("Couldn't read file");
33         }
34         //enum htsExactFormat format = hts_get_format(fp)->format;
35         if(fntbi!="") this.tbx = tbx_index_load2( toStringz(fn), toStringz(fntbi) );
36         else this.tbx = tbx_index_load( toStringz(fn) );
37         if (!this.tbx) { 
38             writefln("Could not load .tbi index of %s\n", fn );
39             throw new Exception("Couldn't load tabix index file");
40         }
41 
42         loadHeader();
43     }
44     ~this()
45     {
46         debug(dhtslib_debug) { writeln("TabixIndexedFile dtor"); }
47         tbx_destroy(this.tbx);
48 
49         if ( hts_close(this.fp) ) writefln("hts_close returned non-zero status: %s\n", fromStringz(this.fp.fn));
50     }
51 
52     private void loadHeader()
53     {
54         kstring_t str;
55         scope(exit) { free(str.s); }
56 
57         while ( hts_getline(this.fp, '\n', &str) >= 0 )
58         {
59             if ( !str.l || str.s[0] != this.tbx.conf.meta_char ) break;
60             this.header ~= fromStringz(str.s) ~ '\n';
61         }
62 
63         debug(dhtslib_debug) { writeln("end loadHeader"); }
64     }
65 
66     /// tbx.d: const(char **) tbx_seqnames(tbx_t *tbx, int *n);  // free the array but not the values
67     @property string[] sequenceNames()
68     {
69         // TODO: Check for memory leaks (free the array after copy into sequence_names)
70         int nseqs;
71 
72         string[] sequence_names;
73 
74         const(char **) seqnames = tbx_seqnames(this.tbx, &nseqs);
75 
76         for(int i; i<nseqs; i++) {
77             sequence_names ~= cast(string) fromStringz(seqnames[i]);
78         }
79 
80         return sequence_names;
81     }
82 
83     /** region(r)
84      *  returns an InputRange that iterates through rows of the file intersecting or contained within the requested range 
85      */
86     auto region(const(char)[] r)
87     {
88         struct Region {
89 
90             /** TODO: determine how thread-(un)safe this is (i.e., using a potentially shared *fp and *tbx) */
91             private htsFile *fp;
92             private tbx_t   *tbx;
93 
94             private hts_itr_t *itr;
95             private string next;
96 
97             // necessary because the alternative strategy of preloading the first row
98             // leads to problems when Struct inst is blitted ->
99             // re-iterating always returns first row only (since *itr is expended 
100             // but first row was preloaded in this.next)
101             private bool active;
102 
103             this(htsFile *fp, tbx_t *tbx, const(char)[] r)
104             {
105                 this.fp = fp;
106                 this.tbx= tbx;
107 
108                 this.itr = tbx_itr_querys(tbx, toStringz(r) );
109                 debug(dhtslib_debug) { writeln("Region ctor // this.itr: ", this.itr); }
110                 if (this.itr) {
111                     // Load the first record
112                     //this.popFront(); // correction, do not load the first record
113                 }
114                 else {
115                     // TODO handle error
116                     throw new Exception("could not allocate this.itr");
117                 }
118             }
119             ~this()
120             {
121                 debug(dhtslib_debug) { writeln("Region dtor // this.itr: ", this.itr); }
122                 //tbx_itr_destroy(itr);
123                 //free(this.kstr.s);
124             }
125 
126             // had to remove "const" property from empty() due to manipulation of this.active
127             @property bool empty() {
128 
129                 if (!this.active) {
130                     // this is the first call to empty() (and use of the range)
131                     // Let's make it active and attempt to load the first record, if one exists
132                     this.active = true;
133                     this.popFront();
134                 }
135 
136                 if (!this.next) return true;
137                 else return false;
138             }
139 
140             @property string front() const {
141                 return this.next;
142             }
143 
144             void popFront() {
145                 // closure over fp and tbx? (i.e. potentially unsafe?)
146 
147                 // Get next entry
148                 kstring_t kstr;
149                 immutable res = tbx_itr_next(this.fp, this.tbx, this.itr, &kstr);
150                 if (res < 0) {
151                     // we are done
152                     this.next = null;
153                 } else {
154                     // Otherwise load into next
155                     this.next = fromStringz(kstr.s).idup;
156                     free(kstr.s);
157                 }
158             }
159         }
160 
161         return Region(this.fp, this.tbx, r);
162     }
163 
164 }