1 module dhtslib.bgzf;
2 
3 import core.stdc.stdlib: malloc, free;
4 import std.parallelism: totalCPUs;
5 import std.stdio: writeln, writefln;
6 import std.string: fromStringz, toStringz;
7 
8 import dhtslib.htslib.bgzf;
9 import dhtslib.htslib.kstring;
10 
/**
Encapsulates a bgzipped (block gzipped) file.
Implements InputRange interface using htslib calls to bgzf_getline().

All copies of a BGZFile share a single file handle, line buffer and read
position through a reference-counted state block; the file is closed and the
buffer released when the last copy is destroyed.
*/
struct BGZFile {

    /// State shared by every copy of one opened file.
    /// Lives in malloc'd memory so that the destructor never has to touch
    /// GC-managed memory.
    private struct State
    {
        /// htslib data structure representing the BGZF compressed file/stream fp
        BGZF* bgzf;

        /// buffer holding the current line; reused by bgzf_getline() for each
        /// line and freed once, in the destructor of the last copy
        kstring_t line;

        /// true when `line`/`eof` describe the current front element
        bool fetched;

        /// true once bgzf_getline() has reported end of file (or an error)
        bool eof;

        /// number of live BGZFile copies referring to this state
        int rc;
    }

    /// filename; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope
    private immutable(char)* fn;

    /// shared state; null for a default-initialised (never opened) BGZFile
    private State* st;

    /// postblit: the new copy shares the state, so bump the shared counter
    this(this)
    {
        if (this.st !is null) this.st.rc++;
    }

    /**
    Open the bgzipped file `fn` for reading.

    Params:
        fn = path of the file to open

    Throws: Exception if htslib cannot open the file.
    */
    this(string fn)
    {
        import core.exception : onOutOfMemoryError;

        debug(dhtslib_debug) { writeln("BGZFile ctor"); }

        // open file
        this.fn = toStringz(fn);
        BGZF* fp = bgzf_open(this.fn, "r");
        if (fp is null)
            throw new Exception("BGZFile: could not open file: " ~ fn);

        this.st = cast(State*) malloc(State.sizeof);
        if (this.st is null) {
            bgzf_close(fp);
            onOutOfMemoryError();
        }
        *this.st = State.init;
        this.st.bgzf = fp;
        this.st.rc = 1;

        // enable multi-threading
        // (only effective if library was compiled with -DBGZF_MT)
        // int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks);
        // n_sub_blks : blocks per thread; 64-256 recommended
        if(totalCPUs > 1) {
            immutable int ret = bgzf_mt(this.st.bgzf, totalCPUs, 64);
            debug(dhtslib_debug) {
                writefln("Total CPUs: %d", totalCPUs);
                writefln("bgzf_mt() -> %d", ret);
            }
        }

        // The range is not primed here: the first line is read lazily by the
        // first call to empty/front/popFront.
    }

    /// Drops this copy's reference; the last copy frees the line buffer,
    /// closes the file and releases the shared state.
    ~this()
    {
        // default-initialised or failed-to-open instance: nothing to release
        if (this.st is null) return;

        debug(dhtslib_debug) { writefln("BGZFile dtor | rc=%d", this.st.rc); }

        if (--this.st.rc == 0) {
            debug(dhtslib_debug) {
                writefln("BGZFile closing file (rc=%d)", this.st.rc);
            }
            free(this.st.line.s);
            if (bgzf_close(this.st.bgzf) != 0)
                writefln("bgzf_close returned non-zero status: %s\n", fromStringz(this.fn));
            free(this.st);
        }
        this.st = null;
    }

    /// Makes sure the current line has been read from the file.
    /// Returns: true if a line is available as front, false at end of input
    /// (or when no file is open).
    private bool fetch()
    {
        if (this.st is null) return false;

        if (!this.st.fetched) {
            // int bgzf_getline(BGZF *fp, int delim, kstring_t *str);
            // the kstring buffer is handed back to htslib on every call so it
            // can be reused instead of reallocated per line
            immutable int res = bgzf_getline(this.st.bgzf, cast(int)'\n', &this.st.line);
            // any negative result (EOF or read error) ends the range
            this.st.eof = res < 0;
            this.st.fetched = true;
        }
        return !this.st.eof;
    }

    /// InputRange interface.
    /// May be called any number of times without consuming input: the next
    /// line is read at most once until popFront() is called.
    @property bool empty()
    {
        return !this.fetch();
    }
    /// ditto
    void popFront()
    {
        // discard the current line so the next access reads a fresh one;
        // once end of input is reached the state is left untouched
        if (this.fetch())
            this.st.fetched = false;
    }
    /// ditto
    /// Returns: a GC-owned copy of the current line (without the trailing
    /// newline), or an empty string when the range is empty.
    string front()
    {
        if (!this.fetch()) return null;

        // use the length recorded by htslib rather than scanning for a NUL
        return this.st.line.s[0 .. this.st.line.l].idup;
    }
}