1 /**
2 This module provides a wrapper, BGZFile, over an htslib BGZF compressed file/stream.
The wrapper acts as a linewise InputRange over the file or stream.
4 */
5 
6 
7 module dhtslib.bgzf;
8 
import core.stdc.stdlib: malloc, free;
import std.parallelism: totalCPUs;
import std.stdio: writeln, writefln;
import std.string: fromStringz, toStringz;

import htslib.bgzf;
import htslib.kstring;
16 
/**
Encapsulates a bgzipped (block gzipped) file.
Implements the InputRange interface (empty / front / popFront) using htslib
calls to bgzf_getline(), yielding one line per element.

Copies of a BGZFile share the same htslib handle (and therefore the same file
position). The handle is closed when the last copy is destroyed.
*/
struct BGZFile {

    /// filename; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope
    private immutable(char)* fn;

    /// htslib data structure representing the BGZF compressed file/stream fp
    private BGZF* bgzf;

    /// Number of live copies sharing `bgzf`.
    /// Lives in malloc'd memory so that every copy updates the very same counter
    /// (a by-value counter would let the first instance close the file under its copies).
    private int* rc;

    /// Line currently exposed by front(); only meaningful while `buffered` is true.
    /// Held as a GC-owned string so that struct copies can never double free it.
    private string current;

    /// true while `current` holds a line that popFront() has not yet consumed
    private bool buffered;

    /// true once bgzf_getline() has reported end of file (or an error)
    private bool eof;

    /// postblit: one more copy now refers to the shared handle
    this(this)
    {
        if (this.rc !is null) ++(*this.rc);
    }

    /**
    Open `fn` for reading.

    Params:
        fn = path of the BGZF compressed file

    Throws: Exception if the file cannot be opened or the reference counter
            cannot be allocated.
    */
    this(string fn)
    {
        debug(dhtslib_debug) { writeln("BGZFile ctor"); }

        // open file
        this.fn = toStringz(fn);
        this.bgzf = bgzf_open(this.fn, "r");
        if (this.bgzf is null)
            throw new Exception("BGZFile: unable to open " ~ fn);

        // shared reference counter
        this.rc = cast(int*) malloc(int.sizeof);
        if (this.rc is null) {
            bgzf_close(this.bgzf);
            this.bgzf = null;
            throw new Exception("BGZFile: out of memory");
        }
        *this.rc = 1;

        // enable multi-threading
        // (only effective if library was compiled with -DBGZF_MT)
        // int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks);
        // n_sub_blks : blocks per thread; 64-256 recommended
        if(totalCPUs > 1) {
            immutable int ret = bgzf_mt(this.bgzf, totalCPUs, 64);
            debug(dhtslib_debug) {
                writefln("Total CPUs: %d", totalCPUs);
                writefln("bgzf_mt() -> %d", ret);
            }
        }

        // Do not prime the range here: lines are fetched lazily by
        // empty/front/popFront, so constructing (or copying) a BGZFile
        // never consumes input by itself.
    }

    /// Drops this copy's reference; the last copy closes the htslib handle.
    ~this()
    {
        debug(dhtslib_debug) {
            writefln("BGZFile dtor | rc=%d", this.rc is null ? 0 : *this.rc);
        }

        // default-initialised instance, or ctor threw before the handle was shared
        if (this.rc is null) return;

        // other copies are still alive
        if (--(*this.rc) > 0) return;

        debug(dhtslib_debug) {
            writefln("BGZFile closing file (rc=%d)", *this.rc);
        }

        free(this.rc);
        this.rc = null;

        if (bgzf_close(this.bgzf) != 0) writefln("hts_close returned non-zero status: %s\n", fromStringz(this.fn));
        this.bgzf = null;
    }

    /**
    Make sure a line is buffered, unless the end of the stream was reached.

    Idempotent: does nothing while a line is already buffered or after EOF,
    so repeated calls to empty/front never skip input.
    */
    private void fetchLine()
    {
        if (this.buffered || this.eof) return;

        // no handle (default-initialised struct): behave as an empty range
        if (this.bgzf is null) {
            this.eof = true;
            return;
        }

        // scratch buffer, zero-initialised; bgzf_getline() allocates ks.s as needed
        kstring_t ks;

        // int bgzf_getline(BGZF *fp, int delim, kstring_t *str);
        immutable int res = bgzf_getline(this.bgzf, cast(int)'\n', &ks);
        if (res < 0) {
            // negative return: end of file or read error; both end the iteration
            this.eof = true;
        } else {
            this.current = fromStringz(ks.s).idup;
            this.buffered = true;
        }

        // the line has been copied into GC memory (or there is none): release the C buffer
        free(ks.s);
    }

    /// InputRange interface: true when no further line is available
    @property bool empty()
    {
        this.fetchLine();
        return this.eof;
    }

    /// ditto: discard the current line so that the next one is read on demand
    void popFront()
    {
        // guarantees that popFront always advances by exactly one line,
        // even if neither empty nor front was called beforehand
        this.fetchLine();

        this.buffered = false;
        this.current = null;
    }

    /// ditto: the current line, without its line terminator
    string front()
    {
        this.fetchLine();
        return this.current;
    }
}