1 module dhtslib.util;
2 
3 import std.string : toStringz;
4 import std.typecons : tuple;
5 
6 import core.stdc.stdlib : malloc;
7 import core.stdc.stdio : SEEK_SET;
8 import dhtslib.coordinates;
9 import dhtslib.memory;
10 import htslib;
11 
12 /// Returns tuple of String and ZBHO coordinates
13 /// representing input string. Supports htslib coordinate strings.
14 /// i.e chr1:1-10
15 auto getIntervalFromString(string region){
16     ZBHO coords;
17     auto regStr = toStringz(region);
18     auto ptr = hts_parse_reg64(regStr,&coords.start.pos,&coords.end.pos);
19     if(!ptr){
20         throw new Exception("Region string could not be parsed");
21     }
22     auto contig = region[0..ptr - regStr];
23     return tuple!("contig","interval")(contig,coords);
24 }
25 
26 debug(dhtslib_unittest) unittest
27 {
28     auto reg = getIntervalFromString("chrom1:0-100");
29     assert(reg.contig == "chrom1");
30     auto c0 = reg.interval;
31     assert(c0 == Interval!(CoordSystem.zbho)(0, 100));
32 }
33 
34 /// TODO: complete getIntervalFromString with the ability to check headers
35 
36 // auto getIntervalFromString(Header)(string region, Header h)
37 //     if(is(Header == SAMHeader) || is(Header == VCFHeader))
38 // {
39 //     ZBHO coords;
40 //     int tid;
41 //     auto regStr = toStringz(region);
42 //     auto flag =  HTS_PARSE_FLAGS.HTS_PARSE_THOUSANDS_SEP;
43 //     static if(is(Header == SAMHeader)){
44 //         auto ptr = hts_parse_region(regStr,&tid, &coords.start.pos, &coords.end.pos, cast(hts_name2id_f * )&bam_name2id, h.h, flag);
45 //     } else static if(is(Header == VCFHeader)){
46 //         auto ptr = hts_parse_region(regStr,&tid, &coords.start.pos, &coords.end.pos, &bcf_hdr_id2name, h.hdr, flag);
47 //     }
48 //     if(!ptr){
49 //         throw new Exception("Region string could not be parsed");
50 //     }
51 //     return tuple!("tid","interval")(tid, coords);
52 // }
53 
54 
55 /**
56     All functions below take a SafeHtslibPtr alias as input but return
57     a raw htslib pointer. The returned pointer should be wrapped in
58     a SafeHtslibPtr to maintain safety.
59 
60     (We cannot return a new SafeHtslibPtr from these functions because of 
61     scope rules with dip1000)
62 */
63 
64 pragma(inline, true){
65 
66     /// deep copy an hts_itr_t
67     /// htslib really needs a copy/dup function for this
68     auto copyHtsItr(HtsItr itr) 
69     {
70         /// init
71         auto newItr = cast(hts_itr_t *) malloc(hts_itr_t.sizeof);
72         /// bulk copy data
73         *newItr = *itr;
74 
75         /// initialize and copy region list
76         /// if it is not null
77         if(newItr.reg_list){
78             /// initialize the region lists
79             newItr.reg_list = cast(hts_reglist_t *) malloc(itr.n_reg * hts_reglist_t.sizeof);
80             newItr.reg_list[0 .. newItr.n_reg] = itr.reg_list[0 .. itr.n_reg];
81             /// for each list
82             for(auto i=0; i < newItr.n_reg; i++)
83             {
84                 /// copy all intervals
85                 newItr.reg_list[i].intervals = cast(hts_pair_pos_t *) malloc(itr.reg_list[i].count * hts_pair_pos_t.sizeof);
86                 newItr.reg_list[i].intervals[0 .. newItr.reg_list[i].count] = itr.reg_list[i].intervals[0 .. itr.reg_list[i].count];
87             }
88         }
89 
90         /// initialize and copy bins list
91         newItr.bins.a = cast(int *) malloc(itr.bins.m * int.sizeof);
92         assert(newItr.bins.m >= newItr.bins.n);
93         newItr.bins.a[0 .. newItr.bins.m] = itr.bins.a[0 .. itr.bins.m];
94 
95         /// initialize and copy off list
96         newItr.off = cast(hts_pair64_max_t *) malloc(itr.n_off * hts_pair64_max_t.sizeof);
97         newItr.off[0 .. newItr.n_off] = itr.off[0 .. itr.n_off];
98         return newItr;
99     }
100 
101     /// copy a htsFile
102     auto copyHtsFile(HtsFile fp){
103         // collect offset of the file
104         // we would like to copy
105         off_t offset;
106         if(fp.is_bgzf) offset = bgzf_tell(fp.fp.bgzf);
107         else offset = htell(fp.fp.hfile);
108         return copyHtsFile(fp, offset);
109     }
110 
111     /// copy a htsFile with custom offset
112     auto copyHtsFile(HtsFile fp, off_t offset)
113     {
114         auto newfp = hts_open(fp.fn, cast(immutable(char)*) "r");
115         // Need to seek to current offset
116         if (newfp.is_bgzf)
117             bgzf_seek(newfp.fp.bgzf, offset, SEEK_SET);
118         else
119             hseek(newfp.fp.hfile, offset, SEEK_SET);
120         return newfp;
121     }
122 
123     /// copy a BGZF
124     auto copyBgzf(Bgzf fp, const(char) * fn)
125     {
126         // collect offset of the file
127         // we would like to copy
128         auto offset = bgzf_tell(fp);
129         return copyBgzf(fp, offset, fn);
130     }
131 
132     /// copy a BGZF with custom offset
133     auto copyBgzf(Bgzf fp, off_t offset, const(char)* fn)
134     {
135         auto newfp = bgzf_open(fn, cast(immutable(char)*) "r");
136         // Need to seek to current offset
137         bgzf_seek(newfp, offset, SEEK_SET);
138         return newfp;
139     }
140 
141 
142     /// Copy a kstring_t
143     auto copyKstring(Kstring str)
144     {
145         auto newStr = cast(kstring_t*)malloc(kstring_t.sizeof);
146         newStr.l = str.l;
147         newStr.m = str.m;
148         newStr.s = cast(char*)malloc(str.m);
149         newStr.s[0..newStr.m] = str.s[0..str.m];
150         return newStr;
151     }
152 
153     /// create a kstring_t *
154     auto initKstring()
155     {
156         return cast(kstring_t*)malloc(kstring_t.sizeof);
157     }
158 }