1 module dhtslib.util; 2 3 import std.string : toStringz; 4 import std.typecons : tuple; 5 6 import core.stdc.stdlib : malloc; 7 import core.stdc.stdio : SEEK_SET; 8 import dhtslib.coordinates; 9 import dhtslib.memory; 10 import htslib; 11 12 /// Returns tuple of String and ZBHO coordinates 13 /// representing input string. Supports htslib coordinate strings. 14 /// i.e chr1:1-10 15 auto getIntervalFromString(string region){ 16 ZBHO coords; 17 auto regStr = toStringz(region); 18 auto ptr = hts_parse_reg64(regStr,&coords.start.pos,&coords.end.pos); 19 if(!ptr){ 20 throw new Exception("Region string could not be parsed"); 21 } 22 auto contig = region[0..ptr - regStr]; 23 return tuple!("contig","interval")(contig,coords); 24 } 25 26 debug(dhtslib_unittest) unittest 27 { 28 auto reg = getIntervalFromString("chrom1:0-100"); 29 assert(reg.contig == "chrom1"); 30 auto c0 = reg.interval; 31 assert(c0 == Interval!(CoordSystem.zbho)(0, 100)); 32 } 33 34 /// TODO: complete getIntervalFromString with the ability to check headers 35 36 // auto getIntervalFromString(Header)(string region, Header h) 37 // if(is(Header == SAMHeader) || is(Header == VCFHeader)) 38 // { 39 // ZBHO coords; 40 // int tid; 41 // auto regStr = toStringz(region); 42 // auto flag = HTS_PARSE_FLAGS.HTS_PARSE_THOUSANDS_SEP; 43 // static if(is(Header == SAMHeader)){ 44 // auto ptr = hts_parse_region(regStr,&tid, &coords.start.pos, &coords.end.pos, cast(hts_name2id_f * )&bam_name2id, h.h, flag); 45 // } else static if(is(Header == VCFHeader)){ 46 // auto ptr = hts_parse_region(regStr,&tid, &coords.start.pos, &coords.end.pos, &bcf_hdr_id2name, h.hdr, flag); 47 // } 48 // if(!ptr){ 49 // throw new Exception("Region string could not be parsed"); 50 // } 51 // return tuple!("tid","interval")(tid, coords); 52 // } 53 54 55 /** 56 All functions below take a SafeHtslibPtr alias as input but return 57 a raw htslib pointer. The returned pointer should be wrapped in 58 a SafeHtslibPtr to maintain safety. 59 60 (We cannot return a new SafeHtslibPtr from these functions because of 61 scope rules with dip1000) 62 */ 63 64 pragma(inline, true){ 65 66 /// deep copy an hts_itr_t 67 /// htslib really needs a copy/dup function for this 68 auto copyHtsItr(HtsItr itr) 69 { 70 /// init 71 auto newItr = cast(hts_itr_t *) malloc(hts_itr_t.sizeof); 72 /// bulk copy data 73 *newItr = *itr; 74 75 /// initialize and copy region list 76 /// if it is not null 77 if(newItr.reg_list){ 78 /// initialize the region lists 79 newItr.reg_list = cast(hts_reglist_t *) malloc(itr.n_reg * hts_reglist_t.sizeof); 80 newItr.reg_list[0 .. newItr.n_reg] = itr.reg_list[0 .. itr.n_reg]; 81 /// for each list 82 for(auto i=0; i < newItr.n_reg; i++) 83 { 84 /// copy all intervals 85 newItr.reg_list[i].intervals = cast(hts_pair_pos_t *) malloc(itr.reg_list[i].count * hts_pair_pos_t.sizeof); 86 newItr.reg_list[i].intervals[0 .. newItr.reg_list[i].count] = itr.reg_list[i].intervals[0 .. itr.reg_list[i].count]; 87 } 88 } 89 90 /// initialize and copy bins list 91 newItr.bins.a = cast(int *) malloc(itr.bins.m * int.sizeof); 92 assert(newItr.bins.m >= newItr.bins.n); 93 newItr.bins.a[0 .. newItr.bins.m] = itr.bins.a[0 .. itr.bins.m]; 94 95 /// initialize and copy off list 96 newItr.off = cast(hts_pair64_max_t *) malloc(itr.n_off * hts_pair64_max_t.sizeof); 97 newItr.off[0 .. newItr.n_off] = itr.off[0 .. itr.n_off]; 98 return newItr; 99 } 100 101 /// copy a htsFile 102 auto copyHtsFile(HtsFile fp){ 103 // collect offset of the file 104 // we would like to copy 105 off_t offset; 106 if(fp.is_bgzf) offset = bgzf_tell(fp.fp.bgzf); 107 else offset = htell(fp.fp.hfile); 108 return copyHtsFile(fp, offset); 109 } 110 111 /// copy a htsFile with custom offset 112 auto copyHtsFile(HtsFile fp, off_t offset) 113 { 114 auto newfp = hts_open(fp.fn, cast(immutable(char)*) "r"); 115 // Need to seek to current offset 116 if (newfp.is_bgzf) 117 bgzf_seek(newfp.fp.bgzf, offset, SEEK_SET); 118 else 119 hseek(newfp.fp.hfile, offset, SEEK_SET); 120 return newfp; 121 } 122 123 /// copy a BGZF 124 auto copyBgzf(Bgzf fp, const(char) * fn) 125 { 126 // collect offset of the file 127 // we would like to copy 128 auto offset = bgzf_tell(fp); 129 return copyBgzf(fp, offset, fn); 130 } 131 132 /// copy a BGZF with custom offset 133 auto copyBgzf(Bgzf fp, off_t offset, const(char)* fn) 134 { 135 auto newfp = bgzf_open(fn, cast(immutable(char)*) "r"); 136 // Need to seek to current offset 137 bgzf_seek(newfp, offset, SEEK_SET); 138 return newfp; 139 } 140 141 142 /// Copy a kstring_t 143 auto copyKstring(Kstring str) 144 { 145 auto newStr = cast(kstring_t*)malloc(kstring_t.sizeof); 146 newStr.l = str.l; 147 newStr.m = str.m; 148 newStr.s = cast(char*)malloc(str.m); 149 newStr.s[0..newStr.m] = str.s[0..str.m]; 150 return newStr; 151 } 152 153 /// create a kstring_t * 154 auto initKstring() 155 { 156 return cast(kstring_t*)malloc(kstring_t.sizeof); 157 } 158 }