/* The MIT License

   Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
   Copyright (C) 2013-2014, 2016, 2018-2019 Genome Research Ltd.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/

module htslib.kstring;

import core.stdc.config : c_long;
import core.stdc.stdarg;
import core.stdc.stdio : EOF;
import core.stdc.stdlib;
import core.stdc.string : memcpy, strlen;

alias ssize_t = ptrdiff_t; // should be defined in core.stdc somewhere but is not :/

extern (C):

// #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))

/// Round 32 or 64 bit (u)int x to power of 2 that is equal or greater (JSB)
/**
 * The value is updated in place.  An input of 0, or one larger than the
 * biggest power of two representable in size_t, wraps around to 0.
 */
pragma(inline, true)
extern (D)
void kroundup_size_t(ref size_t x)
{
    x -= 1;
    // Smear the highest set bit into every lower bit position ...
    x |= (x >> 1);
    x |= (x >> 2);
    x |= (x >> 4);
    x |= (x >> 8);
    x |= (x >> 16);

    static if (size_t.sizeof == 8)
        x |= (x >> 32);

    // ... so that adding one yields the next power of two.
    ++x;
}

/+
#if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4))
#ifdef __MINGW_PRINTF_FORMAT
#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__MINGW_PRINTF_FORMAT, fmt, arg)))
#else
#define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg)))
#endif // __MINGW_PRINTF_FORMAT
#else
#define KS_ATTR_PRINTF(fmt, arg)
#endif

#ifndef HAVE___BUILTIN_CLZ
#if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
#define HAVE___BUILTIN_CLZ 1
#endif
#endif
+/

/* kstring_t is a simple non-opaque type whose fields are likely to be
 * used directly by user code (but see also ks_str() and ks_len() below).
 * A kstring_t object is initialised by either of
 *       kstring_t str = KS_INITIALIZE;
 *       kstring_t str; ...; ks_initialize(&str);
 * and either ownership of the underlying buffer should be given away before
 * the object disappears (see ks_release() below) or the kstring_t should be
 * destroyed with ks_free(&str) or free(str.s) */

alias KSTRING_T = kstring_t;

/// Growable string buffer
struct kstring_t
{
    size_t l; /// number of bytes currently stored in `s`
    size_t m; /// number of bytes allocated for `s`
    char* s;  /// buffer, (re)allocated with realloc(); may be null
}

/// State carried between successive kstrtok() calls
struct ks_tokaux_t
{
    ulong[4] tab;
    int sep;
    int finished;
    const(char)* p; // end of the current token
}

// The functions below are declarations only; their definitions must be
// supplied at link time.

int kvsprintf(kstring_t* s, const(char)* fmt, va_list ap);

int ksprintf(kstring_t* s, const(char)* fmt, ...);

int kputd(double d, kstring_t* s); // custom %g only handler

int ksplit_core(char* s, int delimiter, int* _max, int** _offsets);

char* kstrstr(const(char)* str, const(char)* pat, int** _prep);

char* kstrnstr(const(char)* str, const(char)* pat, int n, int** _prep);

void* kmemmem(const(void)* _str, int n, const(void)* _pat, int m, int** _prep);

/* kstrtok() is similar to strtok_r() except that str is not
 * modified and both str and sep can be NULL. For efficiency, it is
 * actually recommended to set both to NULL in the subsequent calls
 * if sep is not changed. */
char* kstrtok(const(char)* str, const(char)* sep, ks_tokaux_t* aux);

/* kgetline() uses the supplied fgets()-like function to read a "\n"-
 * or "\r\n"-terminated line from fp.  The line read is appended to the
 * kstring without its terminator and 0 is returned; EOF is returned at
 * EOF or on error (determined by querying fp, as per fgets()). */
alias kgets_func = char* function(char*, int, void*);
int kgetline(kstring_t* s, kgets_func fgets, void* fp);

// This matches the signature of hgetln(), apart from the last pointer.
// The return type is ssize_t (not c_long): the two differ in width on
// targets where long is narrower than a pointer.
alias kgets_func2 = ssize_t function(char*, size_t, void*);
int kgetline2(kstring_t* s, kgets_func2 fgets, void* fp);

/// kstring initializer for structure assignment
//#define KS_INITIALIZE { 0, 0, NULL }

/// kstring initializer for pointers
/**
   @note Not to be used if the buffer has been allocated.  Use ks_release()
   or ks_clear() instead.
*/
void ks_initialize(kstring_t* s)
{
    s.l = s.m = 0;
    s.s = null;
}

/// Resize a kstring to a given capacity
/**
 * Grows the buffer so that it can hold at least `size` bytes; the new
 * capacity is `size` rounded up to a power of two.  Nothing happens when
 * the current capacity is already sufficient.
 *
 * @return 0 on success; -1 if the rounded capacity is not representable
 *         or realloc() fails, in which case the kstring is left unchanged.
 */
int ks_resize(kstring_t* s, size_t size)
{
    if (s.m < size)
    {
        size_t capacity = size;
        kroundup_size_t(capacity);
        // A request above the largest power of two wraps to 0; passing
        // that to realloc() would discard the buffer and report success.
        if (capacity < size)
            return -1;
        char* tmp = cast(char*) realloc(s.s, capacity);
        if (!tmp)
            return -1;
        s.s = tmp;
        s.m = capacity;
    }
    return 0;
}

/// Increase kstring capacity by a given number of bytes
/**
 * Ensures capacity for the current length plus `expansion` bytes.
 *
 * @return 0 on success, -1 on arithmetic overflow or allocation failure
 */
int ks_expand(kstring_t* s, size_t expansion)
{
    size_t new_size = s.l + expansion;

    if (new_size < s.l) // Overflow check
        return -1;
    return ks_resize(s, new_size);
}

/// Returns the kstring buffer
char* ks_str(kstring_t* s)
{
    return s.s;
}

/// Returns the kstring buffer, or an empty string if l == 0
/**
 * Unlike ks_str(), this function will never return NULL.  If the kstring is
 * empty it will return a read-only empty string.  As the returned value
 * may be read-only, the caller should not attempt to modify it.
 */
const(char)* ks_c_str(kstring_t* s)
{
    return s.l && s.s ? s.s : "";
}

/// Returns the number of bytes currently stored in the kstring
size_t ks_len(kstring_t* s)
{
    return s.l;
}

/// Reset kstring length to zero
/**
   The buffer and its capacity are left untouched.

   @return The kstring itself

   Example use: kputsn(string, len, ks_clear(s))
*/
kstring_t* ks_clear(kstring_t* s)
{
    s.l = 0;
    return s;
}

// Give ownership of the underlying buffer away to something else (making
// that something else responsible for freeing it), leaving the kstring_t
// empty and ready to be used again, or ready to go out of scope without
// needing free(str.s) to prevent a memory leak.
char* ks_release(kstring_t* s)
{
    char* ss = s.s;
    s.l = s.m = 0;
    s.s = null;
    return ss;
}

/// Safely free the underlying buffer in a kstring.
/**
 * A null `s` is accepted and ignored.  Afterwards the kstring is empty
 * and may be reused.
 */
void ks_free(kstring_t* s)
{
    if (s)
    {
        free(s.s);
        ks_initialize(s);
    }
}

/// Append `l` bytes from `p` and NUL-terminate the result
/**
 * @return `l` converted to int on success, EOF on overflow or allocation
 *         failure
 */
int kputsn(const(char)* p, size_t l, kstring_t* s)
{
    size_t new_sz = s.l + l + 2;
    if (new_sz <= s.l || ks_resize(s, new_sz) < 0)
        return EOF;
    memcpy(s.s + s.l, p, l);
    s.l += l;
    s.s[s.l] = 0;
    return cast(int) l; // no implicit down casting
}

/// Append the NUL-terminated string `p`
/**
 * @return the number of bytes appended, or EOF on failure
 */
int kputs(const(char)* p, kstring_t* s)
{
    return kputsn(p, strlen(p), s);
}

/// Append a single character and NUL-terminate the result
/**
 * @return `c` converted to ubyte on success, EOF on allocation failure
 */
int kputc(int c, kstring_t* s)
{
    if (ks_resize(s, s.l + 2) < 0)
        return EOF;
    s.s[s.l++] = cast(char) c; // no implicit down casting
    s.s[s.l] = 0;
    return cast(ubyte) c;
}

/// Append a single character without writing a NUL terminator
/**
 * @return 1 on success, EOF on allocation failure
 */
int kputc_(int c, kstring_t* s)
{
    if (ks_resize(s, s.l + 1) < 0)
        return EOF;
    s.s[s.l++] = cast(char) c; // no implicit down casting
    return 1;
}

/// Append `l` bytes from `p` without writing a NUL terminator
/**
 * @return `l` converted to int on success, EOF on overflow or allocation
 *         failure
 */
int kputsn_(const(void)* p, size_t l, kstring_t* s)
{
    size_t new_sz = s.l + l;
    // Ask for at least one byte so that a buffer gets allocated even
    // when the total length is zero.
    if (new_sz < s.l || ks_resize(s, new_sz ? new_sz : 1) < 0)
        return EOF;
    memcpy(s.s + s.l, p, l);
    s.l += l;
    return cast(int) l; // no implicit down casting
}

/// Append the decimal representation of an unsigned int
/**
 * htslib 1.10 replaced this function with a higher performance version
 * using BSR/CTLZ intrinsics, diverging from klib's kstring.  The plain
 * digit-by-digit form is used here, mirroring kputll() below.
 *
 * @return 0 on success, EOF on allocation failure
 */
int kputuw(uint x, kstring_t* s)
{
    char[10] buf; // uint.max has 10 decimal digits
    int l = 0;

    // Digits are produced least significant first ...
    do
    {
        buf[l++] = cast(char)('0' + x % 10);
        x /= 10;
    }
    while (x > 0);

    if (ks_resize(s, s.l + l + 2) < 0)
        return EOF;
    // ... so copy them out in reverse.
    for (int i = l - 1; i >= 0; --i)
        s.s[s.l++] = buf[i];
    s.s[s.l] = 0;
    return 0;
}

/// Append the decimal representation of a signed int
/**
 * @return 0 on success, EOF on allocation failure
 */
int kputw(int c, kstring_t* s)
{
    uint x = c;
    if (c < 0)
    {
        // Negating in unsigned arithmetic also handles int.min
        x = -x;
        if (ks_resize(s, s.l + 3) < 0)
            return EOF;
        s.s[s.l++] = '-';
    }

    return kputuw(x, s);
}

/// Append the decimal representation of a signed 64-bit integer
/**
 * @return 0 on success, EOF on allocation failure
 */
int kputll(long c, kstring_t* s)
{
    char[32] buf;
    int i, l = 0;
    ulong x = c;
    if (c < 0)
        x = -x; // unsigned negation also handles long.min
    // Build the digits (and sign) in reverse order
    do
    {
        buf[l++] = cast(char)(x % 10 + '0');
        x /= 10;
    }
    while (x > 0);
    if (c < 0)
        buf[l++] = '-';
    if (ks_resize(s, s.l + l + 2) < 0)
        return EOF;
    for (i = l - 1; i >= 0; --i)
        s.s[s.l++] = buf[i];
    s.s[s.l] = 0;
    return 0;
}

/// Append the decimal representation of a C long; forwards to kputll()
int kputl(c_long c, kstring_t* s)
{
    return kputll(c, s);
}

/*
 * Returns 's' split by delimiter, with *n being the number of components;
 *         NULL on failure.
 */
int* ksplit(kstring_t* s, int delimiter, int* n)
{
    int max = 0;
    int* offsets = null;
    *n = ksplit_core(s.s, delimiter, &max, &offsets);
    return offsets;
}