/* The MIT License

   Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
   Copyright (C) 2013-2014, 2016, 2018-2020 Genome Research Ltd.

   Permission is hereby granted, free of charge, to any person obtaining
   a copy of this software and associated documentation files (the
   "Software"), to deal in the Software without restriction, including
   without limitation the rights to use, copy, modify, merge, publish,
   distribute, sublicense, and/or sell copies of the Software, and to
   permit persons to whom the Software is furnished to do so, subject to
   the following conditions:

   The above copyright notice and this permission notice shall be
   included in all copies or substantial portions of the Software.

   THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
   EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
   MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
   NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
   BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
   ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
   CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
   SOFTWARE.
*/
module htslib.kstring;

@system:
nothrow:
@nogc:

import core.stdc.config : c_long;
import core.stdc.stdarg;
import core.stdc.stdio : EOF;
import core.stdc.stdlib;
import core.stdc.string : memcpy, strlen;

import htslib.kroundup;

alias ssize_t = ptrdiff_t; // should be defined in core.stdc somewhere but is not :/

extern (C):

// __MINGW_PRINTF_FORMAT

enum HAVE___BUILTIN_CLZ = 1;

/* kstring_t is a simple non-opaque type whose fields are likely to be
 * used directly by user code (but see also ks_str() and ks_len() below).
 * A kstring_t object is initialised by either of
 *       kstring_t str = KS_INITIALIZE;
 *       kstring_t str; ...; ks_initialize(&str);
 * and either ownership of the underlying buffer should be given away before
 * the object disappears (see ks_release() below) or the kstring_t should be
 * destroyed with ks_free(&str) or free(str.s) */

alias KSTRING_T = kstring_t;

/// Growable character buffer; the inline helpers below use `l` as the number
/// of bytes in use and `m` as the allocated capacity of `s`.
struct kstring_t
{
    size_t l;
    size_t m;
    char* s;
}

/// Tokeniser state passed to kstrtok()
struct ks_tokaux_t
{
    ulong[4] tab;
    int sep;
    int finished;
    const(char)* p; // end of the current token
}

int kvsprintf(kstring_t* s, const(char)* fmt, va_list ap);

int ksprintf(kstring_t* s, const(char)* fmt, ...);

int kputd(double d, kstring_t* s); // custom %g only handler

int ksplit_core(char* s, int delimiter, int* _max, int** _offsets);

char* kstrstr(const(char)* str, const(char)* pat, int** _prep);

char* kstrnstr(const(char)* str, const(char)* pat, int n, int** _prep);

void* kmemmem(const(void)* _str, int n, const(void)* _pat, int m, int** _prep);

/* kstrtok() is similar to strtok_r() except that str is not
 * modified and both str and sep can be NULL. For efficiency, it is
 * actually recommended to set both to NULL in the subsequent calls
 * if sep is not changed. */
char* kstrtok(const(char)* str, const(char)* sep, ks_tokaux_t* aux);

/* kgetline() uses the supplied fgets()-like function to read a "\n"-
 * or "\r\n"-terminated line from fp.  The line read is appended to the
 * kstring without its terminator and 0 is returned; EOF is returned at
 * EOF or on error (determined by querying fp, as per fgets()). */
// A D `function` type is already a pointer to a function, so this alias
// corresponds to the C parameter type `kgets_func *` and needs no extra `*`.
alias kgets_func = char* function(char*, int, void*);
int kgetline(kstring_t* s, kgets_func fgets_fn, void* fp);

/* kgetline2() uses the supplied hgetln()-like function to read a "\n"-
 * or "\r\n"-terminated line from fp.  The line read is appended to the
 * kstring without its terminator and 0 is returned; EOF is returned at
 * EOF or on error (determined by querying fp, as per fgets()). */
// The C callback returns ssize_t, which is not the same width as c_long on
// every target, so the module's ssize_t alias is used here.
alias kgets_func2 = ssize_t function(char*, size_t, void*);
int kgetline2(kstring_t* s, kgets_func2 fgets_fn, void* fp);

/// kstring initializer for structure assignment
enum KS_INITIALIZE = kstring_t(0, 0, null);

/// kstring initializer for pointers
/**
   Sets the length and capacity to zero and the buffer pointer to null
   without freeing anything.

   @note Not to be used if the buffer has been allocated.  Use ks_release()
   or ks_clear() instead.
 */
void ks_initialize(kstring_t* s)
{
    s.l = s.m = 0;
    s.s = null;
}

/// Resize a kstring to a given capacity
/**
   Does nothing when the current capacity `s.m` is already at least `size`;
   the buffer is never shrunk.

   @return 0 on success, -1 if realloc() fails (the kstring is left untouched)
 */
int ks_resize(kstring_t* s, size_t size)
{
    if (s.m < size) {
        char* tmp;
        // NOTE(review): kroundup_size_t is declared in htslib.kroundup;
        // presumably it rounds `size` up in place - confirm against that module.
        kroundup_size_t(size);
        tmp = cast(char*)realloc(s.s, size);
        if (!tmp && size)
            return -1;
        s.s = tmp;
        s.m = size;
    }
    return 0;
}

/// Increase kstring capacity by a given number of bytes
/**
   Requests room for `expansion` bytes beyond the current length `s.l`.

   @return 0 on success, -1 on size_t overflow or allocation failure
 */
int ks_expand(kstring_t* s, size_t expansion)
{
    size_t new_size = s.l + expansion;

    if (new_size < s.l) // Overflow check
        return -1;
    return ks_resize(s, new_size);
}

/// Returns the kstring buffer
char* ks_str(kstring_t* s)
{
    return s.s;
}

/// Returns the kstring buffer, or an empty string if l == 0
/**
 * Unlike ks_str(), this function will never return NULL.  If the kstring is
 * empty it will return a read-only empty string.  As the returned value
 * may be read-only, the caller should not attempt to modify it.
 */
const(char)* ks_c_str(kstring_t* s)
{
    return s.l && s.s ? s.s : "";
}

/// Returns the current length (the `l` field) of the kstring
size_t ks_len(kstring_t* s)
{
    return s.l;
}

/// Reset kstring length to zero
/**
   The buffer and its capacity are left as they are.

   @return The kstring itself

   Example use: kputsn(string, len, ks_clear(s))
 */
kstring_t* ks_clear(kstring_t* s)
{
    s.l = 0;
    return s;
}

// Give ownership of the underlying buffer away to something else (making
// that something else responsible for freeing it), leaving the kstring_t
// empty and ready to be used again, or ready to go out of scope without
// needing free(str.s) to prevent a memory leak.
char* ks_release(kstring_t* s)
{
    char* ss = s.s;
    s.l = s.m = 0;
    s.s = null;
    return ss;
}

/// Safely free the underlying buffer in a kstring.
/**
   Accepts a null `s`.  After the call the kstring is back in its
   initialised (empty, unallocated) state.
 */
void ks_free(kstring_t* s)
{
    if (s) {
        free(s.s);
        ks_initialize(s);
    }
}

/// Append `l` bytes from `p` and NUL-terminate the result
/**
   @return `l` (cast to int) on success, EOF on size overflow or
           allocation failure
 */
int kputsn(const(char)* p, size_t l, kstring_t* s)
{
    size_t new_sz = s.l + l + 2;
    // new_sz <= s.l can only happen if the addition wrapped around
    if (new_sz <= s.l || ks_resize(s, new_sz) < 0)
        return EOF;
    memcpy(s.s + s.l, p, l);
    s.l += l;
    s.s[s.l] = 0;
    return cast(int)l; // no implicit down casting
}

/// Append a NUL-terminated C string; see kputsn() for the return value
int kputs(const(char)* p, kstring_t* s)
{
    return kputsn(p, strlen(p), s);
}

/// Append a single character and NUL-terminate the result
/**
   @return the character written (as an unsigned byte value) on success,
           EOF on allocation failure
 */
int kputc(int c, kstring_t* s)
{
    if (ks_resize(s, s.l + 2) < 0)
        return EOF;
    s.s[s.l++] = cast(char)c; // no implicit down casting
    s.s[s.l] = 0;
    return cast(ubyte)c;
}

/// Append a single character without writing a NUL terminator
/**
   @return 1 on success, EOF on allocation failure
 */
int kputc_(int c, kstring_t* s)
{
    if (ks_resize(s, s.l + 1) < 0)
        return EOF;
    s.s[s.l++] = cast(char)c; // no implicit down casting
    return 1;
}

/// Append `l` raw bytes from `p` without writing a NUL terminator
/**
   @return `l` (cast to int) on success, EOF on size overflow or
           allocation failure
 */
int kputsn_(const(void)* p, size_t l, kstring_t* s)
{
    size_t new_sz = s.l + l;
    // Ask for at least one byte so that s.s is allocated even when l == 0
    if (new_sz < s.l || ks_resize(s, new_sz ? new_sz : 1) < 0)
        return EOF;
    memcpy(s.s + s.l, p, l);
    s.l += l;
    return cast(int)l; // no implicit down casting
}

// htslib 1.10 replaced this function with a higher performance
// version using BSR/CTLZ intrinsics . this diverges from klib's
// kstring implementation. other functions may have also changed.
/// Append the decimal representation of `x` and NUL-terminate the result
/**
   Params:
       x = value to print; must not be negative
       s = kstring to append to

   @return 0 on success, EOF on allocation failure
 */
int kputuw(T)(T x, kstring_t* s)
{
    // All two-digit pairs "00" .. "99", so digits can be copied out in pairs
    static immutable string kputuw_dig2r =
        "00010203040506070809" ~
        "10111213141516171819" ~
        "20212223242526272829" ~
        "30313233343536373839" ~
        "40414243444546474849" ~
        "50515253545556575859" ~
        "60616263646566676869" ~
        "70717273747576777879" ~
        "80818283848586878889" ~
        "90919293949596979899";

    // Trivial case - also prevents a count-leading-zeros of 0, which is undefined
    if (x < 10) {
        if (ks_resize(s, s.l + 2) < 0)
            return EOF;
        s.s[s.l++] = cast(char)('0' + x);
        s.s[s.l] = 0;
        return 0;
    }

    // The lookup tables below have one entry per bit of a 32-bit value, so
    // the clz method is only valid for types no wider than uint.
    version (LDC) {
        enum useClz = T.sizeof <= uint.sizeof;
    } else {
        enum useClz = false;
    }

    // Find out how many digits are to be printed.
    uint l = 0;
    static if (useClz) {
        /*
         * Table method - should be quick if clz can be done in hardware.
         * Find the most significant bit of the value to print and look
         * up in a table to find out how many decimal digits are needed.
         * This number needs to be adjusted by 1 for cases where the decimal
         * length could vary for a given number of bits (for example,
         * a four bit number could be between 8 and 15).
         */
        import ldc.intrinsics : llvm_ctlz;

        static immutable uint[32] kputuw_num_digits = [
            10, 10, 10,  9,  9,  9,  8,  8,
             8,  7,  7,  7,  7,  6,  6,  6,
             5,  5,  5,  4,  4,  4,  4,  3,
             3,  3,  2,  2,  2,  1,  1,  1
        ];
        static immutable uint[32] kputuw_thresholds = [
            0,        0, 1000000000U, 0,       0, 100000000U,   0,      0,
            10000000, 0,          0,  0, 1000000,         0,    0, 100000,
            0,        0,      10000,  0,       0,         0, 1000,      0,
            0,      100,          0,  0,      10,         0,    0,      0
        ];

        // Widen narrower types so the leading zeros are counted over
        // exactly 32 bits, matching the table layout.
        uint v = x;
        uint lz = llvm_ctlz(v, true); // ldc version of __builtin_clz
        l = kputuw_num_digits[lz] - (v < kputuw_thresholds[lz]);
    } else {
        // Fallback for when clz is not available (or T is wider than 32
        // bits).  Counting by division cannot overflow, unlike comparing
        // against a growing power of ten.
        auto rest = x;
        do {
            l++;
            rest /= 10;
        } while (rest != 0);
    }

    if (ks_resize(s, s.l + l + 2) < 0)
        return EOF;

    // Add digits two at a time, filling the reserved space from the right
    uint j = l;
    char* cp = s.s + s.l;
    while (x >= 10) {
        const(char)* d = &kputuw_dig2r[cast(size_t)(2 * (x % 100))];
        x /= 100;
        memcpy(&cp[j -= 2], d, 2);
    }

    // Last one (if necessary).  We know that x < 10 by now.
    if (j == 1)
        cp[0] = cast(char)(x + '0');

    s.l += l;
    s.s[s.l] = 0;
    return 0;
}

/// Append the decimal representation of a signed int
/**
   @return 0 on success, EOF on allocation failure
 */
int kputw(int c, kstring_t* s)
{
    uint x = c;
    if (c < 0) {
        x = -x; // unsigned negation, so int.min is handled too
        if (ks_resize(s, s.l + 3) < 0)
            return EOF;
        s.s[s.l++] = '-';
    }

    return kputuw(x, s);
}

/// Append the decimal representation of a signed 64-bit integer
/**
   @return 0 on success, EOF on allocation failure
 */
int kputll(long c, kstring_t* s)
{
    char[32] buf;
    int i, l = 0;
    ulong x = c;
    if (c < 0) x = -x;
    // Digits are produced least-significant first, then copied out reversed
    do { buf[l++] = x%10 + '0'; x /= 10; } while (x > 0);
    if (c < 0) buf[l++] = '-';
    if (ks_resize(s, s.l + l + 2) < 0)
        return EOF;
    for (i = l - 1; i >= 0; --i) s.s[s.l++] = buf[i];
    s.s[s.l] = 0;
    return 0;
}

/// Append the decimal representation of a C long; forwards to kputll()
int kputl(c_long c, kstring_t* s) {
    return kputll(c, s);
}

/*
 * Returns 's' split by delimiter, with *n being the number of components;
 *         NULL on failure.
 */
int* ksplit(kstring_t* s, int delimiter, int* n)
{
    int max = 0;
    int* offsets = null;
    *n = ksplit_core(s.s, delimiter, &max, &offsets);
    return offsets;
}