1 /* The MIT License
2 
3    Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
4    Copyright (C) 2013-2014, 2016, 2018-2020 Genome Research Ltd.
5 
6    Permission is hereby granted, free of charge, to any person obtaining
7    a copy of this software and associated documentation files (the
8    "Software"), to deal in the Software without restriction, including
9    without limitation the rights to use, copy, modify, merge, publish,
10    distribute, sublicense, and/or sell copies of the Software, and to
11    permit persons to whom the Software is furnished to do so, subject to
12    the following conditions:
13 
14    The above copyright notice and this permission notice shall be
15    included in all copies or substantial portions of the Software.
16 
17    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24    SOFTWARE.
25 */
26 module htslib.kstring;
27 
28 @system:
29 nothrow:
30 @nogc:
31 
32 import core.stdc.config : c_long;
33 import core.stdc.stdarg;
34 import core.stdc.stdio : EOF;
35 import core.stdc.stdlib;
36 import core.stdc.string : memcpy, strlen;
37 
38 import htslib.kroundup;
39 
40 alias ssize_t = ptrdiff_t;	// should be defined in core.stdc somewhere but is not :/
41 
42 extern (C):
43 
// __MINGW_PRINTF_FORMAT

// Feature flag, presumably carried over from the C header's configuration
// macros (TODO confirm). It is not referenced by the code visible in this
// module: kputuw() selects its count-leading-zeros path with version(LDC).
enum HAVE___BUILTIN_CLZ = 1;
47 
48 /* kstring_t is a simple non-opaque type whose fields are likely to be
49  * used directly by user code (but see also ks_str() and ks_len() below).
50  * A kstring_t object is initialised by either of
51  *       kstring_t str = KS_INITIALIZE;
52  *       kstring_t str; ...; ks_initialize(&str);
53  * and either ownership of the underlying buffer should be given away before
54  * the object disappears (see ks_release() below) or the kstring_t should be
55  * destroyed with  ks_free(&str) or free(str.s) */
56 
/// Upper-case spelling of the same type.
alias KSTRING_T = kstring_t;

/**
 * Growable character buffer.
 *
 * Fields, in memory order:
 *   l = number of bytes currently in use (what ks_len() reports and
 *       ks_clear() resets to zero)
 *   m = number of bytes allocated for `s` (maintained by ks_resize())
 *   s = the buffer itself; `null` until something has been stored
 *
 * A default-initialised value (`kstring_t str;`) has all three fields zero.
 */
struct kstring_t
{
    size_t l, m;
    char* s;
}
65 
/**
 * Tokenizer state passed to kstrtok() through its `aux` parameter.
 *
 * NOTE(review): `tab`, `sep` and `finished` are maintained by the C
 * implementation of kstrtok(), which is not visible here; their exact
 * meaning should be confirmed against that code.
 */
struct ks_tokaux_t
{
    ulong[4] tab;
    int sep, finished;
    const(char)* p; // end of the current token
}
73 
// The functions below are only declared here; their bodies live in the C
// library. Comments marked NOTE(review) are assumptions drawn from the
// names and signatures and should be confirmed against the C sources.

// NOTE(review): presumably a vsprintf-style formatter that writes into `s`
// using `fmt` and the argument list `ap` - confirm.
int kvsprintf(kstring_t* s, const(char)* fmt, va_list ap);

// NOTE(review): presumably the variadic counterpart of kvsprintf() - confirm.
int ksprintf(kstring_t* s, const(char)* fmt, ...);

int kputd(double d, kstring_t* s); // custom %g only handler

// NOTE(review): splits `s` on `delimiter`; ksplit() in this module stores the
// return value as the number of components and hands back `*_offsets`.
// How `_max` is used is not visible here - confirm.
int ksplit_core(char* s, int delimiter, int* _max, int** _offsets);

// NOTE(review): presumably searches `str` for `pat`; the role of `_prep`
// is not visible here - confirm.
char* kstrstr(const(char)* str, const(char)* pat, int** _prep);

// NOTE(review): presumably kstrstr() limited by the length `n` - confirm.
char* kstrnstr(const(char)* str, const(char)* pat, int n, int** _prep);

// NOTE(review): presumably searches the `n`-byte block `_str` for the
// `m`-byte block `_pat` - confirm.
void* kmemmem(const(void)* _str, int n, const(void)* _pat, int m, int** _prep);

/* kstrtok() is similar to strtok_r() except that str is not
 * modified and both str and sep can be NULL. For efficiency, it is
 * actually recommended to set both to NULL in the subsequent calls
 * if sep is not changed. */
char* kstrtok(const(char)* str, const(char)* sep, ks_tokaux_t* aux);
93 
/* kgetline() uses the supplied fgets()-like function to read a "\n"-
 * or "\r\n"-terminated line from fp.  The line read is appended to the
 * kstring without its terminator and 0 is returned; EOF is returned at
 * EOF or on error (determined by querying fp, as per fgets()). */

/// Pointer to an fgets()-like reader: (buffer, buffer size, stream) -> buffer.
///
/// A D `function` type is already a pointer to a function, so it must not
/// carry an extra `*`: that would describe a pointer to a function pointer,
/// which does not match the plain function pointer the C side receives.
alias kgets_func = char* function(char*, int, void*);

/// Read one line from `fp` via `fgets_fn` and append it to `s`.
/// Returns: 0 on success, EOF at end of file or on error.
int kgetline(kstring_t* s, kgets_func fgets_fn, void* fp);
100 
/* kgetline2() uses the supplied hgetln()-like function to read a "\n"-
 * or "\r\n"-terminated line from fp.  The line read is appended to the
 * kstring without its terminator and 0 is returned; EOF is returned at
 * EOF or on error (determined by querying fp, as per fgets()). */

/// Pointer to an hgetln()-like reader: (buffer, buffer size, stream) -> count.
///
/// A D `function` type is already a pointer to a function, so no extra `*`
/// is applied. The return type is ptrdiff_t (what this module aliases as
/// ssize_t) rather than c_long, because the two differ in width on LLP64
/// targets such as 64-bit Windows.
alias kgets_func2 = ptrdiff_t function(char*, size_t, void*);

/// Read one line from `fp` via `fgets_fn` and append it to `s`.
/// Returns: 0 on success, EOF at end of file or on error.
int kgetline2(kstring_t* s, kgets_func2 fgets_fn, void* fp);
107 
/// kstring initializer for structure assignment
/**
   Example use: kstring_t str = KS_INITIALIZE;

   All fields are zero / null, which is also what a default-initialised
   kstring_t contains.
*/
enum KS_INITIALIZE = kstring_t(0, 0, null);

/// kstring initializer for pointers
/**
   Sets the length and capacity to zero and the buffer pointer to null
   without freeing anything.

   @note Not to be used if the buffer has been allocated.  Use ks_release()
   or ks_clear() instead.
*/
void ks_initialize(kstring_t* s)
{
    s.l = s.m = 0;
    s.s = null;
}
121 
/// Resize a kstring to a given capacity
/**
 * Grows the buffer so that it can hold at least `size` bytes.  Nothing
 * happens when the current capacity is already sufficient; the buffer is
 * never shrunk.  The requested size is first passed through
 * kroundup_size_t() from htslib.kroundup.
 *
 * Returns: 0 on success (including when no growth was needed), -1 if
 *          realloc() could not provide the memory; `s` is left untouched
 *          in that case.
 */
int ks_resize(kstring_t* s, size_t size)
{
    // Enough room already: leave the buffer alone.
    if (size <= s.m)
        return 0;

    kroundup_size_t(size);
    char* grown = cast(char*) realloc(s.s, size);
    // A null result is only a failure when a non-zero size was requested.
    if (grown is null && size != 0)
        return -1;

    s.s = grown;
    s.m = size;
    return 0;
}
136 
/// Increase kstring capacity by a given number of bytes
/**
 * Makes sure the buffer can hold the current length plus `expansion`
 * further bytes.
 *
 * Returns: 0 on success, -1 if the requested total overflows size_t or
 *          ks_resize() fails.
 */
int ks_expand(kstring_t* s, size_t expansion)
{
    immutable size_t wanted = s.l + expansion;

    // An unsigned sum smaller than one of its operands has wrapped around.
    return wanted < s.l ? -1 : ks_resize(s, wanted);
}
146 
/// Returns the kstring buffer
/**
 * The pointer is handed back as stored, so it is null if no buffer has
 * been set.
 */
char* ks_str(kstring_t* s)
{
    char* buffer = s.s;
    return buffer;
}
152 
/// Returns the kstring buffer, or an empty string if l == 0
/**
 * Unlike ks_str(), this function never returns null: when the length is
 * zero or no buffer is set, a read-only empty string is returned instead.
 * Because the result may be read-only, callers must not modify it.
 */
const(char)* ks_c_str(kstring_t* s)
{
    if (s.l == 0 || s.s is null)
        return "";
    return s.s;
}
163 
/// Returns the current length of the kstring (its `l` field)
size_t ks_len(kstring_t* s)
{
    immutable size_t length = s.l;
    return length;
}
168 
/// Reset kstring length to zero
/**
   Only the length is changed; the buffer pointer and capacity are kept.

   @return The kstring itself

   Example use: kputsn(string, len, ks_clear(s))
*/
kstring_t* ks_clear(kstring_t* s)
{
    (*s).l = 0;
    return s;
}
180 
// Hand the underlying buffer over to the caller, who becomes responsible
// for freeing it.  The kstring_t is left empty (null buffer, zero length
// and capacity), so it can be reused or go out of scope without needing
// free(str.s)  to prevent a memory leak.
char* ks_release(kstring_t* s)
{
    char* owned = s.s;

    s.s = null;
    s.m = 0;
    s.l = 0;
    return owned;
}
192 
/// Safely free the underlying buffer in a kstring.
/**
 * A null `s` is accepted and ignored.  Otherwise the buffer is released
 * with free() and the kstring is reset through ks_initialize().
 */
void ks_free(kstring_t* s)
{
    if (s is null)
        return;

    free(s.s);
    ks_initialize(s);
}
201 
/// Append `l` bytes from `p` to the kstring and NUL-terminate it
/**
 * Room for the current contents plus `l + 2` bytes is requested before
 * copying.
 *
 * Returns: `l` converted to int on success; EOF if the size computation
 *          overflows or the buffer cannot be grown.
 */
int kputsn(const(char)* p, size_t l, kstring_t* s)
{
    immutable size_t needed = s.l + l + 2;

    // A total that did not grow past the current length has wrapped around.
    if (needed <= s.l)
        return EOF;
    if (ks_resize(s, needed) < 0)
        return EOF;

    char* dest = s.s + s.l;
    memcpy(dest, p, l);
    dest[l] = 0;
    s.l += l;
    return cast(int) l;	// no implicit down casting
}
212 
/// Append the NUL-terminated C string `p` to the kstring
/**
 * The length is measured with strlen() and the work is done by kputsn(),
 * whose return value is passed on.
 */
int kputs(const(char)* p, kstring_t* s)
{
    immutable size_t length = strlen(p);
    return kputsn(p, length, s);
}
217 
/// Append a single character to the kstring and NUL-terminate it
/**
 * Returns: the character written, as an unsigned byte value, or EOF if
 *          the buffer cannot be grown.
 */
int kputc(int c, kstring_t* s)
{
    // Room for the new character and the terminator.
    if (ks_resize(s, s.l + 2) < 0)
        return EOF;

    char* tail = s.s + s.l;
    tail[0] = cast(char) c;	// no implicit down casting
    tail[1] = 0;
    s.l += 1;
    return cast(ubyte) c;
}
226 
/// Append a single character to the kstring without NUL-terminating it
/**
 * Returns: 1 on success, EOF if the buffer cannot be grown.
 */
int kputc_(int c, kstring_t* s)
{
    immutable size_t at = s.l;

    if (ks_resize(s, at + 1) < 0)
        return EOF;

    s.s[at] = cast(char) c;	// no implicit down casting
    s.l = at + 1;
    return 1;
}
234 
/// Append `l` raw bytes from `p` to the kstring without NUL-terminating it
/**
 * At least one byte of capacity is always requested, even when the
 * resulting length is zero.
 *
 * Returns: `l` converted to int on success; EOF if the size computation
 *          overflows or the buffer cannot be grown.
 */
int kputsn_(const(void)* p, size_t l, kstring_t* s)
{
    immutable size_t total = s.l + l;

    // A sum smaller than the current length has wrapped around.
    if (total < s.l)
        return EOF;

    immutable size_t capacity = total != 0 ? total : 1;
    if (ks_resize(s, capacity) < 0)
        return EOF;

    memcpy(s.s + s.l, p, l);
    s.l = total;
    return cast(int) l;	// no implicit down casting
}
244 
// htslib 1.10 replaced this function with a higher performance
// version using BSR/CTLZ intrinsics. This diverges from klib's
// kstring implementation; other functions may have also changed.

/// Append the decimal representation of `x` to the kstring
/**
 * The result is NUL-terminated.  `x` is expected to be non-negative;
 * kputw() in this module calls it with a `uint`.
 *
 * Under LDC, values of at most 32 bits use a count-leading-zeros table
 * lookup to find the number of digits.  Other compilers and wider types
 * count digits by repeated division, which cannot overflow.
 *
 * Returns: 0 on success, EOF if the buffer cannot be grown.
 */
int kputuw(T)(T x, kstring_t* s)
{
    // Two-character renderings of 0..99, indexed by 2 * value.  Immutable
    // so that the table is shared instead of being thread-local data.
    static immutable string kputuw_dig2r =
        "00010203040506070809" ~
        "10111213141516171819" ~
        "20212223242526272829" ~
        "30313233343536373839" ~
        "40414243444546474849" ~
        "50515253545556575859" ~
        "60616263646566676869" ~
        "70717273747576777879" ~
        "80818283848586878889" ~
        "90919293949596979899";

    // Trivial case - also keeps zero away from the clz intrinsic, whose
    // result is undefined for that input
    if (x < 10) {
        if (ks_resize(s, s.l + 2) < 0)
            return EOF;
        s.s[s.l++] = cast(char)('0'+x);
        s.s[s.l] = 0;
        return 0;
    }

    // The lookup tables below describe 32-bit values only, so the clz path
    // is restricted to types that fit in a uint.
    version (LDC)
        enum bool useClzTable = T.sizeof <= uint.sizeof;
    else
        enum bool useClzTable = false;

    // Find out how many digits are to be printed.
    uint l;
    static if (useClzTable)
    {
        /*
         * Table method - should be quick if clz can be done in hardware.
         * Find the most significant bit of the value to print and look
         * up in a table to find out how many decimal digits are needed.
         * This number needs to be adjusted by 1 for cases where the decimal
         * length could vary for a given number of bits (for example,
         * a four bit number could be between 8 and 15).
         */
        import ldc.intrinsics : llvm_ctlz;

        static immutable uint[32] kputuw_num_digits = [
            10, 10, 10,  9,  9,  9,  8,  8,
            8,   7,  7,  7,  7,  6,  6,  6,
            5,   5,  5,  4,  4,  4,  4,  3,
            3,   3,  2,  2,  2,  1,  1,  1
        ];
        static immutable uint[32] kputuw_thresholds = [
            0,        0, 1000000000U, 0,       0, 100000000U,   0,      0,
            10000000, 0,          0,  0, 1000000,         0,    0, 100000,
            0,        0,      10000,  0,       0,         0, 1000,      0,
            0,      100,          0,  0,      10,         0,    0,      0
        ];

        // Always count leading zeros on a 32-bit value so that the result
        // is a valid index into the 32-entry tables whatever T is.
        uint x32 = cast(uint) x;
        uint lz = llvm_ctlz(x32, true);
        l = kputuw_num_digits[lz] - (x32 < kputuw_thresholds[lz]);
    }
    else
    {
        // Fallback for when clz is not available or T is wider than
        // 32 bits: count digits by division, which cannot wrap around.
        l = 0;
        for (T rest = x; rest != 0; rest /= 10)
            ++l;
    }

    if (ks_resize(s, s.l + l + 2) < 0)
        return EOF;

    // Add digits two at a time, filling the reserved span from the right
    uint j = l;
    char* cp = s.s + s.l;
    while (x >= 10) {
        const(char)* d = &kputuw_dig2r[cast(size_t)(2 * (x % 100))];
        x /= 100;
        memcpy(&cp[j -= 2], d, 2);
    }

    // Last one (if necessary).  We know that x < 10 by now.
    if (j == 1)
        cp[0] = cast(char)(x + '0');

    s.l += l;
    s.s[s.l] = 0;
    return 0;
}
333 
/// Append the decimal representation of a signed int to the kstring
/**
 * A negative value gets a leading '-' and its magnitude is then printed
 * by kputuw(); a non-negative value goes straight to kputuw().
 *
 * Returns: the result of kputuw(), or EOF if the buffer cannot be grown
 *          for the sign.
 */
int kputw(int c, kstring_t* s)
{
    if (c >= 0)
        return kputuw(cast(uint) c, s);

    // Room for the sign plus the minimum kputuw() will need.
    if (ks_resize(s, s.l + 3) < 0)
        return EOF;
    s.s[s.l++] = '-';

    // Negate in unsigned arithmetic so that int.min is handled as well.
    uint magnitude = 0U - cast(uint) c;
    return kputuw(magnitude, s);
}
346 
/// Append the decimal representation of a signed 64-bit integer to the kstring
/**
 * The digits are produced least-significant first in a scratch buffer and
 * then copied into the kstring in reverse.  The result is NUL-terminated.
 *
 * Returns: 0 on success, EOF if the buffer cannot be grown.
 */
int kputll(long c, kstring_t* s)
{
    char[32] digits;
    int count = 0;
    immutable bool negative = c < 0;

    // Work on the magnitude in unsigned arithmetic.
    ulong magnitude = c;
    if (negative)
        magnitude = -magnitude;

    do {
        digits[count++] = cast(char)('0' + magnitude % 10);
        magnitude /= 10;
    } while (magnitude > 0);

    if (negative)
        digits[count++] = '-';

    if (ks_resize(s, s.l + count + 2) < 0)
        return EOF;

    // The scratch buffer holds the text backwards.
    while (count > 0)
        s.s[s.l++] = digits[--count];
    s.s[s.l] = 0;
    return 0;
}
361 
/// Append the decimal representation of a C `long` to the kstring
/**
 * The value is widened to a D `long` and printed by kputll(), whose
 * return value is passed on.
 */
int kputl(c_long c, kstring_t* s)
{
    long widened = c;
    return kputll(widened, s);
}
365 
/*
 * Splits 's' by delimiter using ksplit_core().  Returns the offsets array
 * produced by ksplit_core(), with *n set to the number of components;
 * NULL on failure.
 */
int* ksplit(kstring_t* s, int delimiter, int* n)
{
    int* offsets = null;
    int capacity = 0;

    immutable int count = ksplit_core(s.s, delimiter, &capacity, &offsets);
    *n = count;
    return offsets;
}