1 /* The MIT License
2 
3    Copyright (C) 2011 by Attractive Chaos <attractor@live.co.uk>
4    Copyright (C) 2013-2014, 2016, 2018-2019 Genome Research Ltd.
5 
6    Permission is hereby granted, free of charge, to any person obtaining
7    a copy of this software and associated documentation files (the
8    "Software"), to deal in the Software without restriction, including
9    without limitation the rights to use, copy, modify, merge, publish,
10    distribute, sublicense, and/or sell copies of the Software, and to
11    permit persons to whom the Software is furnished to do so, subject to
12    the following conditions:
13 
14    The above copyright notice and this permission notice shall be
15    included in all copies or substantial portions of the Software.
16 
17    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
18    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
19    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
20    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
21    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
22    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
23    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24    SOFTWARE.
25 */
26 
27 module htslib.kstring;
28 
import core.stdc.config : c_long;
import core.stdc.stdarg;
import core.stdc.stdio : EOF;
import core.stdc.stdlib;
import core.stdc.string : memcpy, strlen;
34 
35 alias ssize_t = ptrdiff_t;	// should be defined in core.stdc somewhere but is not :/
36 
37 extern (C):
38 
39 // #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
40 
/// Round `x` up, in place, to the nearest power of two that is >= x.
///
/// Works for both 32-bit and 64-bit size_t.  The arithmetic is unsigned and
/// wraps: an input of 0, or one above the largest representable power of
/// two, comes back as 0.
pragma(inline, true)
extern (D)
void kroundup_size_t(ref size_t x) {
	// Smear the highest set bit of (x - 1) into every lower position ...
	size_t v = x - 1;
	v |= v >> 1;
	v |= v >> 2;
	v |= v >> 4;
	v |= v >> 8;
	v |= v >> 16;

	static if (size_t.sizeof == 8)
		v |= v >> 32;

	// ... so that adding one yields the next power of two.
	x = v + 1;
}
57 
58 /+
59 #if defined __GNUC__ && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ > 4))
60 #ifdef __MINGW_PRINTF_FORMAT
61 #define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__MINGW_PRINTF_FORMAT, fmt, arg)))
62 #else
63 #define KS_ATTR_PRINTF(fmt, arg) __attribute__((__format__ (__printf__, fmt, arg)))
64 #endif // __MINGW_PRINTF_FORMAT
65 #else
66 #define KS_ATTR_PRINTF(fmt, arg)
67 #endif
68 
69 #ifndef HAVE___BUILTIN_CLZ
70 #if defined __GNUC__ && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4))
71 #define HAVE___BUILTIN_CLZ 1
72 #endif
73 #endif
74 +/
75 
76 /* kstring_t is a simple non-opaque type whose fields are likely to be
77  * used directly by user code (but see also ks_str() and ks_len() below).
78  * A kstring_t object is initialised by either of
79  *       kstring_t str = KS_INITIALIZE;
80  *       kstring_t str; ...; ks_initialize(&str);
81  * and either ownership of the underlying buffer should be given away before
82  * the object disappears (see ks_release() below) or the kstring_t should be
83  * destroyed with  ks_free(&str) or free(str.s) */
84 
/// Upper-case alias for kstring_t.
alias KSTRING_T = kstring_t;

/// Growable character buffer: a heap pointer plus used-length and capacity.
struct kstring_t
{
    /// Number of bytes currently in use; this is the value ks_len() returns
    /// and the offset at which the kput*() functions append.
    size_t l;
    /// Number of bytes allocated for `s`; updated by ks_resize().
    size_t m;
    /// The buffer itself; null after ks_initialize(), ks_release() or ks_free().
    char* s;
}
93 
/// Auxiliary state handed by pointer to kstrtok().
/// NOTE(review): the tokenizer itself is not defined in this module, so the
/// field notes below (other than `p`) are inferred from names and sizes —
/// confirm against the C implementation.
struct ks_tokaux_t
{
    // presumably a 4 x 64 = 256-bit table with one bit per separator byte — TODO confirm
    ulong[4] tab;
    // presumably the separator when only a single character is used — TODO confirm
    int sep;
    // presumably non-zero once the input has been exhausted — TODO confirm
    int finished;
    const(char)* p; // end of the current token
}
101 
// The functions declared in this group have no body in this module; they are
// expected to be supplied by the C library this module binds to.
// NOTE(review): the one-line summaries below are inferred from names and
// signatures only — confirm exact semantics against the C implementation.

/// Presumably printf-style formatting into `s`, taking the arguments as a va_list.
int kvsprintf(kstring_t* s, const(char)* fmt, va_list ap);

/// Presumably the variadic counterpart of kvsprintf().
int ksprintf(kstring_t* s, const(char)* fmt, ...);

int kputd(double d, kstring_t* s); // custom %g only handler

/// Core of ksplit(): called with the string, a delimiter, and pointers to a
/// capacity counter and an offsets array; ksplit() stores the return value
/// as the number of components.
int ksplit_core(char* s, int delimiter, int* _max, int** _offsets);

/// Presumably a substring search of `pat` within `str`; `_prep` looks like an
/// optional cache of pattern preprocessing — TODO confirm.
char* kstrstr(const(char)* str, const(char)* pat, int** _prep);

/// Presumably as kstrstr(), limited to the first `n` bytes of `str` — TODO confirm.
char* kstrnstr(const(char)* str, const(char)* pat, int n, int** _prep);

/// Presumably a memmem()-like search over explicit lengths `n` and `m` — TODO confirm.
void* kmemmem(const(void)* _str, int n, const(void)* _pat, int m, int** _prep);
115 
/* kstrtok() is similar to strtok_r() except that str is not
 * modified and both str and sep can be NULL. For efficiency, it is
 * actually recommended to set both to NULL in the subsequent calls
 * if sep is not changed.
 *
 * The tokenizer state lives in the caller-supplied `aux`; its `p` field
 * marks the end of the current token. */
char* kstrtok(const(char)* str, const(char)* sep, ks_tokaux_t* aux);
121 
/// Signature of the fgets()-like reader accepted by kgetline().
alias kgets_func = char* function(char*, int, void*);

/**
 * Read one line from `fp` through the supplied fgets()-like function.
 *
 * The line may be terminated by "\n" or "\r\n".  It is appended to the
 * kstring without its terminator and 0 is returned; EOF is returned at
 * end of file or on error (determined by querying fp, as per fgets()).
 */
int kgetline(kstring_t* s, kgets_func fgets, void* fp);
128 
/// Signature of the reader accepted by kgetline2().
///
/// This matches the signature of hgetln(), apart from the last pointer.
/// The return type is ssize_t, not c_long: the two differ on targets where
/// C `long` is 32 bits but pointers are 64 bits (e.g. 64-bit Windows), and
/// the alias must name exactly the callback type that kgetline2() takes.
alias kgets_func2 = ssize_t function(char*, size_t, void*);

/// Variant of kgetline() whose reader takes a size_t buffer length and
/// returns an ssize_t.
/// NOTE(review): return-value semantics are defined by the C implementation,
/// which is not visible in this module — confirm there.
int kgetline2(kstring_t* s, kgets_func2 fgets, void* fp);
132 
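/// kstring initializer for structure assignment
// C original: #define KS_INITIALIZE { 0, 0, NULL }
// No D equivalent is needed: a default-initialized kstring_t (kstring_t.init)
// already has l == 0, m == 0 and s == null.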
135 
/// Put a kstring into the empty state through a pointer.
/**
   Sets the length and capacity to zero and the buffer pointer to null.
   The previous buffer is NOT freed.

   @note Not to be used if the buffer has been allocated.  Use ks_release()
   or ks_clear() instead.
*/
void ks_initialize(kstring_t* s)
{
    s.s = null;
    s.m = 0;
    s.l = 0;
}
147 
/// Resize a kstring to a given capacity
/**
   Ensures the buffer can hold at least `size` bytes.  Nothing happens when
   the current capacity already suffices; the buffer is never shrunk.

   The requested size is rounded up to a power of two so that repeated small
   appends do not reallocate every time.

   @param s     kstring to grow
   @param size  minimum capacity required, in bytes
   @return 0 on success, -1 if the allocation failed (the kstring is left
           untouched in that case)
*/
int ks_resize(kstring_t* s, size_t size)
{
	if (s.m >= size)
		return 0;

	// Round a copy, not the request itself: for requests above the largest
	// power of two the rounding wraps to 0.  Passing that 0 to realloc()
	// could free the buffer while still reporting success, so fall back to
	// the exact requested size instead.
	size_t capacity = size;
	kroundup_size_t(capacity);
	if (capacity < size)
		capacity = size;

	// size > s.m >= 0 here, so capacity is non-zero and a null result
	// always means failure.
	char* tmp = cast(char*) realloc(s.s, capacity);
	if (tmp is null)
		return -1;

	s.s = tmp;
	s.m = capacity;
	return 0;
}
162 
/// Increase kstring capacity by a given number of bytes
/**
   Makes room for `expansion` more bytes beyond the current length.

   @return 0 on success, -1 if the new size would overflow size_t or the
           resize failed
*/
int ks_expand(kstring_t* s, size_t expansion)
{
    // s.l + expansion must not wrap around
    if (expansion > size_t.max - s.l)
        return -1;

    return ks_resize(s, s.l + expansion);
}
172 
/// Returns the kstring buffer
/**
 * This is the raw `s` field, so the result is null when no buffer is
 * attached (e.g. right after ks_initialize() or ks_release()).
 */
char* ks_str(kstring_t* s)
{
	return s.s;
}
178 
/// Returns the kstring buffer, or an empty string if l == 0
/**
 * Unlike ks_str(), this function never returns null: when the length is
 * zero or no buffer is attached, it returns a read-only empty string
 * literal.  Because the result may be read-only, callers must not write
 * through it.
 */
const(char)* ks_c_str(kstring_t* s)
{
    if (s.l == 0 || s.s is null)
        return "";

    return s.s;
}
189 
/// Returns the current length of the kstring (its `l` field)
size_t ks_len(kstring_t* s)
{
	return s.l;
}
194 
/// Reset kstring length to zero
/**
   Only the length is reset; the buffer pointer and its capacity are left
   as they are.

   @return The kstring itself

   Example use: kputsn(string, len, ks_clear(s))
*/
kstring_t* ks_clear(kstring_t* s)
{
    s.l = 0;
    return s;
}
206 
/// Detach and return the underlying buffer.
// Ownership of the buffer passes to the caller, who becomes responsible for
// freeing it.  The kstring_t is left empty and ready to be used again, or
// ready to go out of scope without needing  free(str.s)  to prevent a
// memory leak.
char* ks_release(kstring_t* s)
{
	char* detached = s.s;
	ks_initialize(s);	// zero l and m, null s; does not free
	return detached;
}
218 
/// Safely free the underlying buffer in a kstring.
/**
   A null `s` is accepted and ignored.  Otherwise the buffer is freed and the
   kstring is put back into the empty state, so it may be reused.
*/
void ks_free(kstring_t* s)
{
    if (s is null)
        return;

    free(s.s);
    ks_initialize(s);
}
227 
/// Append `l` bytes from `p` to the kstring and NUL-terminate it.
/**
   @param p  bytes to append (need not be NUL-terminated)
   @param l  number of bytes to append
   @param s  destination kstring
   @return `l` narrowed to int on success; EOF if the required size would
           overflow size_t or the buffer could not be grown
*/
int kputsn(const(char)* p, size_t l, kstring_t* s)
{
	// The buffer must hold s.l + l + 2 bytes (the same headroom the size
	// computation has always requested).  Check by subtraction so that no
	// combination of s.l and l can wrap around and slip past the test;
	// comparing the wrapped sum against s.l misses l == size_t.max.
	if (s.l > size_t.max - 2 || l > size_t.max - 2 - s.l)
		return EOF;
	if (ks_resize(s, s.l + l + 2) < 0)
		return EOF;

	// Skip the copy for an empty append so a null `p` is never handed to memcpy.
	if (l != 0)
		memcpy(s.s + s.l, p, l);
	s.l += l;
	s.s[s.l] = 0;
	return cast(int)l;	// no implicit down casting
}
238 
/// Append the NUL-terminated C string `p` to the kstring.
/// Returns whatever kputsn() returns for the string's length.
int kputs(const(char)* p, kstring_t* s)
{
	immutable size_t len = strlen(p);
	return kputsn(p, len, s);
}
243 
/// Append one character to the kstring and NUL-terminate it.
/// Returns the character written (as an unsigned byte value), or EOF if the
/// buffer could not be grown.
int kputc(int c, kstring_t* s)
{
	// one byte for the character, one for the terminating NUL
	if (ks_resize(s, s.l + 2) < 0)
		return EOF;

	char* buf = s.s;
	buf[s.l] = cast(char) c;	// no implicit down casting
	s.l += 1;
	buf[s.l] = '\0';
	return cast(ubyte) c;
}
252 
/// Append one character to the kstring WITHOUT NUL-terminating it.
/// Returns 1 on success, or EOF if the buffer could not be grown.
int kputc_(int c, kstring_t* s)
{
	immutable size_t needed = s.l + 1;
	if (ks_resize(s, needed) < 0)
		return EOF;

	s.s[s.l] = cast(char) c;	// no implicit down casting
	s.l = needed;
	return 1;
}
260 
/// Append `l` raw bytes from `p` to the kstring WITHOUT NUL-terminating it.
/// Returns `l` narrowed to int on success; EOF if the new length would
/// overflow size_t or the buffer could not be grown.
int kputsn_(const(void)* p, size_t l, kstring_t* s)
{
	immutable size_t total = s.l + l;
	if (total < s.l)	// wrapped around
		return EOF;

	// Always request at least one byte, even when the result is empty.
	immutable size_t wanted = (total == 0) ? 1 : total;
	if (ks_resize(s, wanted) < 0)
		return EOF;

	memcpy(s.s + s.l, p, l);
	s.l = total;
	return cast(int) l;	// no implicit down casting
}
270 
// htslib 1.10 replaced this function with a higher performance
// version using BSR/CTLZ intrinsics. That diverges from klib's
// kstring implementation; other functions may have also changed.
//
// This module previously only declared kputuw() without a body, so
// kputw() had nothing to call.  The version below is a plain decimal
// conversion written in the same style as kputll().

/// Append the decimal representation of an unsigned 32-bit value to the
/// kstring and NUL-terminate it.
/**
   @param x  value to format
   @param s  destination kstring
   @return 0 on success, EOF if the buffer could not be grown
*/
int kputuw(uint x, kstring_t* s)
{
	char[10] digits;	// uint.max has 10 decimal digits
	int count = 0;

	// Produce digits least-significant first; do/while so 0 yields "0".
	do {
		digits[count++] = cast(char)('0' + x % 10);
		x /= 10;
	} while (x != 0);

	if (ks_resize(s, s.l + count + 2) < 0)
		return EOF;

	// Copy them out in reverse to get most-significant first.
	while (count > 0)
		s.s[s.l++] = digits[--count];
	s.s[s.l] = '\0';
	return 0;
}
276 
/// Append the decimal representation of a signed 32-bit value to the kstring.
/// Returns what kputuw() returns for the magnitude, or EOF if the buffer
/// could not be grown for the sign.
int kputw(int c, kstring_t* s)
{
    if (c >= 0)
        return kputuw(cast(uint) c, s);

    // Negative: emit the sign here, then hand the magnitude to kputuw().
    if (ks_resize(s, s.l + 3) < 0)
        return EOF;
    s.s[s.l++] = '-';

    // Negating in unsigned arithmetic is well-defined even for int.min.
    immutable uint magnitude = 0u - cast(uint) c;
    return kputuw(magnitude, s);
}
289 
/// Append the decimal representation of a signed 64-bit value to the kstring
/// and NUL-terminate it.
/// Returns 0 on success, or EOF if the buffer could not be grown.
int kputll(long c, kstring_t* s)
{
	char[32] digits;	// ample room: at most 20 digits plus a sign
	int count = 0;
	immutable bool negative = c < 0;

	// Work on the magnitude in unsigned arithmetic so long.min is handled.
	ulong magnitude = c;
	if (negative)
		magnitude = -magnitude;

	// Digits come out least-significant first; the sign goes on last so
	// that it ends up first after the reversal below.
	do {
		digits[count++] = cast(char)('0' + magnitude % 10);
		magnitude /= 10;
	} while (magnitude > 0);
	if (negative)
		digits[count++] = '-';

	if (ks_resize(s, s.l + count + 2) < 0)
		return EOF;

	while (count > 0)
		s.s[s.l++] = digits[--count];
	s.s[s.l] = '\0';
	return 0;
}
304 
/// Append the decimal representation of a C `long` to the kstring.
/// Forwards to kputll() and returns its result.
int kputl(c_long c, kstring_t* s) {
    return kputll(c, s);
}
308 
/*
 * Returns 's' split by delimiter, with *n being the number of components;
 *         NULL on failure.
 *
 * The work is done by ksplit_core(); this wrapper starts it off with an
 * empty offsets array and hands back whatever array it produces.
 */
int* ksplit(kstring_t* s, int delimiter, int* n)
{
	int capacity = 0;
	int* offsets = null;
	immutable int count = ksplit_core(s.s, delimiter, &capacity, &offsets);
	*n = count;
	return offsets;
}
320 
321