1 // htslib-1.9 hts.h as D module
2 // Changes include:
3 // In D, const on either LHS or RHS of function declaration applies to the function, not return value, unless parents included:
4 // changed ^const <type> <fnname> to ^const(<type>) <fnname>
5 /*  *      aliased typedef'd function pointers */
6 /*  *       changed C-style arrays (eg line 339, extern const char seq_nt16_str[];) to char[] seq_nt16_str */
7 /*  *           as update to above, seq_nt16_str needs to be char[16], as C and D style char[] imply different things */
8 module htslib.hts;
9 
10 import std.bitmanip;
11 
12 extern (C):
13 /// @file htslib/hts.h
14 /// Format-neutral I/O, indexing, and iterator API functions.
15 /*
16     Copyright (C) 2012-2019 Genome Research Ltd.
17     Copyright (C) 2010, 2012 Broad Institute.
18     Portions copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3@live.co.uk>
19 
20     Author: Heng Li <lh3@sanger.ac.uk>
21 
22 Permission is hereby granted, free of charge, to any person obtaining a copy
23 of this software and associated documentation files (the "Software"), to deal
24 in the Software without restriction, including without limitation the rights
25 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
26 copies of the Software, and to permit persons to whom the Software is
27 furnished to do so, subject to the following conditions:
28 
29 The above copyright notice and this permission notice shall be included in
30 all copies or substantial portions of the Software.
31 
32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
35 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
37 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
38 DEALINGS IN THE SOFTWARE.  */
39 
40 //#include <stddef.h>
41 import core.stdc.stdint;
42 /+
43 #include "hts_defs.h"
44 #include "hts_log.h"
45 +/
46 
47 // Separator used to split HTS_PATH (for plugins); REF_PATH (cram references)
48 version(Windows)
49 {
50     enum HTS_PATH_SEPARATOR_CHAR = ';';
51     enum HTS_PATH_SEPARATOR_STR = ";";
52 }
53 else
54 {
55     enum HTS_PATH_SEPARATOR_CHAR = ':';
56     enum HTS_PATH_SEPARATOR_STR = ":";
57 }
58 
59 import htslib.bgzf;
60 
61 /// see cram.h, sam.h, sam.d
62 struct cram_fd; // @suppress(dscanner.style.phobos_naming_convention)
63 /// see hfile.d
64 //struct hFILE; // @suppress(dscanner.style.phobos_naming_convention)
65 import htslib.hfile: hFILE;
66 /// see thread_pool.d
67 struct hts_tpool; // @suppress(dscanner.style.phobos_naming_convention)
68 import htslib.sam : sam_hdr_t;
69 
70 import htslib.kstring: kstring_t;
71 
72 /+
73 #ifndef kroundup32
74 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
75 #endif
76 
77 /**
78  * @hideinitializer
79  * Deprecated macro to expand a dynamic array of a given type
80  *
81  * @param         type_t The type of the array elements
82  * @param[in]     n      Requested number of elements of type type_t
83  * @param[in,out] m      Size of memory allocated
84  * @param[in,out] ptr    Pointer to the array
85  *
86  * @discussion
87  * Do not use this macro.  Use hts_resize() instead as allows allocation
88  * failures to be handled more gracefully.
89  *
90  * The array *ptr will be expanded if necessary so that it can hold @p n
91  * or more elements.  If the array is expanded then the new size will be
92  * written to @p m and the value in @p ptr may change.
93  *
94  * It must be possible to take the address of @p ptr and @p m must be usable
95  * as an lvalue.
96  *
97  * @bug
98  * If the memory allocation fails, this will call exit(1).  This is
99  * not ideal behaviour in a library.
100  */
101 #define hts_expand(type_t, n, m, ptr) do {                              \
102         if ((n) > (m)) {                                                \
103             size_t hts_realloc_or_die(size_t, size_t, size_t, size_t,   \
104                                       int, void **, const char *);      \
105             (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \
106                                      sizeof(type_t),  0,                \
107                                      (void **)&(ptr), __func__);        \
108         }                                                               \
109     } while (0)
110 
111 /**
112  * @hideinitializer
113  * Macro to expand a dynamic array, zeroing any newly-allocated memory
114  *
115  * @param         type_t The type of the array elements
116  * @param[in]     n      Requested number of elements of type type_t
117  * @param[in,out] m      Size of memory allocated
118  * @param[in,out] ptr    Pointer to the array
119  *
120  * @discussion
121  * Do not use this macro.  Use hts_resize() instead as allows allocation
122  * failures to be handled more gracefully.
123  * 
124  * As for hts_expand(), except the bytes that make up the array elements
125  * between the old and new values of @p m are set to zero using memset().
126  *
127  * @bug
128  * If the memory allocation fails, this will call exit(1).  This is
129  * not ideal behaviour in a library.
130  */
131 
132 
133 #define hts_expand0(type_t, n, m, ptr) do {                             \
134         if ((n) > (m)) {                                                \
135             size_t hts_realloc_or_die(size_t, size_t, size_t, size_t,   \
136                                       int, void **, const char *);      \
137             (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \
138                                      sizeof(type_t), 1,                 \
139                                      (void **)&(ptr), __func__);        \
140         }                                                               \
141     } while (0)
142 +/
143 
144 // For internal use (by hts_resize()) only
145 int hts_resize_array_(size_t, size_t, size_t, void *, void **, int,
146                       const(char) *);
147 
148 enum HTS_RESIZE_CLEAR = 1;
149 
150 /**
151  * @hideinitializer
152  * Template fn to expand a dynamic array of a given type
153  *
154  * @param         T         The type of the array elements
155  * @param[in]     num       Requested number of elements of type T 
156  * @param[in,out] size      Pointer to where the size (in elements) of the
157                             array is stored.
158  * @param[in,out] ptr       Location of the pointer to the array
159  * @param[in]     flags     Option flags
160  *
161  * @return        0 for success, or negative if an error occurred.
162  *
163  * @discussion
164  * The array *ptr will be expanded if necessary so that it can hold @p num
165  * or more elements.  If the array is expanded then the new size will be
166  * written to @p *size_ptr and the value in @p *ptr may change.
167  *
168  * If ( @p flags & HTS_RESIZE_CLEAR ) is set, any newly allocated memory will
169  * be cleared.
170  */
171 
172 pragma(inline,true)
173 int hts_resize(T)(size_t num, ref size_t size, T* ptr, int flags)
174 {
175     return (num > size)
176         ? hts_resize_array_(T.sizeof, num, size_t.sizeof, &size, cast(void **)&ptr, flags, __FUNCTION__)
177         : 0;
178 }
179 
180 /**
181  * Wrapper function for free(). Enables memory deallocation across DLL
182  * boundary. Should be used by all applications, which are compiled
183  * with a different standard library than htslib and call htslib
184  * methods that return dynamically allocated data.
185  */
186 void hts_free(void *ptr);
187 
188 /************
189  * File I/O *
190  ************/
191 
192 // Add new entries only at the end (but before the *_maximum entry)
193 // of these enums, as their numbering is part of the htslib ABI.
194 
195 /// Broad format category (sequence data, variant data, index, regions, etc.)
196 enum htsFormatCategory { // @suppress(dscanner.style.phobos_naming_convention)
197     unknown_category,
198     sequence_data,    // Sequence data -- SAM, BAM, CRAM, etc
199     variant_data,     // Variant calling data -- VCF, BCF, etc
200     index_file,       // Index file associated with some data file
201     region_list,      // Coordinate intervals or regions -- BED, etc
202     category_maximum = 32_767
203 }
204 
205 /// Specific format (SAM, BAM, CRAM, BCF, VCF, TBI, BED, etc.)
206 enum htsExactFormat { // @suppress(dscanner.style.phobos_naming_convention)
207     unknown_format,
208     binary_format, text_format,
209     sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed,
210     htsget,
211     //deprecated("Use htsExactFormat 'htsget' instead") json = htsget,
212     empty_format,  // File is empty (or empty after decompression)
213     fasta_format, fastq_format, fai_format, fqi_format,
214     format_maximum = 32_767
215 }
216 
217 /// Compression type
218 enum htsCompression { // @suppress(dscanner.style.phobos_naming_convention)
219     no_compression, gzip, bgzf, custom, bzip2_compression,
220     compression_maximum = 32_767
221 }
222 
223 /// hts file complete file format information
224 // NB: version is a reserved keyword in D -- changed to "vers"
225 struct htsFormat { // @suppress(dscanner.style.phobos_naming_convention)
226     htsFormatCategory category; /// Broad format category (sequence data, variant data, index, regions, etc.)
227     htsExactFormat format;      /// Specific format (SAM, BAM, CRAM, BCF, VCF, TBI, BED, etc.)
228     /// format version
229     struct Vers { short major, minor; } // @suppress(dscanner.style.undocumented_declaration)
230     Vers v; /// format version
231     htsCompression compression; /// Compression type
232     short compression_level;/// currently unused
233     void *specific;         /// format specific options; see struct hts_opt.
234 }
235 
236 /// index data (opaque)
237 struct __hts_idx_t;
238 alias hts_idx_t = __hts_idx_t;
239 
240 // Maintainers note htsFile cannot be an opaque structure because some of its
241 // fields are part of libhts.so's ABI (hence these fields must not be moved):
242 //  - fp is used in the public sam_itr_next()/etc macros
243 //  - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1
244 //  - is_write and is_cram are used directly in samtools <= 1.1
245 //  - fp is used directly in samtools (up to and including current develop)
246 //  - line is used directly in bcftools (up to and including current develop)
247 /// Data and metadata for an hts file; part of public and private ABI
248 struct htsFile { // @suppress(dscanner.style.phobos_naming_convention)
249     pragma(msg, "htsFile: bitfield order assumed starting with LSB");
250     //uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27;
251     mixin(bitfields!(
252         bool, "is_bin", 1,
253         bool, "is_write", 1,
254         bool, "is_be", 1,
255         bool, "is_cram", 1,
256         bool, "is_bgzf", 1,
257         uint, "padding27", 27 ));
258     int64_t lineno; /// uncompressed(?) file line no.
259     kstring_t line; /// buffer to hold line
260     char *fn;       /// filename
261     char *fn_aux;   /// auxillary (i.e, index) file name
262     /// hFile plus any needed bgzf or CRAM (if applicable) structure data
263     union FP {
264         BGZF *bgzf;     /// see bgzf.d
265         cram_fd *cram;  /// see cram.d
266         hFILE *hfile;   /// see hfile.d
267     }
268     FP fp;              /// hFile plus any needed bgzf or CRAM (if applicable) structure data
269     void *state;        /// format specific state information
270     htsFormat format;   /// hts file complete file format information
271     hts_idx_t *idx;
272     const(char) *fnidx;
273     sam_hdr_t *bam_header;
274 }
275 
276 /// A combined thread pool and queue allocation size.
277 /// The pool should already be defined, but qsize may be zero to
278 /// indicate an appropriate queue size is taken from the pool.
279 ///
280 /// Reasons for explicitly setting it could be where many more file
281 /// descriptors are in use than threads, so keeping memory low is
282 /// important.
283 struct htsThreadPool { // @suppress(dscanner.style.phobos_naming_convention)
284     hts_tpool *pool;/// The shared thread pool itself
285     int qsize;      /// Size of I/O queue to use for this fp
286 }
287 
288 /// REQUIRED_FIELDS
289 enum sam_fields { // @suppress(dscanner.style.phobos_naming_convention)
290     SAM_QNAME = 0x00000001,
291     SAM_FLAG  = 0x00000002,
292     SAM_RNAME = 0x00000004,
293     SAM_POS   = 0x00000008,
294     SAM_MAPQ  = 0x00000010,
295     SAM_CIGAR = 0x00000020,
296     SAM_RNEXT = 0x00000040,
297     SAM_PNEXT = 0x00000080,
298     SAM_TLEN  = 0x00000100,
299     SAM_SEQ   = 0x00000200,
300     SAM_QUAL  = 0x00000400,
301     SAM_AUX   = 0x00000800,
302     SAM_RGAUX = 0x00001000,
303 }
304 
305 /// Mostly CRAM only, but this could also include other format options
306 enum hts_fmt_option { // @suppress(dscanner.style.phobos_naming_convention)
307     // CRAM specific
308     CRAM_OPT_DECODE_MD,
309     CRAM_OPT_PREFIX,
310     CRAM_OPT_VERBOSITY,  /// obsolete, use hts_set_log_level() instead
311     CRAM_OPT_SEQS_PER_SLICE,
312     CRAM_OPT_SLICES_PER_CONTAINER,
313     CRAM_OPT_RANGE,
314     CRAM_OPT_VERSION,    /// rename to cram_version?
315     CRAM_OPT_EMBED_REF,
316     CRAM_OPT_IGNORE_MD5,
317     CRAM_OPT_REFERENCE,  // make general
318     CRAM_OPT_MULTI_SEQ_PER_SLICE,
319     CRAM_OPT_NO_REF,
320     CRAM_OPT_USE_BZIP2,
321     CRAM_OPT_SHARED_REF,
322     CRAM_OPT_NTHREADS,   /// deprecated, use HTS_OPT_NTHREADS
323     CRAM_OPT_THREAD_POOL,/// make general
324     CRAM_OPT_USE_LZMA,
325     CRAM_OPT_USE_RANS,
326     CRAM_OPT_REQUIRED_FIELDS,
327     CRAM_OPT_LOSSY_NAMES,
328     CRAM_OPT_BASES_PER_SLICE,
329     CRAM_OPT_STORE_MD,
330     CRAM_OPT_STORE_NM,
331 
332     // General purpose
333     HTS_OPT_COMPRESSION_LEVEL = 100,
334     HTS_OPT_NTHREADS,
335     HTS_OPT_THREAD_POOL,
336     HTS_OPT_CACHE_SIZE,
337     HTS_OPT_BLOCK_SIZE,
338 }
339 
340 /// For backwards compatibility
341 alias cram_option = hts_fmt_option;
342 
343 /// Options for cache, (de)compression, threads, CRAM, etc.
344 struct hts_opt { // @suppress(dscanner.style.phobos_naming_convention)
345     char *arg;          /// string form, strdup()ed
346     hts_fmt_option opt; /// tokenised key
347     /// option value
348     union VAL {         /// ... and value
349         int i;          /// int value
350         char *s;        /// string value
351     }
352     VAL val;            /// value
353     hts_opt *next;      /// next option (linked list)
354 }
355 
356 //#define HTS_FILE_OPTS_INIT {{0},0}
357 // Not apparently used in htslib-1.7
358 
359 /**
360  * Explicit index file name delimiter, see below
361  */
362 enum HTS_IDX_DELIM = "##idx##";
363 
364 
365 /**********************
366  * Exported functions *
367  **********************/
368 
369 /**
370  * Parses arg and appends it to the option list.
371  *
372  * Returns 0 on success;
373  *        -1 on failure.
374  */
375 int hts_opt_add(hts_opt **opts, const(char) *c_arg);
376 
377 /**
378  * Applies an hts_opt option list to a given htsFile.
379  *
380  * Returns 0 on success
381  *        -1 on failure
382  */
383 int hts_opt_apply(htsFile *fp, hts_opt *opts);
384 
385 /**
386  * Frees an hts_opt list.
387  */
388 void hts_opt_free(hts_opt *opts);
389 
390 /**
391  * Accepts a string file format (sam, bam, cram, vcf, bam) optionally
392  * followed by a comma separated list of key=value options and splits
393  * these up into the fields of htsFormat struct.
394  *
395  * Returns 0 on success
396  *        -1 on failure.
397  */
398 int hts_parse_format(htsFormat *opt, const(char) *str);
399 
400 /**
401  * Tokenise options as (key(=value)?,)*(key(=value)?)?
402  * NB: No provision for ',' appearing in the value!
403  * Add backslashing rules?
404  *
405  * This could be used as part of a general command line option parser or
406  * as a string concatenated onto the file open mode.
407  *
408  * Returns 0 on success
409  *        -1 on failure.
410  */
411 int hts_parse_opt_list(htsFormat *opt, const(char) *str);
412 
413 /**! @abstract Table for converting a nucleotide character to 4-bit encoding.
414 The input character may be either an IUPAC ambiguity code, '=' for 0, or
415 '0'/'1'/'2'/'3' for a result of 1/2/4/8.  The result is encoded as 1/2/4/8
416 for A/C/G/T or combinations of these bits for ambiguous bases.
417 */
418 
419 version(Windows){
420     const (char)[256] seq_nt16_table = [
421         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
422         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
423         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
424         1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,
425         15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
426         15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
427         15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
428         15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
429 
430         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
431         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
432         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
433         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
434         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
435         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
436         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
437         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
438     ];
439 }else{
440     extern const(char)[256] seq_nt16_table;
441 }
442 
443 /**! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC
444 ambiguity code letter (or '=' when given 0).
445 */
446 
447 version(Windows) __gshared const (char)[16] seq_nt16_str = ['=','A','C','M','G','R','S','V','T','W','Y','H','K','D','B','N'];
448 else extern __gshared const(char)[16] seq_nt16_str;
449 /**! @abstract Table for converting a 4-bit encoded nucleotide to about 2 bits.
450 Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous).
451 */
452 version(Windows) const (int)[16] seq_nt16_int = [ 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 ];
453 else extern const int[] seq_nt16_int;
454 /**!
455   @abstract  Get the htslib version number
456   @return    For released versions, a string like "N.N[.N]"; or git describe
457   output if using a library built within a Git repository.
458 */
459 const(char) *hts_version();
460 
461 /*!
462   @abstract  Compile-time HTSlib version number, for use in #if checks
463   @return    For released versions X.Y[.Z], an integer of the form XYYYZZ;
464   useful for preprocessor conditionals such as
465       #if HTS_VERSION >= 101000  // Check for v1.10 or later
466 */
467 // Maintainers: Bump this in the final stage of preparing a new release.
468 // Immediately after release, bump ZZ to 90 to distinguish in-development
469 // Git repository builds from the release; you may wish to increment this
470 // further when significant features are merged.
471 enum HTS_VERSION = 101000; // @suppress(dscanner.style.number_literals)
472 
473 /**!
474   @abstract    Determine format by peeking at the start of a file
475   @param fp    File opened for reading, positioned at the beginning
476   @param fmt   Format structure that will be filled out on return
477   @return      0 for success, or negative if an error occurred.
478 */
479 int hts_detect_format(hFILE *fp, htsFormat *fmt);
480 
481 /**!
482   @abstract    Get a human-readable description of the file format
483   @param fmt   Format structure holding type, version, compression, etc.
484   @return      Description string, to be freed by the caller after use.
485 */
486 char *hts_format_description(const(htsFormat) *format);
487 
488 /**!
489   @abstract       Open a sequence data (SAM/BAM/CRAM) or variant data (VCF/BCF)
490                   or possibly-compressed textual line-orientated file
491   @param fn       The file name or "-" for stdin/stdout. For indexed files
492                   with a non-standard naming, the file name can include the
493                   name of the index file delimited with HTS_IDX_DELIM
494   @param mode     Mode matching / [rwa][bceguxz0-9]* /
495   @discussion
496       With 'r' opens for reading; any further format mode letters are ignored
497       as the format is detected by checking the first few bytes or BGZF blocks
498       of the file.  With 'w' or 'a' opens for writing or appending, with format
499       specifier letters:
500         b  binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc)
501         c  CRAM format
502         g  gzip compressed
503         u  uncompressed
504         z  bgzf compressed
505         [0-9]  zlib compression level
506       and with non-format option letters (for any of 'r'/'w'/'a'):
507         e  close the file on exec(2) (opens with O_CLOEXEC, where supported)
508         x  create the file exclusively (opens with O_EXCL, where supported)
509       Note that there is a distinction between 'u' and '0': the first yields
510       plain uncompressed output whereas the latter outputs uncompressed data
511       wrapped in the zlib format.
512   @example
513       [rw]b  .. compressed BCF, BAM, FAI
514       [rw]bu .. uncompressed BCF
515       [rw]z  .. compressed VCF
516       [rw]   .. uncompressed VCF
517 */
518 htsFile *hts_open(const(char) *fn, const(char) *mode);
519 
520 /**!
521   @abstract       Open a SAM/BAM/CRAM/VCF/BCF/etc file
522   @param fn       The file name or "-" for stdin/stdout
523   @param mode     Open mode, as per hts_open()
524   @param fmt      Optional format specific parameters
525   @discussion
526       See hts_open() for description of fn and mode.
527       // TODO Update documentation for s/opts/fmt/
528       Opts contains a format string (sam, bam, cram, vcf, bcf) which will,
529       if defined, override mode.  Opts also contains a linked list of hts_opt
530       structures to apply to the open file handle.  These can contain things
531       like pointers to the reference or information on compression levels,
532       block sizes, etc.
533 */
534 htsFile *hts_open_format(const(char) *fn, const(char) *mode, const(htsFormat) *fmt);
535 
536 /**!
537   @abstract       Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file
538   @param fn       The already-open file handle
539   @param mode     Open mode, as per hts_open()
540 */
541 htsFile *hts_hopen(hFILE *fp, const(char) *fn, const(char) *mode);
542 
543 /**!
544   @abstract  Close a file handle, flushing buffered data for output streams
545   @param fp  The file handle to be closed
546   @return    0 for success, or negative if an error occurred.
547 */
548 int hts_close(htsFile *fp);
549 
550 /**!
551   @abstract  Returns the file's format information
552   @param fp  The file handle
553   @return    Read-only pointer to the file's htsFormat.
554 */
555 const(htsFormat *) hts_get_format(htsFile *fp);
556 
557 /**!
558   @ abstract      Returns a string containing the file format extension.
559   @ param format  Format structure containing the file type.
560   @ return        A string ("sam", "bam", etc) or "?" for unknown formats.
561  */
562 const(char *) hts_format_file_extension(const(htsFormat) *format);
563 
564 /**!
565   @abstract  Sets a specified CRAM option on the open file handle.
566   @param fp  The file handle open the open file.
567   @param opt The CRAM_OPT_* option.
568   @param ... Optional arguments, dependent on the option used.
569   @return    0 for success, or negative if an error occurred.
570 */
571 int hts_set_opt(htsFile *fp, hts_fmt_option opt, ...);
572 
573 /// ?Get line as string from line-oriented flat file (undocumented in hts.h)
574 int hts_getline(htsFile *fp, int delimiter, kstring_t *str);
575 /// ?Get _n lines into buffer from line-oriented flat file; sets _n as number read (undocumented in hts.h)
576 char **hts_readlines(const(char) *fn, int *_n);
577 
578 /**!
579     @abstract       Parse comma-separated list or read list from a file
580     @param list     File name or comma-separated list
581     @param is_file
582     @param _n       Size of the output array (number of items read)
583     @return         NULL on failure or pointer to newly allocated array of
584                     strings
585 */
586 char **hts_readlist(const(char) *fn, int is_file, int *_n);
587 
588 /**!
589   @abstract  Create extra threads to aid compress/decompression for this file
590   @param fp  The file handle
591   @param n   The number of worker threads to create
592   @return    0 for success, or negative if an error occurred.
593   @notes     This function creates non-shared threads for use solely by fp.
594              The hts_set_thread_pool function is the recommended alternative.
595 */
596 int hts_set_threads(htsFile *fp, int n);
597 
598 /**!
599   @abstract  Create extra threads to aid compress/decompression for this file
600   @param fp  The file handle
601   @param p   A pool of worker threads, previously allocated by hts_create_threads().
602   @return    0 for success, or negative if an error occurred.
603 */
604 int hts_set_thread_pool(htsFile *fp, htsThreadPool *p);
605 
606 /**!
607   @abstract  Adds a cache of decompressed blocks, potentially speeding up seeks.
608              This may not work for all file types (currently it is bgzf only).
609   @param fp  The file handle
610   @param n   The size of cache, in bytes
611 */
612 void hts_set_cache_size(htsFile *fp, int n);
613 
614 /**!
615   @abstract  Set .fai filename for a file opened for reading
616   @return    0 for success, negative on failure
617   @discussion
618       Called before *_hdr_read(), this provides the name of a .fai file
619       used to provide a reference list if the htsFile contains no @SQ headers.
620 */
621 int hts_set_fai_filename(htsFile *fp, const(char) *fn_aux);
622 
623 
624 /**!
625   @abstract  Determine whether a given htsFile contains a valid EOF block
626   @return    3 for a non-EOF checkable filetype;
627              2 for an unseekable file type where EOF cannot be checked;
628              1 for a valid EOF block;
629              0 for if the EOF marker is absent when it should be present;
630             -1 (with errno set) on failure
631   @discussion
632       Check if the BGZF end-of-file (EOF) marker is present
633 */
634 int hts_check_EOF(htsFile *fp);
635 
636 /************
637  * Indexing *
638  ************/
639 
640 /*!
641 These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc,
642 producing iterators operating as follows:
643  - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file
644  - HTS_IDX_START  iterates over the entire file
645  - HTS_IDX_REST   iterates from the current position to the end of the file
646  - HTS_IDX_NONE   always returns "no more alignment records"
647 When one of these special tid values is used, beg and end are ignored.
648 When REST or NONE is used, idx is also ignored and may be NULL.
649 */
650 enum HTS_IDX_NOCOOR = (-2); /// iterates over unmapped reads sorted at the end of the file
651 enum HTS_IDX_START  = (-3); /// iterates over the entire file
652 enum HTS_IDX_REST   = (-4); /// iterates from the current position to the end of the file
653 enum HTS_IDX_NONE   = (-5); /// always returns "no more alignment records"
654 
655 enum int HTS_FMT_CSI = 0;   /// coordinate-sorted index (new)
656 enum int HTS_FMT_BAI = 1;   /// BAM index (old)
657 enum int HTS_FMT_TBI = 2;   /// Tabix index
658 enum int HTS_FMT_CRAI= 3;   /// CRAM index (not sure if superceded by CSI?)
659 
660 // Almost INT64_MAX, but when cast into a 32-bit int it's
661 // also INT_MAX instead of -1.  This avoids bugs with old code
662 // using the new hts_pos_t data type.
663 enum HTS_POS_MAX = (((cast(int64_t)int.max)<<32)|int.max);
664 enum HTS_POS_MIN = int64_t.min;
665 //enum PRIhts_pos = PRId64;
666 alias hts_pos_t = int64_t;
667 
668 // For comparison with previous release:
669 //
670 // #define HTS_POS_MAX INT_MAX
671 // #define HTS_POS_MIN INT_MIN
672 // #define PRIhts_pos PRId32
673 // typedef int32_t hts_pos_t;
674 
675 struct hts_pair_pos_t {
676     hts_pos_t beg, end;
677 }
678 
679 alias hts_pair32_t = hts_pair_pos_t;    // For backwards compatibility
680 
681 struct hts_pair64_t { // @suppress(dscanner.style.phobos_naming_convention)
682     /// start, end coordinates (64-bit)
683     ulong u, v;
684 }
685 
686 /// 64-bit start, end coordinate pair tracking max (internally used in hts.c)
687 struct hts_pair64_max_t { // @suppress(dscanner.style.phobos_naming_convention)
688     /// ?
689     ulong u, v;
690     /// ?
691     ulong max;
692 }
693 
694 /// Region list used in iterators (NB: apparently confined to single contig/tid)
695 struct hts_reglist_t { // @suppress(dscanner.style.phobos_naming_convention)
696     const(char) *reg;   /// Region string
697     hts_pair_pos_t *intervals;  /// (start,end) intervals
698     int tid;            /// Contig id
699     uint32_t count;                 /// How many intervals
700     /// absolute bounds
701     hts_pos_t min_beg, max_end;
702 }
703 
704 //typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, int *beg, int *end);
705 alias hts_readrec_func = int function(BGZF *fp, void *data, void *r, int *tid, hts_pos_t *beg, hts_pos_t *end);
706 
707 //typedef int hts_seek_func(void *fp, int64_t offset, int where);
708 alias hts_seek_func = int function(void *fp, ulong offset, int where);
709 
710 //typedef int64_t hts_tell_func(void *fp);
711 alias hts_tell_func = long function(void *fp);
712 
713 /// iterator
714 struct hts_itr_t { // @suppress(dscanner.style.phobos_naming_convention)
715     pragma(msg, "hts_itr_t: bitfield order assumed starting with LSB");
716     // uint32_t read_rest:1, finished:1, is_cram:1, dummy:29;
717     mixin(bitfields!(
718         bool, "read_rest", 1,
719         bool, "finished",  1,
720         bool, "is_cram",   1,
721         bool, "nocoor",    1,
722         bool, "multi",     1,
723         uint, "dummy27", 27));
724     /// iterator position data
725     int tid, n_off, i, n_reg;
726     hts_pos_t beg, end;
727     hts_reglist_t *reg_list;
728     int curr_tid, curr_reg, curr_intv;
729     hts_pos_t curr_beg, curr_end;
730     uint64_t curr_off, nocoor_off;
731     hts_pair64_max_t *off;  /// ? (start, end) offset
732     hts_readrec_func *readrec;  /// record parsing fn pointer
733     hts_seek_func *seek;
734     hts_tell_func *tell;
735     /// ???
736     struct Bins {
737         /// ???
738         int n, m;
739         int *a; /// ???
740     }
741     Bins bins;  /// ???
742 }
743 
744 /// ? index key
745 struct aux_key_t { // @suppress(dscanner.style.phobos_naming_convention)
746     int key;    /// ???
747     /// ???
748     ulong min_off, max_off;
749 }
750 
751 alias hts_itr_multi_t = hts_itr_t;
752 
753     pragma(inline, true)
754     {
755         /// ???
756         auto hts_bin_first(T)(T l) { return (((1<<(((l)<<1) + (l))) - 1) / 7); }    //     #define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7)
757         /// ???
758         auto hts_bin_parent(T)(T l){ return (((l) - 1) >> 3); }                     //     #define hts_bin_parent(l) (((l) - 1) >> 3)
759     }
760 
761 /+
762     /// Initialize index
763     hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls);
764     /// Destroy index
765     void hts_idx_destroy(hts_idx_t *idx);
766     /// Add to index
767     int hts_idx_push(hts_idx_t *idx, int tid, int beg, int end, uint64_t offset, int is_mapped);
768     /// ?finalize index
769     void hts_idx_finish(hts_idx_t *idx, uint64_t final_offset);
770 +/
771 ///////////////////////////////////////////////////////////
772 // Low-level API for building indexes.
773 
774 /// Create a BAI/CSI/TBI type index structure
775 /** @param n          Initial number of targets
776     @param fmt        Format, one of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI
777     @param offset0    Initial file offset
778     @param min_shift  Number of bits for the minimal interval
779     @param n_lvls     Number of levels in the binning index
780     @return An initialised hts_idx_t struct on success; NULL on failure
781 
782 The struct returned by a successful call should be freed via hts_idx_destroy()
783 when it is no longer needed.
784 */
785 hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls);
786 
787 /// Free a BAI/CSI/TBI type index
788 /** @param idx   Index structure to free
789  */
790 void hts_idx_destroy(hts_idx_t *idx);
791 
792 /// Push an index entry
793 /** @param idx        Index
794     @param tid        Target id
795     @param beg        Range start (zero-based)
796     @param end        Range end (zero-based, half-open)
797     @param offset     File offset
798     @param is_mapped  Range corresponds to a mapped read
799     @return 0 on success; -1 on failure
800 
801 The @p is_mapped parameter is used to update the n_mapped / n_unmapped counts
802 stored in the meta-data bin.
803  */
804 int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped);
805 
806 /// Finish building an index
807 /** @param idx          Index
808     @param final_offset Last file offset
809     @return 0 on success; non-zero on failure.
810 */
811 int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset);
812 
813 /// Returns index format
814 /** @param idx   Index
815     @return One of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI
816 */
817 int hts_idx_fmt(hts_idx_t *idx);
818 
819 /// Add name to TBI index meta-data
820 /** @param idx   Index
821     @param tid   Target identifier
822     @param name  Target name
823     @return Index number of name in names list on success; -1 on failure.
824 */
825 int hts_idx_tbi_name(hts_idx_t *idx, int tid, const(char) *name);
826 
827 // Index loading and saving
828 
829 
830 
831 /// Save an index to a file
832 /** @param idx  Index to be written
833     @param fn   Input BAM/BCF/etc filename, to which .bai/.csi/etc will be added
834     @param fmt  One of the HTS_FMT_* index formats
835     @return  0 if successful, or negative if an error occurred.
836 */
837 int hts_idx_save(const(hts_idx_t) *idx, const(char) *fn, int fmt);
838 
839 /// Save an index to a specific file
840 /** @param idx    Index to be written
841     @param fn     Input BAM/BCF/etc filename
842     @param fnidx  Output filename, or NULL to add .bai/.csi/etc to @a fn
843     @param fmt    One of the HTS_FMT_* index formats
844     @return  0 if successful, or negative if an error occurred.
845 */
846 int hts_idx_save_as(const(hts_idx_t) *idx, const(char) *fn, const(char) *fnidx, int fmt);
847 
848 
849 /// Load an index file
850 /** @param fn   BAM/BCF/etc filename, to which .bai/.csi/etc will be added or
851                 the extension substituted, to search for an existing index file.
852                 In case of a non-standard naming, the file name can include the
853                 name of the index file delimited with HTS_IDX_DELIM.
854 
855     @param fmt  One of the HTS_FMT_* index formats
856     @return  The index, or NULL if an error occurred.
857 
858 If @p fn contains the string "##idx##" (HTS_IDX_DELIM), the part before
859 the delimiter will be used as the name of the data file and the part after
860 it will be used as the name of the index.
861 
862 Otherwise, this function tries to work out the index name as follows:
863 
864   It will try appending ".csi" to @p fn
865   It will try substituting an existing suffix (e.g. .bam, .vcf) with ".csi"
866   Then, if @p fmt is HTS_FMT_BAI:
867     It will try appending ".bai" to @p fn
868     To will substituting the existing suffix (e.g. .bam) with ".bai"
869   else if @p fmt is HTS_FMT_TBI:
870     It will try appending ".tbi" to @p fn
871     To will substituting the existing suffix (e.g. .vcf) with ".tbi"
872 
873 If the index file is remote (served over a protocol like https), first a check
874 is made to see is a locally cached copy is available.  This is done for all
875 of the possible names listed above.  If a cached copy is not available then
876 the index will be downloaded and stored in the current working directory,
877 with the same name as the remote index.
878 
879     Equivalent to hts_idx_load3(fn, NULL, fmt, HTS_IDX_SAVE_REMOTE);
880 */
881 hts_idx_t *hts_idx_load(const(char) *fn, int fmt);
882 
883 /// Load a specific index file
884 /** @param fn     Input BAM/BCF/etc filename
885     @param fnidx  The input index filename
886     @return  The index, or NULL if an error occurred.
887 
888     Equivalent to hts_idx_load3(fn, fnidx, 0, 0);
889 
890     This function will not attempt to save index files locally.
891 */
892 hts_idx_t *hts_idx_load2(const(char) *fn, const(char) *fnidx);
893 
894 /// Load a specific index file
895 /** @param fn     Input BAM/BCF/etc filename
896     @param fnidx  The input index filename
897     @param fmt    One of the HTS_FMT_* index formats
898     @param flags  Flags to alter behaviour (see description)
899     @return  The index, or NULL if an error occurred.
900 
901     If @p fnidx is NULL, the index name will be derived from @p fn in the
902     same way as hts_idx_load().
903 
904     If @p fnidx is not NULL, @p fmt is ignored.
905 
906     The @p flags parameter can be set to a combination of the following
907     values:
908 
909         HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
910         HTS_IDX_SILENT_FAIL   Fail silently if the index is not present
911 
912     The index struct returned by a successful call should be freed
913     via hts_idx_destroy() when it is no longer needed.
914 */
915 hts_idx_t *hts_idx_load3(const(char) *fn, const(char) *fnidx, int fmt, HTS_IDX_FLAG flags);
916 
917 /// Flags for hts_idx_load3() ( and also sam_idx_load3(), tbx_idx_load3() )
918 enum HTS_IDX_FLAG : int {
919     HTS_IDX_SAVE_REMOTE = 1,
920     HTS_IDX_SILENT_FAIL = 2
921 }
922 
923 ///////////////////////////////////////////////////////////
924 // Functions for accessing meta-data stored in indexes
925 
926 /// Get extra index meta-data
927 /** @param idx    The index
928     @param l_meta Pointer to where the length of the extra data is stored
929     @return Pointer to the extra data if present; NULL otherwise
930 
931     Indexes (both .tbi and .csi) made by tabix include extra data about
932     the indexed file.  The returns a pointer to this data.  Note that the
933     data is stored exactly as it is in the index.  Callers need to interpret
934     the results themselves, including knowing what sort of data to expect;
935     byte swapping etc.
936 */
937 uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta);
938 
939 /// Set extra index meta-data
940 /** @param idx     The index
941     @param l_meta  Length of data
942     @param meta    Pointer to the extra data
943     @param is_copy If not zero, a copy of the data is taken
944     @return 0 on success; -1 on failure (out of memory).
945 
946     Sets the data that is returned by hts_idx_get_meta().
947 
948     If is_copy != 0, a copy of the input data is taken.  If not, ownership of
949     the data pointed to by *meta passes to the index.
950 */
951 int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, int is_copy);
952 
953 /// Get number of mapped and unmapped reads from an index
954 /** @param      idx      Index
955     @param      tid      Target ID
956     @param[out] mapped   Location to store number of mapped reads
957     @param[out] unmapped Location to store number of unmapped reads
958     @return 0 on success; -1 on failure (data not available)
959 
960     BAI and CSI indexes store information on the number of reads for each
961     target that were mapped or unmapped (unmapped reads will generally have
962     a paired read that is mapped to the target).  This function returns this
963     infomation if it is available.
964 
965     @note Cram CRAI indexes do not include this information.
966 */
967 int hts_idx_get_stat(const(hts_idx_t)* idx, int tid, uint64_t* mapped, uint64_t* unmapped);
968 
969 /// Return the number of unplaced reads from an index
970 /** @param idx    Index
971     @return Unplaced reads count
972 
973     Unplaced reads are not linked to any reference (e.g. RNAME is '*' in SAM
974     files).
975 */
976 uint64_t hts_idx_get_n_no_coor(const(hts_idx_t)* idx);
977 
978 ///////////////////////////////////////////////////////////
979 // Region parsing
980 
981 enum HTS_PARSE_FLAGS : int {
982     HTS_PARSE_THOUSANDS_SEP = 1, ///< Ignore ',' separators within numbers
983     HTS_PARSE_ONE_COORD = 2,     ///< chr:pos means chr:pos-pos and not chr:pos-end
984     HTS_PARSE_LIST = 4           ///< Expect a comma separated list of regions. (Disables HTS_PARSE_THOUSANDS_SEP)
985 }
986 
987 /// Parse a numeric string
988 /** The number may be expressed in scientific notation, and optionally may
989     contain commas in the integer part (before any decimal point or E notation).
990     @param str     String to be parsed
991     @param strend  If non-NULL, set on return to point to the first character
992                    in @a str after those forming the parsed number
993     @param flags   Or'ed-together combination of HTS_PARSE_* flags
994     @return  Converted value of the parsed number.
995 
996     When @a strend is NULL, a warning will be printed (if hts_verbose is HTS_LOG_WARNING
997     or more) if there are any trailing characters after the number.
998 */
999 long hts_parse_decimal(const(char) *str, char **strend, HTS_PARSE_FLAGS flags);
1000 
1001 alias hts_name2id_f = int function(void*, const(char)*);
1002 alias hts_id2name_f = const(char)* function(void*, int);
1003 
1004 /// Parse a "CHR:START-END"-style region string
1005 /** @param str  String to be parsed
1006     @param beg  Set on return to the 0-based start of the region
1007     @param end  Set on return to the 1-based end of the region
1008     @return  Pointer to the colon or '\0' after the reference sequence name,
1009              or NULL if @a str could not be parsed.
1010 
1011     NOTE: For compatibility with hts_parse_reg only.
1012     Please use hts_parse_region instead.
1013 */
1014 const(char) *hts_parse_reg64(const(char) *str, hts_pos_t *beg, hts_pos_t *end);
1015 
1016 /// Parse a "CHR:START-END"-style region string
1017 /** @param str  String to be parsed
1018     @param beg  Set on return to the 0-based start of the region
1019     @param end  Set on return to the 1-based end of the region
1020     @return  Pointer to the colon or '\0' after the reference sequence name,
1021              or NULL if @a str could not be parsed.
1022 */
1023 const(char) *hts_parse_reg(const(char) *str, int *beg, int *end);
1024 
1025 /// Parse a "CHR:START-END"-style region string
1026 /** @param str   String to be parsed
1027     @param tid   Set on return (if not NULL) to be reference index (-1 if invalid)
1028     @param beg   Set on return to the 0-based start of the region
1029     @param end   Set on return to the 1-based end of the region
1030     @param getid Function pointer.  Called if not NULL to set tid.
1031     @param hdr   Caller data passed to getid.
1032     @param flags Bitwise HTS_PARSE_* flags listed above.
1033     @return      Pointer to the byte after the end of the entire region
1034                  specifier (including any trailing comma) on success,
1035                  or NULL if @a str could not be parsed.
1036 
1037     A variant of hts_parse_reg which is reference-id aware.  It uses
1038     the iterator name2id callbacks to validate the region tokenisation works.
1039 
1040     This is necessary due to GRCh38 HLA additions which have reference names
1041     like "HLA-DRB1*12:17".
1042 
1043     To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200"
1044     are reference names, quote using curly braces.
1045     Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example.
1046 
1047     Flags are used to control how parsing works, and can be one of the below.
1048 
1049     HTS_PARSE_THOUSANDS_SEP:
1050         Ignore commas in numbers.  For example with this flag 1,234,567
1051         is interpreted as 1234567.
1052 
1053     HTS_PARSE_LIST:
1054         If present, the region is assmed to be a comma separated list and
1055         position parsing will not contain commas (this implicitly
1056         clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal).
1057         On success the return pointer will be the start of the next region, ie
1058         the character after the comma.  (If *ret != '\0' then the caller can
1059         assume another region is present in the list.)
1060 
1061         If not set then positions may contain commas.  In this case the return
1062         value should point to the end of the string, or NULL on failure.
1063 
1064     HTS_PARSE_ONE_COORD:
1065         If present, X:100 is treated as the single base pair region X:100-100.
1066         In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-<end>.
1067         (This is the standard bcftools region convention.)
1068 
1069         When not set X:100 is considered to be X:100-<end> where <end> is
1070         the end of chromosome X (set to INT_MAX here).  X:100- and X:-100 are
1071         invalid.
1072         (This is the standard samtools region convention.)
1073 
1074     Note the supplied string expects 1 based inclusive coordinates, but the
1075     returned coordinates start from 0 and are half open, so pos0 is valid
1076     for use in e.g. "for (pos0 = beg; pos0 < end; pos0++) {...}"
1077 
1078     If NULL is returned, the value in tid mat give additional information
1079     about the error:
1080 
1081         -2   Failed to parse @p hdr; or out of memory
1082         -1   The reference in @p str has mismatched braces, or does not
1083              exist in @p hdr
1084         >= 0 The specified range in @p str could not be parsed
1085 */
1086 const(char) *hts_parse_region(const(char)*s, int *tid, hts_pos_t *beg,
1087                              hts_pos_t *end, hts_name2id_f getid, void *hdr,
1088                              HTS_PARSE_FLAGS flags);
1089 
1090 ///////////////////////////////////////////////////////////
1091 // Generic iterators
1092 //
1093 // These functions provide the low-level infrastructure for iterators.
1094 // Wrappers around these are used to make iterators for specific file types.
1095 // See:
1096 //     htslib/sam.h  for SAM/BAM/CRAM iterators
1097 //     htslib/vcf.h  for VCF/BCF iterators
1098 //     htslib/tbx.h  for files indexed by tabix
1099 
1100 /// Create a single-region iterator
1101 /** @param idx      Index
1102     @param tid      Target ID
1103     @param beg      Start of region
1104     @param end      End of region
1105     @param readrec  Callback to read a record from the input file
1106     @return An iterator on success; NULL on failure
1107 
1108     The iterator struct returned by a successful call should be freed
1109     via hts_itr_destroy() when it is no longer needed.
1110  */
1111 hts_itr_t *hts_itr_query(const(hts_idx_t)*idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func readrec);
1112 
1113 /// Free an iterator
1114 /** @param iter   Iterator to free
1115  */
1116 void hts_itr_destroy(hts_itr_t *iter);
1117 
1118 alias hts_itr_query_func =
1119     hts_itr_t * function(const(hts_idx_t)*idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func readrec);
1120 
1121 /// Create a single-region iterator from a text region specification
1122 /** @param idx       Index
1123     @param reg       Region specifier
1124     @param getid     Callback function to return the target ID for a name
1125     @param hdr       Input file header
1126     @param itr_query Callback function returning an iterator for a numeric tid,
1127                      start and end position
1128     @param readrec   Callback to read a record from the input file
1129     @return An iterator on success; NULL on error
1130 
1131     The iterator struct returned by a successful call should be freed
1132     via hts_itr_destroy() when it is no longer needed.
1133  */
1134 hts_itr_t *hts_itr_querys(const(hts_idx_t) *idx, const(char) *reg, hts_name2id_f getid, void *hdr,
1135     hts_itr_query_func itr_query, hts_readrec_func readrec);
1136 
1137 /// Return the next record from an iterator
1138 /** @param fp      Input file handle
1139     @param iter    Iterator
1140     @param r       Pointer to record placeholder
1141     @param data    Data passed to the readrec callback
1142     @return >= 0 on success, -1 when there is no more data, < -1 on error
1143  */
1144 int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data);
1145 
1146 /// Return a list of target names from an index
1147 /** @param      idx    Index
1148     @param[out] n      Location to store the number of targets
1149     @param      getid  Callback function to get the name for a target ID
1150     @param      hdr    Header from indexed file
1151     @return An array of pointers to the names on success; NULL on failure
1152 
1153     @note The names are pointers into the header data structure.  When cleaning
1154     up, only the array should be freed, not the names.
1155  */
1156 const(char)** hts_idx_seqnames(const(hts_idx_t) *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values
1157 
1158 /**********************************
1159  * Iterator with multiple regions *
1160  **********************************/
1161 
1162 alias hts_itr_multi_query_func = int function(const(hts_idx_t) *idx, hts_itr_t *itr);
1163 int hts_itr_multi_bam(const(hts_idx_t) *idx, hts_itr_t *iter);
1164 int hts_itr_multi_cram(const(hts_idx_t) *idx, hts_itr_t *iter);
1165 
1166 /// Create a multi-region iterator from a region list
1167 /** @param idx          Index
1168     @param reglist      Region list
1169     @param count        Number of items in region list
1170     @param getid        Callback to convert names to target IDs
1171     @param hdr          Indexed file header (passed to getid)
1172     @param itr_specific Filetype-specific callback function
1173     @param readrec      Callback to read an input file record
1174     @param seek         Callback to seek in the input file
1175     @param tell         Callback to return current input file location
1176     @return An iterator on success; NULL on failure
1177 
1178     The iterator struct returned by a successful call should be freed
1179     via hts_itr_destroy() when it is no longer needed.
1180  */
1181 hts_itr_t *hts_itr_regions(const(hts_idx_t) *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr,
1182             hts_itr_multi_query_func itr_specific, hts_readrec_func readrec, hts_seek_func seek, hts_tell_func tell);
1183 
1184 /// Return the next record from an iterator
1185 /** @param fp      Input file handle
1186     @param iter    Iterator
1187     @param r       Pointer to record placeholder
1188     @return >= 0 on success, -1 when there is no more data, < -1 on error
1189  */
1190 int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r);
1191 
1192 /// Create a region list from a char array
1193 /** @param argv      Char array of target:interval elements, e.g. chr1:2500-3600, chr1:5100, chr2
1194     @param argc      Number of items in the array
1195     @param r_count   Pointer to the number of items in the resulting region list
1196     @param hdr       Header for the sam/bam/cram file
1197     @param getid     Callback to convert target names to target ids.
1198     @return  A region list on success, NULL on failure
1199 
1200     The hts_reglist_t struct returned by a successful call should be freed
1201     via hts_reglist_free() when it is no longer needed.
1202  */
1203 hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr,  hts_name2id_f getid);
1204 
1205 /// Free a region list
1206 /** @param reglist    Region list
1207     @param count      Number of items in the list
1208  */
1209 void hts_reglist_free(hts_reglist_t *reglist, int count);
1210 
1211 /// Free a multi-region iterator
1212 /** @param iter   Iterator to free
1213  */
1214 alias hts_itr_multi_destroy = hts_itr_destroy;
1215 
1216     /**
1217      * hts_file_type() - Convenience function to determine file type
1218      * DEPRECATED:  This function has been replaced by hts_detect_format().
1219      * It and these FT_* macros will be removed in a future HTSlib release.
1220      */
1221     enum FT_UNKN   = 0;
1222     enum FT_GZ     = 1;                 /// ditto
1223     enum FT_VCF    = 2;                 /// ditto
1224     enum FT_VCF_GZ = (FT_GZ|FT_VCF);    /// ditto
1225     enum FT_BCF    = (1<<2);            /// ditto
1226     enum FT_BCF_GZ = (FT_GZ|FT_BCF);    /// ditto
1227     enum FT_STDIN  = (1<<3);            /// ditto
1228     deprecated("This function has been replaced by hts_detect_format(). "
1229     ~ "It and these FT_* macros will be removed in a future HTSlib release.")
1230     int hts_file_type(const(char) *fname);
1231 
1232 /+
1233 /***************************
1234  * Revised MAQ error model *
1235  ***************************/
1236 
1237 struct errmod_t;
1238 typedef struct errmod_t errmod_t;
1239 
1240 errmod_t *errmod_init(double depcorr);
1241 void errmod_destroy(errmod_t *em);
1242 
1243 /*
1244     n: number of bases
1245     m: maximum base
1246     bases[i]: qual:6, strand:1, base:4
1247     q[i*m+j]: phred-scaled likelihood of (i,j)
1248  */
1249 int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q);
1250 
1251 
1252 /*****************************************************
1253  * Probabilistic banded glocal alignment             *
1254  * See https://doi.org/10.1093/bioinformatics/btr076 *
1255  *****************************************************/
1256 
1257 typedef struct probaln_par_t {
1258     float d, e;
1259     int bw;
1260 } probaln_par_t;
1261 
1262 /// Perform probabilistic banded glocal alignment
1263 /** @param      ref     Reference sequence
1264     @param      l_ref   Length of reference
1265     @param      query   Query sequence
1266     @param      l_query Length of query sequence
1267     @param      iqual   Query base qualities
1268     @param      c       Alignment parameters
1269     @param[out] state   Output alignment
1270     @param[out] q    Phred scaled posterior probability of state[i] being wrong
1271     @return     Phred-scaled likelihood score, or INT_MIN on failure.
1272 
1273 The reference and query sequences are coded using integers 0,1,2,3,4 for
1274 bases A,C,G,T,N respectively (N here is for any ambiguity code).
1275 
1276 On output, state and q are arrays of length l_query. The higher 30
1277 bits give the reference position the query base is matched to and the
1278 lower two bits can be 0 (an alignment match) or 1 (an
1279 insertion). q[i] gives the phred scaled posterior probability of
1280 state[i] being wrong.
1281 
1282 On failure, errno will be set to EINVAL if the values of l_ref or l_query
1283 were invalid; or ENOMEM if a memory allocation failed.
1284 */
1285 
1286 int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, int l_query, const uint8_t *iqual, const probaln_par_t *c, int *state, uint8_t *q);
1287 
1288 
1289     /**********************
1290      * MD5 implementation *
1291      **********************/
1292 
1293     struct hts_md5_context;
1294     typedef struct hts_md5_context hts_md5_context;
1295 
1296     /*! @abstract   Intialises an MD5 context.
1297      *  @discussion
1298      *    The expected use is to allocate an hts_md5_context using
1299      *    hts_md5_init().  This pointer is then passed into one or more calls
1300      *    of hts_md5_update() to compute successive internal portions of the
1301      *    MD5 sum, which can then be externalised as a full 16-byte MD5sum
1302      *    calculation by calling hts_md5_final().  This can then be turned
1303      *    into ASCII via hts_md5_hex().
1304      *
1305      *    To dealloate any resources created by hts_md5_init() call the
1306      *    hts_md5_destroy() function.
1307      *
1308      *  @return     hts_md5_context pointer on success, NULL otherwise.
1309      */
1310     hts_md5_context *hts_md5_init(void);
1311 
1312     /*! @abstract Updates the context with the MD5 of the data. */
1313     void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size);
1314 
1315     /*! @abstract Computes the final 128-bit MD5 hash from the given context */
1316     void hts_md5_final(unsigned char *digest, hts_md5_context *ctx);
1317 
1318     /*! @abstract Resets an md5_context to the initial state, as returned
1319      *            by hts_md5_init().
1320      */
1321     void hts_md5_reset(hts_md5_context *ctx);
1322 
1323     /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-termninated
1324      *            hex string.
1325      */
1326     void hts_md5_hex(char *hex, const unsigned char *digest);
1327 
1328     /*! @abstract Deallocates any memory allocated by hts_md5_init. */
1329     void hts_md5_destroy(hts_md5_context *ctx);
1330 +/
1331 
1332 pragma(inline,true)
1333 long hts_reg2bin(hts_pos_t beg, hts_pos_t end, int min_shift, int n_lvls)
1334 {
1335     int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7;
1336     for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l))
1337         if (beg>>s == end>>s) return t + (beg>>s);
1338     return 0;
1339 }
1340 /+
1341 static inline int hts_bin_bot(int bin, int n_lvls)
1342 {
1343     int l, b;
1344     for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); // compute the level of bin
1345     return (bin - hts_bin_first(l)) << (n_lvls - l) * 3;
1346 }
1347 
1348 /**************
1349  * Endianness *
1350  **************/
1351 
1352 static inline int ed_is_big(void)
1353 {
1354     long one= 1;
1355     return !(*((char *)(&one)));
1356 }
1357 static inline uint16_t ed_swap_2(uint16_t v)
1358 {
1359     return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8));
1360 }
1361 static inline void *ed_swap_2p(void *x)
1362 {
1363     *(uint16_t*)x = ed_swap_2(*(uint16_t*)x);
1364     return x;
1365 }
1366 static inline uint32_t ed_swap_4(uint32_t v)
1367 {
1368     v = ((v & 0x0000FFFFU) << 16) | (v >> 16);
1369     return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);
1370 }
1371 static inline void *ed_swap_4p(void *x)
1372 {
1373     *(uint32_t*)x = ed_swap_4(*(uint32_t*)x);
1374     return x;
1375 }
1376 static inline uint64_t ed_swap_8(uint64_t v)
1377 {
1378     v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);
1379     v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);
1380     return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);
1381 }
1382 static inline void *ed_swap_8p(void *x)
1383 {
1384     *(uint64_t*)x = ed_swap_8(*(uint64_t*)x);
1385     return x;
1386 }
1387 
1388 +/