1 // htslib-1.9 hts.h as D module
2 // Changes include:
// In D, const on either the LHS or RHS of a function declaration applies to the function, not the return value, unless parentheses are included:
4 // changed ^const <type> <fnname> to ^const(<type>) <fnname>
5 /*  *      aliased typedef'd function pointers */
6 /*  *       changed C-style arrays (eg line 339, extern const char seq_nt16_str[];) to char[] seq_nt16_str */
7 /*  *           as update to above, seq_nt16_str needs to be char[16], as C and D style char[] imply different things */
8 module dhtslib.htslib.hts;
9 
10 import std.bitmanip;
11 
12 extern (C):
13 /// @file htslib/hts.h
14 /// Format-neutral I/O, indexing, and iterator API functions.
15 /*
16     Copyright (C) 2012-2016 Genome Research Ltd.
17     Copyright (C) 2010, 2012 Broad Institute.
18     Portions copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3@live.co.uk>
19 
20     Author: Heng Li <lh3@sanger.ac.uk>
21 
22 Permission is hereby granted, free of charge, to any person obtaining a copy
23 of this software and associated documentation files (the "Software"), to deal
24 in the Software without restriction, including without limitation the rights
25 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
26 copies of the Software, and to permit persons to whom the Software is
27 furnished to do so, subject to the following conditions:
28 
29 The above copyright notice and this permission notice shall be included in
30 all copies or substantial portions of the Software.
31 
32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
33 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
34 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
35 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
36 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
37 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
38 DEALINGS IN THE SOFTWARE.  */
39 
40 //#include <stddef.h>
41 import core.stdc.stdint;
42 /+
43 #include "hts_defs.h"
44 #include "hts_log.h"
45 +/
46 import dhtslib.htslib.bgzf;
47 
/// Opaque CRAM file handle, only referenced by pointer in this module (see cram.h, sam.h, sam.d)
struct cram_fd; // @suppress(dscanner.style.phobos_naming_convention)
50 /// see hfile.d
51 //struct hFILE; // @suppress(dscanner.style.phobos_naming_convention)
52 import dhtslib.htslib.hfile: hFILE;
/// Opaque thread pool type, only referenced by pointer in this module (see thread_pool.d)
struct hts_tpool; // @suppress(dscanner.style.phobos_naming_convention)
55 
56 import dhtslib.htslib.kstring: __kstring_t, kstring_t;
57 
58 /+
59 #ifndef kroundup32
60 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x))
61 #endif
62 
63 /**
64  * @hideinitializer
65  * Macro to expand a dynamic array of a given type
66  *
67  * @param         type_t The type of the array elements
68  * @param[in]     n      Requested number of elements of type type_t
69  * @param[in,out] m      Size of memory allocated
70  * @param[in,out] ptr    Pointer to the array
71  *
72  * @discussion
73  * The array *ptr will be expanded if necessary so that it can hold @p n
74  * or more elements.  If the array is expanded then the new size will be
75  * written to @p m and the value in @ptr may change.
76  *
77  * It must be possible to take the address of @p ptr and @p m must be usable
78  * as an lvalue.
79  *
80  * @bug
81  * If the memory allocation fails, this will call exit(1).  This is
82  * not ideal behaviour in a library.
83  */
84 #define hts_expand(type_t, n, m, ptr) do {                              \
85         if ((n) > (m)) {                                                \
86             size_t hts_realloc_or_die(size_t, size_t, size_t, size_t,   \
87                                       int, void **, const char *);      \
88             (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \
89                                      sizeof(type_t),  0,                \
90                                      (void **)&(ptr), __func__);        \
91         }                                                               \
92     } while (0)
93 
94 /**
95  * @hideinitializer
96  * Macro to expand a dynamic array, zeroing any newly-allocated memory
97  *
98  * @param         type_t The type of the array elements
99  * @param[in]     n      Requested number of elements of type type_t
100  * @param[in,out] m      Size of memory allocated
101  * @param[in,out] ptr    Pointer to the array
102  *
103  * @discussion
104  * As for hts_expand(), except the bytes that make up the array elements
105  * between the old and new values of @p m are set to zero using memset().
106  *
107  * @bug
108  * If the memory allocation fails, this will call exit(1).  This is
109  * not ideal behaviour in a library.
110  */
111 
112 
113 #define hts_expand0(type_t, n, m, ptr) do {                             \
114         if ((n) > (m)) {                                                \
115             size_t hts_realloc_or_die(size_t, size_t, size_t, size_t,   \
116                                       int, void **, const char *);      \
117             (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \
118                                      sizeof(type_t), 1,                 \
119                                      (void **)&(ptr), __func__);        \
120         }                                                               \
121     } while (0)
122 +/
123 /************
124  * File I/O *
125  ************/
126 
127 // Add new entries only at the end (but before the *_maximum entry)
128 // of these enums, as their numbering is part of the htslib ABI.
129 
/// Broad category of file content (sequence data, variant data, index, regions, etc.)
enum htsFormatCategory // @suppress(dscanner.style.phobos_naming_convention)
{
    unknown_category = 0,
    sequence_data    = 1,   // Sequence data -- SAM, BAM, CRAM, etc
    variant_data     = 2,   // Variant calling data -- VCF, BCF, etc
    index_file       = 3,   // Index file associated with some data file
    region_list      = 4,   // Coordinate intervals or regions -- BED, etc
    category_maximum = 32_767
}
139 
/// Exact file format (SAM, BAM, CRAM, BCF, VCF, TBI, BED, etc.)
enum htsExactFormat // @suppress(dscanner.style.phobos_naming_convention)
{
    unknown_format,
    binary_format,
    text_format,
    sam,
    bam,
    bai,
    cram,
    crai,
    vcf,
    bcf,
    csi,
    gzi,
    tbi,
    bed,
    htsget,
    //deprecated("Use htsExactFormat 'htsget' instead") json = htsget,
    format_maximum = 32_767
}
149 
/// Kind of compression applied to a file
enum htsCompression // @suppress(dscanner.style.phobos_naming_convention)
{
    no_compression,
    gzip,
    bgzf,
    custom,
    compression_maximum = 32_767
}
155 
/// Complete file format description for an hts file
// NB: `version` is a reserved keyword in D, so the C member `version` is
// exposed here as field `v` of nested type `Vers`.
struct htsFormat // @suppress(dscanner.style.phobos_naming_convention)
{
    /// Broad format category (sequence data, variant data, index, regions, etc.)
    htsFormatCategory category;
    /// Specific format (SAM, BAM, CRAM, BCF, VCF, TBI, BED, etc.)
    htsExactFormat format;

    /// Major/minor pair making up the format version
    struct Vers // @suppress(dscanner.style.undocumented_declaration)
    {
        short major;
        short minor;
    }

    /// format version
    Vers v;
    /// Compression type
    htsCompression compression;
    /// currently unused
    short compression_level;
    /// format specific options; see struct hts_opt.
    void* specific;
}
168 
169 
// Maintainers note: htsFile cannot be made opaque, because several of its
// fields belong to libhts.so's ABI and therefore must not be moved:
//  - fp is used in the public sam_itr_next()/etc macros
//  - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1
//  - is_write and is_cram are used directly in samtools <= 1.1
//  - fp is used directly in samtools (up to and including current develop)
//  - line is used directly in bcftools (up to and including current develop)
/// Data and metadata for an hts file; part of public and private ABI
struct htsFile // @suppress(dscanner.style.phobos_naming_convention)
{
    pragma(msg, "htsFile: bitfield order assumed starting with LSB");

    // C original: uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27;
    mixin(bitfields!(
        bool, "is_bin",    1,
        bool, "is_write",  1,
        bool, "is_be",     1,
        bool, "is_cram",   1,
        bool, "is_bgzf",   1,
        uint, "padding27", 27));

    /// uncompressed(?) file line no.
    int64_t lineno;
    /// buffer to hold line
    kstring_t line;
    /// filename
    char* fn;
    /// auxiliary (i.e., index) file name
    char* fn_aux;

    /// Underlying handle: exactly one of bgzf / cram / hfile shares this storage
    union FP
    {
        BGZF* bgzf;     /// see bgzf.d
        cram_fd* cram;  /// see cram.d
        hFILE* hfile;   /// see hfile.d
    }

    /// hFile plus any needed bgzf or CRAM (if applicable) structure data
    FP fp;
    /// hts file complete file format information
    htsFormat format;
}
201 
/// Thread pool combined with a queue allocation size.
///
/// The pool is expected to be defined already; qsize may be left at zero,
/// meaning an appropriate queue size is taken from the pool.
///
/// Setting qsize explicitly can matter when far more file descriptors than
/// threads are in use, so that keeping memory low is important.
struct htsThreadPool // @suppress(dscanner.style.phobos_naming_convention)
{
    /// The shared thread pool itself
    hts_tpool* pool;
    /// Size of I/O queue to use for this fp
    int qsize;
}
213 
/// REQUIRED_FIELDS: one bit per SAM field
enum sam_fields // @suppress(dscanner.style.phobos_naming_convention)
{
    SAM_QNAME = 1 << 0,
    SAM_FLAG  = 1 << 1,
    SAM_RNAME = 1 << 2,
    SAM_POS   = 1 << 3,
    SAM_MAPQ  = 1 << 4,
    SAM_CIGAR = 1 << 5,
    SAM_RNEXT = 1 << 6,
    SAM_PNEXT = 1 << 7,
    SAM_TLEN  = 1 << 8,
    SAM_SEQ   = 1 << 9,
    SAM_QUAL  = 1 << 10,
    SAM_AUX   = 1 << 11,
    SAM_RGAUX = 1 << 12,
}
230 
/// Format option keys: mostly CRAM-specific, plus a general-purpose group starting at 100
enum hts_fmt_option // @suppress(dscanner.style.phobos_naming_convention)
{
    // ---- CRAM specific ----
    CRAM_OPT_DECODE_MD,
    CRAM_OPT_PREFIX,
    /// obsolete, use hts_set_log_level() instead
    CRAM_OPT_VERBOSITY,
    CRAM_OPT_SEQS_PER_SLICE,
    CRAM_OPT_SLICES_PER_CONTAINER,
    CRAM_OPT_RANGE,
    /// rename to cram_version?
    CRAM_OPT_VERSION,
    CRAM_OPT_EMBED_REF,
    CRAM_OPT_IGNORE_MD5,
    // make general
    CRAM_OPT_REFERENCE,
    CRAM_OPT_MULTI_SEQ_PER_SLICE,
    CRAM_OPT_NO_REF,
    CRAM_OPT_USE_BZIP2,
    CRAM_OPT_SHARED_REF,
    /// deprecated, use HTS_OPT_NTHREADS
    CRAM_OPT_NTHREADS,
    /// make general
    CRAM_OPT_THREAD_POOL,
    CRAM_OPT_USE_LZMA,
    CRAM_OPT_USE_RANS,
    CRAM_OPT_REQUIRED_FIELDS,
    CRAM_OPT_LOSSY_NAMES,
    CRAM_OPT_BASES_PER_SLICE,
    CRAM_OPT_STORE_MD,
    CRAM_OPT_STORE_NM,

    // ---- General purpose ----
    HTS_OPT_COMPRESSION_LEVEL = 100,
    HTS_OPT_NTHREADS,
    HTS_OPT_THREAD_POOL,
    HTS_OPT_CACHE_SIZE,
    HTS_OPT_BLOCK_SIZE,
}

/// Old name for hts_fmt_option, kept for backwards compatibility
alias cram_option = hts_fmt_option;
268 
/// One node in a linked list of options (cache, (de)compression, threads, CRAM, etc.)
struct hts_opt // @suppress(dscanner.style.phobos_naming_convention)
{
    /// string form, strdup()ed
    char* arg;
    /// tokenised key
    hts_fmt_option opt;

    /// Storage for the option value: either an int or a string
    union VAL
    {
        int i;      /// int value
        char* s;    /// string value
    }

    /// option value
    VAL val;
    /// next option (linked list)
    hts_opt* next;
}
281 
282 //#define HTS_FILE_OPTS_INIT {{0},0}
283 // Not apparently used in htslib-1.7
284 
285 /**********************
286  * Exported functions *
287  **********************/
288 
/**
 * Parse the option string c_arg and append it to the option list opts.
 *
 * Returns 0 on success;
 *        -1 on failure.
 */
int hts_opt_add(hts_opt** opts, const(char)* c_arg);
296 
/**
 * Apply every option in the hts_opt list opts to the htsFile fp.
 *
 * Returns 0 on success
 *        -1 on failure
 */
int hts_opt_apply(htsFile* fp, hts_opt* opts);
304 
/**
 * Free an hts_opt list.
 */
void hts_opt_free(hts_opt* opts);
309 
/**
 * Take a file format string (sam, bam, cram, vcf, bcf), optionally followed
 * by a comma separated list of key=value options, and split it into the
 * fields of an htsFormat struct.
 *
 * Returns 0 on success
 *        -1 on failure.
 */
int hts_parse_format(htsFormat* opt, const(char)* str);
319 
/**
 * Tokenise an option string of the form (key(=value)?,)*(key(=value)?)?
 * NB: there is no provision for ',' appearing inside a value!
 * Add backslashing rules?
 *
 * Usable as part of a general command line option parser, or for a string
 * concatenated onto the file open mode.
 *
 * Returns 0 on success
 *        -1 on failure.
 */
int hts_parse_opt_list(htsFormat* opt, const(char)* str);
332 
/** @abstract Lookup table mapping a nucleotide character to its 4-bit encoding.
Accepted input characters are IUPAC ambiguity codes, '=' (giving 0), and
'0'/'1'/'2'/'3' (giving 1/2/4/8).  A/C/G/T encode as 1/2/4/8, and ambiguous
bases as combinations of those bits.
*/
version (Windows)
{
    // Defined locally on Windows; elsewhere the symbol comes from the C library.
    const(char)[256] seq_nt16_table = [
        // 0x00 - 0x2F
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        // 0x30 - 0x3F: digits '0'..'3' and '='
         1,  2,  4,  8,  15, 15, 15, 15,  15, 15, 15, 15,  15,  0 /*=*/, 15, 15,
        // 0x40 - 0x5F: upper-case letters
        15,  1, 14,  2,  13, 15, 15,  4,  11, 15, 15, 12,  15,  3, 15, 15,
        15, 15,  5,  6,   8, 15,  7,  9,  15, 10, 15, 15,  15, 15, 15, 15,
        // 0x60 - 0x7F: lower-case letters
        15,  1, 14,  2,  13, 15, 15,  4,  11, 15, 15, 12,  15,  3, 15, 15,
        15, 15,  5,  6,   8, 15,  7,  9,  15, 10, 15, 15,  15, 15, 15, 15,

        // 0x80 - 0xFF
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,
        15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15,  15, 15, 15, 15
    ];
}
else
{
    extern const(char)[256] seq_nt16_table;
}
362 
/** @abstract Lookup table mapping a 4-bit encoded nucleotide to its IUPAC
ambiguity code letter ('=' for the value 0).
*/
version (Windows)
{
    // Same 16 characters as the element-by-element C initialiser.
    __gshared const(char)[16] seq_nt16_str = "=ACMGRSVTWYHKDBN";
}
else
{
    extern __gshared const(char)[16] seq_nt16_str;
}
/** @abstract Lookup table mapping a 4-bit encoded nucleotide to about 2 bits.
Gives 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous).
*/
version (Windows)
{
    const(int)[16] seq_nt16_int = [ 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 ];
}
else
{
    // Must be a fixed-size array: a D `int[]` is a (length, pointer) slice,
    // which does not match the in-place array of ints exported by the C
    // library.  Declared like seq_nt16_str so that indexing reads the C data.
    extern __gshared const(int)[16] seq_nt16_int;
}
/**
  @abstract  Report the htslib version number
  @return    A string like "N.N[.N]" for released versions; git describe
  output when the library was built within a Git repository.
*/
const(char)* hts_version();
380 
/**
  @abstract    Work out a file's format by peeking at its start
  @param fp    File opened for reading, positioned at the beginning
  @param fmt   Format structure that is filled out on return
  @return      0 for success, or negative if an error occurred.
*/
int hts_detect_format(hFILE* fp, htsFormat* fmt);
388 
/**
  @abstract      Produce a human-readable description of the file format
  @param format  Format structure holding type, version, compression, etc.
  @return        Description string, to be freed by the caller after use.
*/
char* hts_format_description(const(htsFormat)* format);
395 
/**
  @abstract       Open a SAM/BAM/CRAM/VCF/BCF/etc file
  @param fn       The file name or "-" for stdin/stdout
  @param mode     Mode matching / [rwa][bceguxz0-9]* /
  @discussion
      'r' opens for reading; further format mode letters are then ignored,
      because the format is detected from the first few bytes or BGZF blocks
      of the file.  'w' or 'a' opens for writing or appending, and takes
      these format specifier letters:
        b  binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc)
        c  CRAM format
        g  gzip compressed
        u  uncompressed
        z  bgzf compressed
        [0-9]  zlib compression level
      Non-format option letters (valid with any of 'r'/'w'/'a'):
        e  close the file on exec(2) (opens with O_CLOEXEC, where supported)
        x  create the file exclusively (opens with O_EXCL, where supported)
      'u' and '0' are distinct: 'u' yields plain uncompressed output, whereas
      '0' outputs uncompressed data wrapped in the zlib format.
  @example
      [rw]b  .. compressed BCF, BAM, FAI
      [rw]bu .. uncompressed BCF
      [rw]z  .. compressed VCF
      [rw]   .. uncompressed VCF
*/
htsFile* hts_open(const(char)* fn, const(char)* mode);
424 
/**
  @abstract       Open a SAM/BAM/CRAM/VCF/BCF/etc file
  @param fn       The file name or "-" for stdin/stdout
  @param mode     Open mode, as per hts_open()
  @param fmt      Optional format specific parameters
  @discussion
      fn and mode are as described for hts_open().
      // TODO Update documentation for s/opts/fmt/
      Opts holds a format string (sam, bam, cram, vcf, bcf) which, if
      defined, overrides mode.  Opts also holds a linked list of hts_opt
      structures to apply to the open file handle; these can carry things
      like pointers to the reference or information on compression levels,
      block sizes, etc.
*/
htsFile* hts_open_format(const(char)* fn, const(char)* mode, const(htsFormat)* fmt);
440 
/**
  @abstract       Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file
  @param fp       The already-open file handle
  @param fn       File name (not described in hts.h)
  @param mode     Open mode, as per hts_open()
*/
htsFile* hts_hopen(hFILE* fp, const(char)* fn, const(char)* mode);
447 
/**
  @abstract  Close a file handle; buffered data is flushed for output streams
  @param fp  The file handle to be closed
  @return    0 for success, or negative if an error occurred.
*/
int hts_close(htsFile* fp);
454 
/**
  @abstract  Return the format information of a file
  @param fp  The file handle
  @return    Read-only pointer to the file's htsFormat.
*/
const(htsFormat*) hts_get_format(htsFile* fp);
461 
/**
  @abstract      Return a string containing the file format extension.
  @param format  Format structure containing the file type.
  @return        A string ("sam", "bam", etc) or "?" for unknown formats.
 */
const(char*) hts_format_file_extension(const(htsFormat)* format);
468 
/**
  @abstract  Set a specified CRAM option on the open file handle.
  @param fp  The file handle of the open file.
  @param opt The CRAM_OPT_* option.
  @param ... Optional arguments, dependent on the option used.
  @return    0 for success, or negative if an error occurred.
*/
int hts_set_opt(htsFile* fp, hts_fmt_option opt, ...);
477 
/// ?Read one line, as a string, from a line-oriented flat file (undocumented in hts.h)
int hts_getline(htsFile* fp, int delimiter, kstring_t* str);

/// ?Read lines from a line-oriented flat file into a buffer; _n is set to the number read (undocumented in hts.h)
char** hts_readlines(const(char)* fn, int* _n);
482 
/**
    @abstract       Parse a comma-separated list, or read a list from a file
    @param fn       File name or comma-separated list
    @param is_file  (not described in hts.h; presumably selects whether fn is
                    a file name rather than a list -- TODO confirm)
    @param _n       Size of the output array (number of items read)
    @return         NULL on failure or pointer to newly allocated array of
                    strings
*/
char** hts_readlist(const(char)* fn, int is_file, int* _n);
492 
/**
  @abstract  Create extra threads to aid compress/decompression for this file
  @param fp  The file handle
  @param n   The number of worker threads to create
  @return    0 for success, or negative if an error occurred.
  @notes     The threads created here are non-shared and used solely by fp.
             hts_set_thread_pool is the recommended alternative.
*/
int hts_set_threads(htsFile* fp, int n);
502 
/**
  @abstract  Create extra threads to aid compress/decompression for this file
  @param fp  The file handle
  @param p   A pool of worker threads, previously allocated by hts_create_threads().
  @return    0 for success, or negative if an error occurred.
*/
int hts_set_thread_pool(htsFile* fp, htsThreadPool* p);
510 
/**
  @abstract  Add a cache of decompressed blocks, potentially speeding up seeks.
             May not work for every file type (currently bgzf only).
  @param fp  The file handle
  @param n   The size of cache, in bytes
*/
void hts_set_cache_size(htsFile* fp, int n);
518 
/**
  @abstract  Set the .fai filename for a file opened for reading
  @return    0 for success, negative on failure
  @discussion
      When called before *_hdr_read(), this names a .fai file that supplies
      a reference list if the htsFile contains no @SQ headers.
*/
int hts_set_fai_filename(htsFile* fp, const(char)* fn_aux);
527 
528 
/**
  @abstract  Determine whether a given htsFile contains a valid EOF block
  @return    3 for a non-EOF checkable filetype;
             2 for an unseekable file type where EOF cannot be checked;
             1 for a valid EOF block;
             0 if the EOF marker is absent when it should be present;
            -1 (with errno set) on failure
  @discussion
      Checks whether the BGZF end-of-file (EOF) marker is present
*/
int hts_check_EOF(htsFile* fp);
540 
541 /************
542  * Indexing *
543  ************/
544 
/*
The HTS_IDX_* constants are special tid values for hts_itr_query()/etc.
They produce iterators that behave as follows:
 - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file
 - HTS_IDX_START  iterates over the entire file
 - HTS_IDX_REST   iterates from the current position to the end of the file
 - HTS_IDX_NONE   always returns "no more alignment records"
With any of these special tid values, beg and end are ignored.
With REST or NONE, idx is also ignored and may be NULL.
*/
enum
{
    HTS_IDX_NOCOOR = -2,    /// iterates over unmapped reads sorted at the end of the file
    HTS_IDX_START  = -3,    /// iterates over the entire file
    HTS_IDX_REST   = -4,    /// iterates from the current position to the end of the file
    HTS_IDX_NONE   = -5,    /// always returns "no more alignment records"
}
559 
/// Index format identifiers
enum : int
{
    HTS_FMT_CSI  = 0,   /// coordinate-sorted index (new)
    HTS_FMT_BAI  = 1,   /// BAM index (old)
    HTS_FMT_TBI  = 2,   /// Tabix index
    HTS_FMT_CRAI = 3,   /// CRAM index (not sure if superseded by CSI?)
}
564 
/// Index data; opaque here and only handled through pointers
struct __hts_idx_t; // @suppress(dscanner.style.phobos_naming_convention)
/// Public name for the opaque index type
alias hts_idx_t = __hts_idx_t;
568 
/// Pair of 32-bit start/end coordinates
struct hts_pair32_t // @suppress(dscanner.style.phobos_naming_convention)
{
    uint beg;   /// start coordinate (32-bit)
    uint end;   /// end coordinate (32-bit)
}
574 
/// Pair of 64-bit start/end coordinates
struct hts_pair64_t // @suppress(dscanner.style.phobos_naming_convention)
{
    ulong u;    /// start coordinate (64-bit)
    ulong v;    /// end coordinate (64-bit)
}
580 
/// Pair of 64-bit start/end coordinates that also tracks a max (used internally in hts.c)
struct hts_pair64_max_t // @suppress(dscanner.style.phobos_naming_convention)
{
    ulong u;    /// ?
    ulong v;    /// ?
    ulong max;  /// ?
}
588 
/// List of regions for iterators (NB: apparently confined to single contig/tid)
struct hts_reglist_t // @suppress(dscanner.style.phobos_naming_convention)
{
    /// Region string
    const(char)* reg;
    /// Contig id
    int tid;
    /// (start,end) intervals
    hts_pair32_t* intervals;
    /// How many intervals
    uint count;
    /// absolute lower bound
    uint min_beg;
    /// absolute upper bound
    uint max_end;
}
598 
// C: typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, int *beg, int *end);
/// Record-reading callback (function pointer)
alias hts_readrec_func = int function(BGZF* fp, void* data, void* r, int* tid, int* beg, int* end);
601 
// C: typedef int hts_seek_func(void *fp, int64_t offset, int where);
/// Seek callback (function pointer).
/// `offset` is signed (int64_t), matching the C typedef quoted above.
alias hts_seek_func = int function(void* fp, int64_t offset, int where);
604 
// C: typedef int64_t hts_tell_func(void *fp);
/// Tell callback (function pointer) returning a 64-bit signed value
alias hts_tell_func = int64_t function(void* fp);
607 
/// iterator
struct hts_itr_t // @suppress(dscanner.style.phobos_naming_convention)
{
    pragma(msg, "hts_itr_t: bitfield order assumed starting with LSB");

    // C: uint32_t read_rest:1, finished:1, is_cram:1, dummy:29;
    mixin(bitfields!(
        bool, "read_rest", 1,
        bool, "finished",  1,
        bool, "is_cram",   1,
        uint, "padding29", 29));

    /// iterator position data
    int tid, beg, end, n_off, i;
    /// iterator position data
    int curr_tid, curr_beg, curr_end;
    /// ? file offset
    ulong curr_off;
    /// ? (start,end) offset
    hts_pair64_t* off;

    /// record parsing fn pointer.
    /// hts_readrec_func is already a function-pointer alias in D, so no extra
    /// `*` is used here (C declares `hts_readrec_func *readrec` on a function
    /// type).  The field remains a single pointer, so layout is unchanged.
    hts_readrec_func readrec;

    /// ???
    struct Bins
    {
        /// ???
        int n, m;
        /// ???
        int* a;
    }

    /// ???
    Bins bins;
}
632 
/// ? index key
struct aux_key_t // @suppress(dscanner.style.phobos_naming_convention)
{
    int key;        /// ???
    ulong min_off;  /// ???
    ulong max_off;  /// ???
}
639 
/// multi iterator
struct hts_itr_multi_t // @suppress(dscanner.style.phobos_naming_convention)
{
    pragma(msg, "hts_itr_multi_t: bitfield order assumed starting with LSB");

    // C: uint32_t read_rest:1, finished:1, is_cram:1, nocoor:1, dummy:28;
    mixin(bitfields!(
        bool, "read_rest", 1,
        bool, "finished",  1,
        bool, "is_cram",   1,
        bool, "nocoor",    1,
        uint, "padding28", 28));

    /// multi iterator region list
    hts_reglist_t* reg_list;
    /// i of n regions
    int n_reg, i;
    /// iteration position data
    int curr_tid, curr_intv, curr_beg, curr_end, curr_reg;
    /// ? (start,end) offset
    hts_pair64_max_t* off;
    /// ? n of (start,end) pairs
    int n_off;
    /// ? file offset
    ulong curr_off, nocoor_off;

    // The three callback aliases are already function-pointer types in D, so
    // the fields carry no extra `*` (C declares pointers to function types).
    // Each field remains a single pointer, so layout is unchanged.
    hts_readrec_func readrec;   /// record parsing fn pointer
    hts_seek_func seek;         /// ? fn pointer
    hts_tell_func tell;         /// ? fn pointer
}
664 
pragma(inline, true)
{
    /// ??? D translation of: #define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7)
    auto hts_bin_first(T)(T l)
    {
        const shift = (l << 1) + l;
        return ((1 << shift) - 1) / 7;
    }

    /// ??? D translation of: #define hts_bin_parent(l) (((l) - 1) >> 3)
    auto hts_bin_parent(T)(T l)
    {
        const below = l - 1;
        return below >> 3;
    }
}
672 
/// Initialize index
hts_idx_t* hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls);

/// Destroy index
void hts_idx_destroy(hts_idx_t* idx);

/// Add to index
int hts_idx_push(hts_idx_t* idx, int tid, int beg, int end, uint64_t offset, int is_mapped);

/// ?finalize index
void hts_idx_finish(hts_idx_t* idx, uint64_t final_offset);
681 
/// Write an index to a file
/** @param idx  Index to be written
    @param fn   Input BAM/BCF/etc filename; .bai/.csi/etc is added to it
    @param fmt  One of the HTS_FMT_* index formats
    @return  0 if successful, or negative if an error occurred.
*/
int hts_idx_save(const(hts_idx_t)* idx, const(char)* fn, int fmt);
689 
/// Write an index to a specific file
/** @param idx    Index to be written
    @param fn     Input BAM/BCF/etc filename
    @param fnidx  Output filename, or NULL to add .bai/.csi/etc to @a fn
    @param fmt    One of the HTS_FMT_* index formats
    @return  0 if successful, or negative if an error occurred.
*/
int hts_idx_save_as(const(hts_idx_t)* idx, const(char)* fn, const(char)* fnidx, int fmt);
698 
699 
/// Read an index file
/** @param fn   BAM/BCF/etc filename; an existing index file is searched for
                by adding .bai/.csi/etc to it or substituting the extension
    @param fmt  One of the HTS_FMT_* index formats
    @return  The index, or NULL if an error occurred.
*/
hts_idx_t* hts_idx_load(const(char)* fn, int fmt);
707 
/// Read a specific index file
/** @param fn     Input BAM/BCF/etc filename
    @param fnidx  The input index filename
    @return  The index, or NULL if an error occurred.
*/
hts_idx_t* hts_idx_load2(const(char)* fn, const(char)* fnidx);
714 
715 
/// Fetch extra index meta-data
/** @param idx    The index
    @param l_meta Pointer to where the length of the extra data is stored
    @return Pointer to the extra data if present; NULL otherwise

    Indexes made by tabix (both .tbi and .csi) include extra data about the
    indexed file.  This returns a pointer to that data.  The data is stored
    exactly as it is in the index, so callers must interpret the results
    themselves, including knowing what sort of data to expect; byte
    swapping etc.
*/
uint8_t* hts_idx_get_meta(hts_idx_t* idx, uint32_t* l_meta);
728 
/// Store extra index meta-data
/** @param idx     The index
    @param l_meta  Length of data
    @param meta    Pointer to the extra data
    @param is_copy If not zero, a copy of the data is taken
    @return 0 on success; -1 on failure (out of memory).

    This sets the data that hts_idx_get_meta() returns.

    With is_copy != 0 a copy of the input data is taken.  Otherwise,
    ownership of the data pointed to by *meta passes to the index.
*/
int hts_idx_set_meta(hts_idx_t* idx, uint32_t l_meta, uint8_t* meta, int is_copy);
742 
/// Get statistics(?) from an index (number of mapped and unmapped for a given contig/tid)
int hts_idx_get_stat(const(hts_idx_t)* idx, int tid, uint64_t* mapped, uint64_t* unmapped);

/// Get number of elements with no coordinate (unmapped?) from an index
uint64_t hts_idx_get_n_no_coor(const(hts_idx_t)* idx);
747 
748 
/// Flag for hts_parse_decimal(): ignore ',' separators within numbers
enum int HTS_PARSE_THOUSANDS_SEP = 1;
750 
/// Convert a numeric string to a number
/** Scientific notation is accepted, and the integer part (before any decimal
    point or E notation) may optionally contain commas.
    @param str     String to be parsed
    @param strend  If non-NULL, set on return to point to the first character
                   in @a str after those forming the parsed number
    @param flags   Or'ed-together combination of HTS_PARSE_* flags
    @return  Converted value of the parsed number.

    If @a strend is NULL and trailing characters follow the number, a warning
    is printed (provided hts_verbose is HTS_LOG_WARNING or more).
*/
long hts_parse_decimal(const(char)* str, char** strend, int flags);
764 
765 /// Parse a "CHR:START-END"-style region string
766 /** @param str  String to be parsed (C string)
767     @param beg  Set on return to the 0-based start of the region
768     @param end  Set on return to the 1-based end of the region
769     @return  Pointer to the colon or '\0' after the reference sequence name,
770              or NULL (D null) if @a str could not be parsed.
771 */
772 const(char) *hts_parse_reg(const(char) *str, int *beg, int *end);
773 
774     /// Iterator query by integer coordinates (tid/beg/end) against index `idx`; returns an hts_itr_t*, to be released with hts_itr_destroy(). NOTE(review): `readrec` is presumably the record-reading callback used while iterating -- confirm
775     hts_itr_t *hts_itr_query(const(hts_idx_t) *idx, int tid, int beg, int end, hts_readrec_func readrec);
776     /// Destroy an iterator (an hts_itr_t*, e.g. as returned by hts_itr_query() or hts_itr_querys())
777     void hts_itr_destroy(hts_itr_t *iter);
778 
779     // Name-to-id callback (the `getid` parameter of hts_itr_querys/hts_itr_regions). C original: typedef int (*hts_name2id_f)(void*, const char*);
780     alias hts_name2id_f = int function(void *, const(char) *);
781     // Id-to-name callback (the `getid` parameter of hts_idx_seqnames). C original: typedef const char *(*hts_id2name_f)(void*, int);
782     alias hts_id2name_f = const(char) * function(void*, int);
783     // C original (a function type, not a pointer): typedef hts_itr_t *hts_itr_query_func(const hts_idx_t *idx, int tid, int beg, int end, hts_readrec_func *readrec); -- the D alias below is a function pointer with the same parameter list as hts_itr_query()
784     alias hts_itr_query_func =
785         hts_itr_t * function(const(hts_idx_t) *idx, int tid, int beg, int end, hts_readrec_func readrec);
786 
787     /// Iterator query by region string ("chr:start-end"). NOTE(review): presumably `getid` (called with `hdr`) resolves the name to a tid and `itr_query` then performs the integer query -- inferred from the parameter types, confirm against htslib
788     hts_itr_t *hts_itr_querys(const(hts_idx_t) *idx, const(char) *reg, hts_name2id_f getid, void *hdr,
789                                 hts_itr_query_func itr_query, hts_readrec_func readrec);
790     /// Iterator: advance to the next record (NOTE(review): presumably the record is delivered through `r`; return-value convention is not shown here -- confirm against htslib)
791     int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data);
792     /// Return a C-string array of sequence names (NOTE(review): presumably the element count is written to *n -- confirm). NB: free only the array, not the values.
793     const(char)** hts_idx_seqnames(const(hts_idx_t) *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values
794 
795 /**********************************
796  * Iterator with multiple regions *
797  **********************************/
798 
799 /// Multi-region iterator query callback (function pointer). C original: typedef hts_itr_multi_t *hts_itr_multi_query_func(const hts_idx_t *idx, hts_itr_multi_t *itr);
800 alias hts_itr_multi_query_func = hts_itr_multi_t * function(const(hts_idx_t) *idx, hts_itr_multi_t *itr);
801 /// BAM multi-region iterator query (same signature as hts_itr_multi_query_func)
802 hts_itr_multi_t *hts_itr_multi_bam(const(hts_idx_t) *idx, hts_itr_multi_t *iter);
803 /// CRAM multi-region iterator query (same signature as hts_itr_multi_query_func)
804 hts_itr_multi_t *hts_itr_multi_cram(const(hts_idx_t) *idx, hts_itr_multi_t *iter);
805 /// Multi-region iterator built from a region list (`reglist`, `count`). NOTE(review): hts_itr_multi_query_func is already a D function pointer, so `hts_itr_multi_query_func *` below is a pointer TO a function pointer, and `hts_readrec_func *readrec` disagrees with `hts_readrec_func readrec` in hts_itr_query()/hts_itr_querys(); the C prototype presumably takes plain function pointers for all four callbacks -- confirm against hts.h before relying on this signature
806 hts_itr_multi_t *hts_itr_regions(const(hts_idx_t) *idx, hts_reglist_t *reglist, int count,
807                                 hts_name2id_f getid, void *hdr,
808                                 hts_itr_multi_query_func *itr_specific,
809                                 hts_readrec_func *readrec,
810                                 hts_seek_func *seek,
811                                 hts_tell_func *tell);
812 /// Multi-region iterator: advance to the next record (NOTE(review): presumably delivered through `r`; return-value convention is not shown here -- confirm against htslib)
813 int hts_itr_multi_next(htsFile *fd, hts_itr_multi_t *iter, void *r);
814 /// Free a region list (`reglist`, with `count` as passed alongside it to hts_itr_regions())
815 void hts_reglist_free(hts_reglist_t *reglist, int count);
816 /// Multi-region iterator: destroy (free) the iterator
817 void hts_itr_multi_destroy(hts_itr_multi_t *iter);
818 
819 
820     /**
821      * FT_* file-type constants belonging to hts_file_type(), a convenience function to determine file type.
822      * DEPRECATED:  hts_file_type() has been replaced by hts_detect_format().
823      * It and these FT_* macros (plain enums in this D binding) will be removed in a future HTSlib release.
824      */
825     enum FT_UNKN   = 0;
826     enum FT_GZ     = 1;                 /// ditto
827     enum FT_VCF    = 2;                 /// ditto
828     enum FT_VCF_GZ = (FT_GZ|FT_VCF);    /// ditto
829     enum FT_BCF    = (1<<2);            /// ditto
830     enum FT_BCF_GZ = (FT_GZ|FT_BCF);    /// ditto
831     enum FT_STDIN  = (1<<3);            /// ditto
832     deprecated("This function has been replaced by hts_detect_format(). "
833     ~ "It and these FT_* macros will be removed in a future HTSlib release.")
834     int hts_file_type(const(char) *fname); /// Convenience function to determine file type from `fname` (deprecated; NOTE(review): result is presumably a combination of the FT_* constants -- confirm)
835 
836 /+
837 /***************************
838  * Revised MAQ error model *
839  ***************************/
840 
841 struct errmod_t;
842 typedef struct errmod_t errmod_t;
843 
844 errmod_t *errmod_init(double depcorr);
845 void errmod_destroy(errmod_t *em);
846 
847 /*
848     n: number of bases
849     m: maximum base
850     bases[i]: qual:6, strand:1, base:4
851     q[i*m+j]: phred-scaled likelihood of (i,j)
852  */
853 int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q);
854 
855 
856 /*****************************************************
857  * Probabilistic banded glocal alignment             *
858  * See https://doi.org/10.1093/bioinformatics/btr076 *
859  *****************************************************/
860 
861 typedef struct probaln_par_t {
862     float d, e;
863     int bw;
864 } probaln_par_t;
865 
866 /// Perform probabilistic banded glocal alignment
867 /** @param      ref     Reference sequence
868     @param      l_ref   Length of reference
869     @param      query   Query sequence
870     @param      l_query Length of query sequence
871     @param      iqual   Query base qualities
872     @param      c       Alignment parameters
873     @param[out] state   Output alignment
874     @param[out] q    Phred scaled posterior probability of state[i] being wrong
875     @return     Phred-scaled likelihood score, or INT_MIN on failure.
876 
877 The reference and query sequences are coded using integers 0,1,2,3,4 for
878 bases A,C,G,T,N respectively (N here is for any ambiguity code).
879 
880 On output, state and q are arrays of length l_query. The higher 30
881 bits give the reference position the query base is matched to and the
882 lower two bits can be 0 (an alignment match) or 1 (an
883 insertion). q[i] gives the phred scaled posterior probability of
884 state[i] being wrong.
885 
886 On failure, errno will be set to EINVAL if the values of l_ref or l_query
887 were invalid; or ENOMEM if a memory allocation failed.
888 */
889 
890 int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, int l_query, const uint8_t *iqual, const probaln_par_t *c, int *state, uint8_t *q);
891 
892 
893     /**********************
894      * MD5 implementation *
895      **********************/
896 
897     struct hts_md5_context;
898     typedef struct hts_md5_context hts_md5_context;
899 
900     /*! @abstract   Initialises an MD5 context.
901      *  @discussion
902      *    The expected use is to allocate an hts_md5_context using
903      *    hts_md5_init().  This pointer is then passed into one or more calls
904      *    of hts_md5_update() to compute successive internal portions of the
905      *    MD5 sum, which can then be externalised as a full 16-byte MD5sum
906      *    calculation by calling hts_md5_final().  This can then be turned
907      *    into ASCII via hts_md5_hex().
908      *
909      *    To deallocate any resources created by hts_md5_init() call the
910      *    hts_md5_destroy() function.
911      *
912      *  @return     hts_md5_context pointer on success, NULL otherwise.
913      */
914     hts_md5_context *hts_md5_init(void);
915 
916     /*! @abstract Updates the context with the MD5 of the data. */
917     void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size);
918 
919     /*! @abstract Computes the final 128-bit MD5 hash from the given context */
920     void hts_md5_final(unsigned char *digest, hts_md5_context *ctx);
921 
922     /*! @abstract Resets an md5_context to the initial state, as returned
923      *            by hts_md5_init().
924      */
925     void hts_md5_reset(hts_md5_context *ctx);
926 
927     /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-terminated
928      *            hex string.
929      */
930     void hts_md5_hex(char *hex, const unsigned char *digest);
931 
932     /*! @abstract Deallocates any memory allocated by hts_md5_init. */
933     void hts_md5_destroy(hts_md5_context *ctx);
934 
935 
936 static inline int hts_reg2bin(int64_t beg, int64_t end, int min_shift, int n_lvls)
937 {
938     int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7;
939     for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l))
940         if (beg>>s == end>>s) return t + (beg>>s);
941     return 0;
942 }
943 
944 static inline int hts_bin_bot(int bin, int n_lvls)
945 {
946     int l, b;
947     for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); // compute the level of bin
948     return (bin - hts_bin_first(l)) << (n_lvls - l) * 3;
949 }
950 
951 /**************
952  * Endianness *
953  **************/
954 
955 static inline int ed_is_big(void)
956 {
957     long one= 1;
958     return !(*((char *)(&one)));
959 }
960 static inline uint16_t ed_swap_2(uint16_t v)
961 {
962     return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8));
963 }
964 static inline void *ed_swap_2p(void *x)
965 {
966     *(uint16_t*)x = ed_swap_2(*(uint16_t*)x);
967     return x;
968 }
969 static inline uint32_t ed_swap_4(uint32_t v)
970 {
971     v = ((v & 0x0000FFFFU) << 16) | (v >> 16);
972     return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);
973 }
974 static inline void *ed_swap_4p(void *x)
975 {
976     *(uint32_t*)x = ed_swap_4(*(uint32_t*)x);
977     return x;
978 }
979 static inline uint64_t ed_swap_8(uint64_t v)
980 {
981     v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32);
982     v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16);
983     return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8);
984 }
985 static inline void *ed_swap_8p(void *x)
986 {
987     *(uint64_t*)x = ed_swap_8(*(uint64_t*)x);
988     return x;
989 }
990 
991 +/