1 /// @file htslib/hts.h
2 /// Format-neutral I/O, indexing, and iterator API functions.
3 /*
4     Copyright (C) 2012-2020 Genome Research Ltd.
5     Copyright (C) 2010, 2012 Broad Institute.
6     Portions copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3@live.co.uk>
7 
8     Author: Heng Li <lh3@sanger.ac.uk>
9 
10 Permission is hereby granted, free of charge, to any person obtaining a copy
11 of this software and associated documentation files (the "Software"), to deal
12 in the Software without restriction, including without limitation the rights
13 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 copies of the Software, and to permit persons to whom the Software is
15 furnished to do so, subject to the following conditions:
16 
17 The above copyright notice and this permission notice shall be included in
18 all copies or substantial portions of the Software.
19 
20 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
23 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
25 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
26 DEALINGS IN THE SOFTWARE.  */
27 module htslib.hts;
28 
29 import core.stdc.config;
30 import core.stdc.inttypes;
31 import core.stdc.limits;
32 import core.stdc.stdint;
33 
34 import htslib.bgzf : BGZF;
35 import htslib.cram : cram_fd;
36 import htslib.hfile : hFILE;
37 import htslib.thread_pool : hts_tpool;
38 import htslib.sam : sam_hdr_t;
39 import htslib.kstring : kstring_t;
40 
41 @system:
42 nothrow:
43 @nogc:
44 
45 extern (C):
46 
47 // Separator used to split HTS_PATH (for plugins); REF_PATH (cram references)
48 
49 version(Windows)
50 {
51     enum HTS_PATH_SEPARATOR_CHAR = ';';
52     enum HTS_PATH_SEPARATOR_STR = ";";
53 }
54 else
55 {
56     enum HTS_PATH_SEPARATOR_CHAR = ':';
57     enum HTS_PATH_SEPARATOR_STR = ":";
58 }
59 
60 
61 /**
62  * @hideinitializer
63  * Deprecated macro to expand a dynamic array of a given type
64  *
65  * @param         type_t The type of the array elements
66  * @param[in]     n      Requested number of elements of type type_t
67  * @param[in,out] m      Size of memory allocated
68  * @param[in,out] ptr    Pointer to the array
69  *
70  * @discussion
71  * Do not use this macro.  Use hts_resize() instead as allows allocation
72  * failures to be handled more gracefully.
73  *
74  * The array *ptr will be expanded if necessary so that it can hold @p n
75  * or more elements.  If the array is expanded then the new size will be
76  * written to @p m and the value in @p ptr may change.
77  *
78  * It must be possible to take the address of @p ptr and @p m must be usable
79  * as an lvalue.
80  *
81  * @bug
82  * If the memory allocation fails, this will call exit(1).  This is
83  * not ideal behaviour in a library.
84  */
85 
86 /**
87  * @hideinitializer
88  * Macro to expand a dynamic array, zeroing any newly-allocated memory
89  *
90  * @param         type_t The type of the array elements
91  * @param[in]     n      Requested number of elements of type type_t
92  * @param[in,out] m      Size of memory allocated
93  * @param[in,out] ptr    Pointer to the array
94  *
95  * @discussion
96  * Do not use this macro.  Use hts_resize() instead as allows allocation
97  * failures to be handled more gracefully.
98  *
99  * As for hts_expand(), except the bytes that make up the array elements
100  * between the old and new values of @p m are set to zero using memset().
101  *
102  * @bug
103  * If the memory allocation fails, this will call exit(1).  This is
104  * not ideal behaviour in a library.
105  */
106 
107 // For internal use (by hts_resize()) only
108 int hts_resize_array_(size_t, size_t, size_t, void*, void**, int, const(char)*);
109 
110 enum HTS_RESIZE_CLEAR = 1;
111 
112 /**
113  * @hideinitializer
114  * Macro to expand a dynamic array of a given type
115  *
116  * @param         type_t    The type of the array elements
117  * @param[in]     num       Requested number of elements of type type_t
118  * @param[in,out] size_ptr  Pointer to where the size (in elements) of the
119                             array is stored.
120  * @param[in,out] ptr       Location of the pointer to the array
121  * @param[in]     flags     Option flags
122  *
123  * @return        0 for success, or negative if an error occurred.
124  *
125  * @discussion
126  * The array *ptr will be expanded if necessary so that it can hold @p num
127  * or more elements.  If the array is expanded then the new size will be
128  * written to @p *size_ptr and the value in @p *ptr may change.
129  *
130  * If ( @p flags & HTS_RESIZE_CLEAR ) is set, any newly allocated memory will
131  * be cleared.
132  */
133 
134 pragma(inline,true)
135 int hts_resize(T)(size_t num, ref size_t size, T* ptr, int flags)
136 {
137     return (num > size)
138         ? hts_resize_array_(T.sizeof, num, size_t.sizeof, &size, cast(void **)&ptr, flags, __FUNCTION__)
139         : 0;
140 }
141 
142 /// Release resources when dlclosing a dynamically loaded HTSlib
143 /** @discussion
144  *  Normally HTSlib cleans up automatically when your program exits,
145  *  whether that is via exit(3) or returning from main(). However if you
146  *  have dlopen(3)ed HTSlib and wish to close it before your main program
147  *  exits, you must call hts_lib_shutdown() before dlclose(3).
148 */
149 void hts_lib_shutdown();
150 
151 /**
152  * Wrapper function for free(). Enables memory deallocation across DLL
153  * boundary. Should be used by all applications, which are compiled
154  * with a different standard library than htslib and call htslib
155  * methods that return dynamically allocated data.
156  */
157 void hts_free(void* ptr);
158 
159 /************
160  * File I/O *
161  ************/
162 
163 // Add new entries only at the end (but before the *_maximum entry)
164 // of these enums, as their numbering is part of the htslib ABI.
165 
166 /// Broad format category (sequence data, variant data, index, regions, etc.)
167 enum htsFormatCategory // @suppress(dscanner.style.phobos_naming_convention)
168 {
169     unknown_category = 0,
170     sequence_data = 1, // Sequence data -- SAM, BAM, CRAM, etc
171     variant_data = 2, // Variant calling data -- VCF, BCF, etc
172     index_file = 3, // Index file associated with some data file
173     region_list = 4, // Coordinate intervals or regions -- BED, etc
174     category_maximum = 32_767
175 }
176 
177 /// Specific format (SAM, BAM, CRAM, BCF, VCF, TBI, BED, etc.)
178 enum htsExactFormat // @suppress(dscanner.style.phobos_naming_convention)
179 {
180     unknown_format = 0,
181     binary_format = 1,
182     text_format = 2,
183     sam = 3,
184     bam = 4,
185     bai = 5,
186     cram = 6,
187     crai = 7,
188     vcf = 8,
189     bcf = 9,
190     csi = 10,
191     gzi = 11,
192     tbi = 12,
193     bed = 13,
194     htsget = 14,
195     json = 14,
196     empty_format = 15, // File is empty (or empty after decompression)
197     fasta_format = 16,
198     fastq_format = 17,
199     fai_format = 18,
200     fqi_format = 19,
201     hts_crypt4gh_format = 20,
202     format_maximum = 32_767
203 }
204 
205 /// Compression type
206 enum htsCompression // @suppress(dscanner.style.phobos_naming_convention)
207 {
208     no_compression = 0,
209     gzip = 1,
210     bgzf = 2,
211     custom = 3,
212     bzip2_compression = 4,
213     razf_compression = 5,
214     compression_maximum = 32_767
215 }
216 
217 struct htsFormat
218 {
219     htsFormatCategory category; /// Broad format category (sequence data, variant data, index, regions, etc.)
220     htsExactFormat format;      /// Specific format (SAM, BAM, CRAM, BCF, VCF, TBI, BED, etc.)
221     /// format version
222     struct Vers { short major, minor; } // @suppress(dscanner.style.undocumented_declaration)
223     Vers v; /// format version
224     htsCompression compression; /// Compression type
225     short compression_level;/// currently unused
226     void *specific;         /// format specific options; see struct hts_opt.
227 }
228 
229 struct hts_idx_t;
230 struct hts_filter_t;
231 
232 /**
233  * @brief File handle returned by hts_open() etc.
234  * This structure should be considered opaque by end users. There should be
235  * no need to access most fields directly in user code, and in cases where
236  * it is desirable accessor functions such as hts_get_format() are provided.
237  */
238 // Maintainers note htsFile cannot be an incomplete struct because some of its
239 // fields are part of libhts.so's ABI (hence these fields must not be moved):
240 //  - fp is used in the public sam_itr_next()/etc macros
241 //  - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1
242 //  - is_write and is_cram are used directly in samtools <= 1.1
243 //  - fp is used directly in samtools (up to and including current develop)
244 //  - line is used directly in bcftools (up to and including current develop)
245 //  - is_bgzf and is_cram flags indicate which fp union member to use.
246 //    Note is_bgzf being set does not indicate the flag is BGZF compressed,
247 //    nor even whether it is compressed at all (eg on naked BAMs).
248 struct htsFile // @suppress(dscanner.style.phobos_naming_convention)
249 {
250     import std.bitmanip : bitfields;
251 
252     mixin(bitfields!(
253         uint, "is_bin", 1,
254         uint, "is_write", 1,
255         uint, "is_be", 1,
256         uint, "is_cram", 1,
257         uint, "is_bgzf", 1,
258         uint, "dummy", 27));
259     long lineno; /// uncompressed(?) file line no.
260     kstring_t line; /// buffer to hold line
261     char *fn;       /// filename
262     char *fn_aux;   /// auxillary (i.e, index) file name
263     /// hFile plus any needed bgzf or CRAM (if applicable) structure data
264     union FP {
265         BGZF *bgzf;     /// see bgzf.d
266         cram_fd *cram;  /// see cram.d
267         hFILE *hfile;   /// see hfile.d
268     }
269     FP fp;              /// hFile plus any needed bgzf or CRAM (if applicable) structure data
270     void* state; // format specific state information
271     htsFormat format;   /// hts file complete file format information
272     hts_idx_t* idx;
273     const(char)* fnidx;
274     sam_hdr_t* bam_header;
275     hts_filter_t* filter;
276 }
277 
278 /// A combined thread pool and queue allocation size.
279 /// The pool should already be defined, but qsize may be zero to
280 /// indicate an appropriate queue size is taken from the pool.
281 ///
282 /// Reasons for explicitly setting it could be where many more file
283 /// descriptors are in use than threads, so keeping memory low is
284 /// important.
285 struct htsThreadPool // @suppress(dscanner.style.phobos_naming_convention)
286 {
287     hts_tpool* pool; // The shared thread pool itself
288     int qsize; // Size of I/O queue to use for this fp
289 }
290 
291 /// REQUIRED_FIELDS
292 enum sam_fields // @suppress(dscanner.style.phobos_naming_convention)
293 {
294     SAM_QNAME = 0x00000001,
295     SAM_FLAG = 0x00000002,
296     SAM_RNAME = 0x00000004,
297     SAM_POS = 0x00000008,
298     SAM_MAPQ = 0x00000010,
299     SAM_CIGAR = 0x00000020,
300     SAM_RNEXT = 0x00000040,
301     SAM_PNEXT = 0x00000080,
302     SAM_TLEN = 0x00000100,
303     SAM_SEQ = 0x00000200,
304     SAM_QUAL = 0x00000400,
305     SAM_AUX = 0x00000800,
306     SAM_RGAUX = 0x00001000
307 }
308 
309 /// Mostly CRAM only, but this could also include other format options
310 enum hts_fmt_option
311 {
312     // CRAM specific
313     CRAM_OPT_DECODE_MD = 0,
314     CRAM_OPT_PREFIX = 1,
315     CRAM_OPT_VERBOSITY = 2, // obsolete, use hts_set_log_level() instead
316     CRAM_OPT_SEQS_PER_SLICE = 3,
317     CRAM_OPT_SLICES_PER_CONTAINER = 4,
318     CRAM_OPT_RANGE = 5,
319     CRAM_OPT_VERSION = 6, // rename to cram_version?
320     CRAM_OPT_EMBED_REF = 7,
321     CRAM_OPT_IGNORE_MD5 = 8,
322     CRAM_OPT_REFERENCE = 9, // make general
323     CRAM_OPT_MULTI_SEQ_PER_SLICE = 10,
324     CRAM_OPT_NO_REF = 11,
325     CRAM_OPT_USE_BZIP2 = 12,
326     CRAM_OPT_SHARED_REF = 13,
327     CRAM_OPT_NTHREADS = 14, // deprecated, use HTS_OPT_NTHREADS
328     CRAM_OPT_THREAD_POOL = 15, // make general
329     CRAM_OPT_USE_LZMA = 16,
330     CRAM_OPT_USE_RANS = 17,
331     CRAM_OPT_REQUIRED_FIELDS = 18,
332     CRAM_OPT_LOSSY_NAMES = 19,
333     CRAM_OPT_BASES_PER_SLICE = 20,
334     CRAM_OPT_STORE_MD = 21,
335     CRAM_OPT_STORE_NM = 22,
336     CRAM_OPT_RANGE_NOSEEK = 23, // CRAM_OPT_RANGE minus the seek
337     CRAM_OPT_USE_TOK = 24,
338     CRAM_OPT_USE_FQZ = 25,
339     CRAM_OPT_USE_ARITH = 26,
340     CRAM_OPT_POS_DELTA = 27, // force delta for AP, even on non-pos sorted data
341 
342     // General purpose
343     HTS_OPT_COMPRESSION_LEVEL = 100,
344     HTS_OPT_NTHREADS = 101,
345     HTS_OPT_THREAD_POOL = 102,
346     HTS_OPT_CACHE_SIZE = 103,
347     HTS_OPT_BLOCK_SIZE = 104,
348     HTS_OPT_FILTER = 105,
349     HTS_OPT_PROFILE = 106,
350 
351     // Fastq
352 
353     // Boolean.
354     // Read / Write CASAVA 1.8 format.
355     // See https://emea.support.illumina.com/content/dam/illumina-support/documents/documentation/software_documentation/bcl2fastq/bcl2fastq_letterbooklet_15038058brpmi.pdf
356     //
357     // The CASAVA tag matches \d:[YN]:\d+:[ACGTN]+
358     // The first \d is read 1/2 (1 or 2), [YN] is QC-PASS/FAIL flag,
359     // \d+ is a control number, and the sequence at the end is
360     // for barcode sequence.  Barcodes are read into the aux tag defined
361     // by FASTQ_OPT_BARCODE ("BC" by default).
362     FASTQ_OPT_CASAVA = 1000,
363 
364     // String.
365     // Whether to read / write extra SAM format aux tags from the fastq
366     // identifier line.  For reading this can simply be "1" to request
367     // decoding aux tags.  For writing it is a comma separated list of aux
368     // tag types to be written out.
369     FASTQ_OPT_AUX = 1001,
370 
371     // Boolean.
372     // Whether to add /1 and /2 to read identifiers when writing FASTQ.
373     // These come from the BAM_FREAD1 or BAM_FREAD2 flags.
374     // (Detecting the /1 and /2 is automatic when reading fastq.)
375     FASTQ_OPT_RNUM = 1002,
376 
377     // Two character string.
378     // Barcode aux tag for CASAVA; defaults to "BC".
379     FASTQ_OPT_BARCODE = 1003,
380 }
381 
382 /// Profile options for encoding; primarily used at present in CRAM
383 /// but also usable in BAM as a synonym for deflate compression levels.
384 enum hts_profile_option
385 {
386     HTS_PROFILE_FAST = 0,
387     HTS_PROFILE_NORMAL = 1,
388     HTS_PROFILE_SMALL = 2,
389     HTS_PROFILE_ARCHIVE = 3
390 }
391 
392 /// For backwards compatibility
393 alias cram_option = hts_fmt_option;
394 
395 /// Options for cache, (de)compression, threads, CRAM, etc.
396 struct hts_opt // @suppress(dscanner.style.phobos_naming_convention)
397 {
398     char *arg;          /// string form, strdup()ed
399     hts_fmt_option opt; /// tokenised key
400     /// option value
401     union VAL {         /// ... and value
402         int i;          /// int value
403         char *s;        /// string value
404     }
405     VAL val;            /// value
406     hts_opt *next;      /// next option (linked list)
407 }
408 
409 /*
410  * Explicit index file name delimiter, see below
411  */
412 enum HTS_IDX_DELIM = "##idx##";
413 
414 /**********************
415  * Exported functions *
416  **********************/
417 
418 /**
419  * Parses arg and appends it to the option list.
420  *
421  * Returns 0 on success;
422  *        -1 on failure.
423  */
424 int hts_opt_add(hts_opt** opts, const(char)* c_arg);
425 
426 /**
427  * Applies an hts_opt option list to a given htsFile.
428  *
429  * Returns 0 on success
430  *        -1 on failure
431  */
432 int hts_opt_apply(htsFile* fp, hts_opt* opts);
433 
434 /**
435  * Frees an hts_opt list.
436  */
437 void hts_opt_free(hts_opt* opts);
438 
439 /**
440  * Accepts a string file format (sam, bam, cram, vcf, bam) optionally
441  * followed by a comma separated list of key=value options and splits
442  * these up into the fields of htsFormat struct.
443  *
444  * Returns 0 on success
445  *        -1 on failure.
446  */
447 int hts_parse_format(htsFormat* opt, const(char)* str);
448 
449 /**
450  * Tokenise options as (key(=value)?,)*(key(=value)?)?
451  * NB: No provision for ',' appearing in the value!
452  * Add backslashing rules?
453  *
454  * This could be used as part of a general command line option parser or
455  * as a string concatenated onto the file open mode.
456  *
457  * Returns 0 on success
458  *        -1 on failure.
459  */
460 int hts_parse_opt_list(htsFormat* opt, const(char)* str);
461 
462 /*! @abstract Table for converting a nucleotide character to 4-bit encoding.
463 The input character may be either an IUPAC ambiguity code, '=' for 0, or
464 '0'/'1'/'2'/'3' for a result of 1/2/4/8.  The result is encoded as 1/2/4/8
465 for A/C/G/T or combinations of these bits for ambiguous bases.
466 */
467 
468 version(Windows){
469     __gshared const(ubyte)[256] seq_nt16_table = [
470         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
471         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
472         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
473         1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,
474         15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
475         15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
476         15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
477         15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
478 
479         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
480         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
481         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
482         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
483         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
484         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
485         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
486         15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
487     ];
488 }else{
489     extern __gshared const(ubyte)[256] seq_nt16_table;
490 }
491 /**! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC
492 ambiguity code letter (or '=' when given 0).
493 */
494 
495 version(Windows) __gshared const(char)[16] seq_nt16_str = ['=','A','C','M','G','R','S','V','T','W','Y','H','K','D','B','N'];
496 else extern __gshared const(char)[16] seq_nt16_str;
497 
498 /**! @abstract Table for converting a 4-bit encoded nucleotide to about 2 bits.
499 Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous).
500 */
501 version(Windows) __gshared const(int)[16] seq_nt16_int = [ 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 ];
502 else extern __gshared const(int)[16] seq_nt16_int;
503 /**!
504   @abstract  Get the htslib version number
505   @return    For released versions, a string like "N.N[.N]"; or git describe
506   output if using a library built within a Git repository.
507 */
508 const(char)* hts_version();
509 
510 /**!
511   @abstract  Compile-time HTSlib version number, for use in #if checks
512   @return    For released versions X.Y[.Z], an integer of the form XYYYZZ;
513   useful for preprocessor conditionals such as
514       #if HTS_VERSION >= 101000  // Check for v1.10 or later
515 */
516 // Maintainers: Bump this in the final stage of preparing a new release.
517 // Immediately after release, bump ZZ to 90 to distinguish in-development
518 // Git repository builds from the release; you may wish to increment this
519 // further when significant features are merged.
520 enum HTS_VERSION = 101300;
521 
522 /**! @abstract Introspection on the features enabled in htslib
523  *
524  * @return a bitfield of HTS_FEATURE_* macros.
525  */
526 uint hts_features();
527 
528 const(char)* hts_test_feature(uint id);
529 
530 /**! @abstract Introspection on the features enabled in htslib, string form
531  *
532  * @return a string describing htslib build features
533  */
534 const(char)* hts_feature_string();
535 
536 /// Whether ./configure was used or vanilla Makefile
537 enum HTS_FEATURE_CONFIGURE = 1;
538 
539 /// Whether --enable-plugins was used
540 enum HTS_FEATURE_PLUGINS = 2;
541 
542 /// Transport specific
543 enum HTS_FEATURE_LIBCURL = 1u << 10;
544 enum HTS_FEATURE_S3 = 1u << 11;
545 enum HTS_FEATURE_GCS = 1u << 12;
546 
547 /// Compression options
548 enum HTS_FEATURE_LIBDEFLATE = 1u << 20;
549 enum HTS_FEATURE_LZMA = 1u << 21;
550 enum HTS_FEATURE_BZIP2 = 1u << 22;
551 enum HTS_FEATURE_HTSCODECS = 1u << 23; // htscodecs library version
552 
553 /// Build params
554 enum HTS_FEATURE_CC = 1u << 27;
555 enum HTS_FEATURE_CFLAGS = 1u << 28;
556 enum HTS_FEATURE_CPPFLAGS = 1u << 29;
557 enum HTS_FEATURE_LDFLAGS = 1u << 30;
558 
559 /**!
560   @abstract    Determine format by peeking at the start of a file
561   @param fp    File opened for reading, positioned at the beginning
562   @param fmt   Format structure that will be filled out on return
563   @return      0 for success, or negative if an error occurred.
564 */
565 int hts_detect_format(hFILE* fp, htsFormat* fmt);
566 
567 /**!
568   @abstract    Get a human-readable description of the file format
569   @param fmt   Format structure holding type, version, compression, etc.
570   @return      Description string, to be freed by the caller after use.
571 */
572 char* hts_format_description(const(htsFormat)* format);
573 
574 /**!
575   @abstract       Open a sequence data (SAM/BAM/CRAM) or variant data (VCF/BCF)
576                   or possibly-compressed textual line-orientated file
577   @param fn       The file name or "-" for stdin/stdout. For indexed files
578                   with a non-standard naming, the file name can include the
579                   name of the index file delimited with HTS_IDX_DELIM
580   @param mode     Mode matching / [rwa][bcefFguxz0-9]* /
581   @discussion
582       With 'r' opens for reading; any further format mode letters are ignored
583       as the format is detected by checking the first few bytes or BGZF blocks
584       of the file.  With 'w' or 'a' opens for writing or appending, with format
585       specifier letters:
586         b  binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc)
587         c  CRAM format
588         g  gzip compressed
589         u  uncompressed
590         z  bgzf compressed
591         [0-9]  zlib compression level
592       and with non-format option letters (for any of 'r'/'w'/'a'):
593         e  close the file on exec(2) (opens with O_CLOEXEC, where supported)
594         x  create the file exclusively (opens with O_EXCL, where supported)
595       Note that there is a distinction between 'u' and '0': the first yields
596       plain uncompressed output whereas the latter outputs uncompressed data
597       wrapped in the zlib format.
598   @example
599       [rw]b  .. compressed BCF, BAM, FAI
600       [rw]bu .. uncompressed BCF
601       [rw]z  .. compressed VCF
602       [rw]   .. uncompressed VCF
603 */
604 htsFile* hts_open(const(char)* fn, const(char)* mode);
605 
606 /**!
607   @abstract       Open a SAM/BAM/CRAM/VCF/BCF/etc file
608   @param fn       The file name or "-" for stdin/stdout
609   @param mode     Open mode, as per hts_open()
610   @param fmt      Optional format specific parameters
611   @discussion
612       See hts_open() for description of fn and mode.
613       // TODO Update documentation for s/opts/fmt/
614       Opts contains a format string (sam, bam, cram, vcf, bcf) which will,
615       if defined, override mode.  Opts also contains a linked list of hts_opt
616       structures to apply to the open file handle.  These can contain things
617       like pointers to the reference or information on compression levels,
618       block sizes, etc.
619 */
620 htsFile* hts_open_format(
621     const(char)* fn,
622     const(char)* mode,
623     const(htsFormat)* fmt);
624 
625 /**!
626   @abstract       Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file
627   @param fn       The already-open file handle
628   @param mode     Open mode, as per hts_open()
629 */
630 htsFile* hts_hopen(hFILE* fp, const(char)* fn, const(char)* mode);
631 
632 /**!
633   @abstract  Close a file handle, flushing buffered data for output streams
634   @param fp  The file handle to be closed
635   @return    0 for success, or negative if an error occurred.
636 */
637 int hts_close(htsFile* fp);
638 
639 /**!
640   @abstract  Returns the file's format information
641   @param fp  The file handle
642   @return    Read-only pointer to the file's htsFormat.
643 */
644 const(htsFormat)* hts_get_format(htsFile* fp);
645 
646 /**!
647   @ abstract      Returns a string containing the file format extension.
648   @ param format  Format structure containing the file type.
649   @ return        A string ("sam", "bam", etc) or "?" for unknown formats.
650  */
651 const(char)* hts_format_file_extension(const(htsFormat)* format);
652 
653 /**!
654   @abstract  Sets a specified CRAM option on the open file handle.
655   @param fp  The file handle open the open file.
656   @param opt The CRAM_OPT_* option.
657   @param ... Optional arguments, dependent on the option used.
658   @return    0 for success, or negative if an error occurred.
659 */
660 int hts_set_opt(htsFile* fp, hts_fmt_option opt, ...);
661 
662 /**!
663   @abstract         Read a line (and its \n or \r\n terminator) from a file
664   @param fp         The file handle
665   @param delimiter  Unused, but must be '\n' (or KS_SEP_LINE)
666   @param str        The line (not including the terminator) is written here
667   @return           Length of the string read;
668                     -1 on end-of-file; <= -2 on error
669 */
670 int hts_getline(htsFile* fp, int delimiter, kstring_t* str);
671 
672 char** hts_readlines(const(char)* fn, int* _n);
673 /**!
674     @abstract       Parse comma-separated list or read list from a file
675     @param list     File name or comma-separated list
676     @param is_file
677     @param _n       Size of the output array (number of items read)
678     @return         NULL on failure or pointer to newly allocated array of
679                     strings
680 */
681 char** hts_readlist(const(char)* fn, int is_file, int* _n);
682 
683 /**!
684   @abstract  Create extra threads to aid compress/decompression for this file
685   @param fp  The file handle
686   @param n   The number of worker threads to create
687   @return    0 for success, or negative if an error occurred.
688   @notes     This function creates non-shared threads for use solely by fp.
689              The hts_set_thread_pool function is the recommended alternative.
690 */
691 int hts_set_threads(htsFile* fp, int n);
692 
693 /**!
694   @abstract  Create extra threads to aid compress/decompression for this file
695   @param fp  The file handle
696   @param p   A pool of worker threads, previously allocated by hts_create_threads().
697   @return    0 for success, or negative if an error occurred.
698 */
699 int hts_set_thread_pool(htsFile* fp, htsThreadPool* p);
700 
701 /**!
702   @abstract  Adds a cache of decompressed blocks, potentially speeding up seeks.
703              This may not work for all file types (currently it is bgzf only).
704   @param fp  The file handle
705   @param n   The size of cache, in bytes
706 */
707 void hts_set_cache_size(htsFile* fp, int n);
708 
709 /*!
710   @abstract  Set .fai filename for a file opened for reading
711   @return    0 for success, negative on failure
712   @discussion
713       Called before *_hdr_read(), this provides the name of a .fai file
714       used to provide a reference list if the htsFile contains no @SQ headers.
715 */
716 int hts_set_fai_filename(htsFile* fp, const(char)* fn_aux);
717 
718 /**!
719   @abstract  Sets a filter expression
720   @return    0 for success, negative on failure
721   @discussion
722       To clear an existing filter, specifying expr as NULL.
723 */
724 int hts_set_filter_expression(htsFile* fp, const(char)* expr);
725 
726 /**!
727   @abstract  Determine whether a given htsFile contains a valid EOF block
728   @return    3 for a non-EOF checkable filetype;
729              2 for an unseekable file type where EOF cannot be checked;
730              1 for a valid EOF block;
731              0 for if the EOF marker is absent when it should be present;
732             -1 (with errno set) on failure
733   @discussion
734       Check if the BGZF end-of-file (EOF) marker is present
735 */
736 int hts_check_EOF(htsFile* fp);
737 
738 /************
739  * Indexing *
740  ************/
741 
742 /**!
743 These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc,
744 producing iterators operating as follows:
745  - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file
746  - HTS_IDX_START  iterates over the entire file
747  - HTS_IDX_REST   iterates from the current position to the end of the file
748  - HTS_IDX_NONE   always returns "no more alignment records"
749 When one of these special tid values is used, beg and end are ignored.
750 When REST or NONE is used, idx is also ignored and may be NULL.
751 */
752 enum HTS_IDX_NOCOOR = -2;
753 enum HTS_IDX_START = -3;
754 enum HTS_IDX_REST = -4;
755 enum HTS_IDX_NONE = -5;
756 
757 enum HTS_FMT_CSI = 0;
758 enum HTS_FMT_BAI = 1;
759 enum HTS_FMT_TBI = 2;
760 enum HTS_FMT_CRAI = 3;
761 enum HTS_FMT_FAI = 4;
762 
763 // Almost INT64_MAX, but when cast into a 32-bit int it's
764 // also INT_MAX instead of -1.  This avoids bugs with old code
765 // using the new hts_pos_t data type.
766 enum HTS_POS_MAX = ((cast(long) INT_MAX) << 32) | INT_MAX;
767 enum HTS_POS_MIN = INT64_MIN;
768 enum PRIhts_pos = PRId64;
769 alias hts_pos_t = c_long;
770 
771 // For comparison with previous release:
772 //
773 // #define HTS_POS_MAX INT_MAX
774 // #define HTS_POS_MIN INT_MIN
775 // #define PRIhts_pos PRId32
776 // typedef int32_t hts_pos_t;
777 
778 struct hts_pair_pos_t
779 {
780     hts_pos_t beg;
781     hts_pos_t end;
782 }
783 
784 alias hts_pair32_t = hts_pair_pos_t; // For backwards compatibility
785 
786 struct hts_pair64_t // @suppress(dscanner.style.phobos_naming_convention)
787 {
788     ulong u;
789     ulong v;
790 }
791 
792 /// 64-bit start, end coordinate pair tracking max (internally used in hts.c)
793 struct hts_pair64_max_t // @suppress(dscanner.style.phobos_naming_convention)
794 {
795     ulong u;
796     ulong v;
797     ulong max;
798 }
799 
800 /// Region list used in iterators (NB: apparently confined to single contig/tid)
801 struct hts_reglist_t
802 {
803     const(char) *reg;   /// Region string
804     hts_pair_pos_t *intervals;  /// (start,end) intervals
805     int tid;            /// Contig id
806     uint count;         /// How many intervals
807     hts_pos_t min_beg;  /// absolute bounds
808     hts_pos_t max_end;  /// absolute bounds
809 }
810 
811 alias hts_readrec_func = int function(BGZF* fp, void* data, void* r, int* tid, hts_pos_t* beg, hts_pos_t* end);
812 alias hts_seek_func = int function(void* fp, long offset, int where);
813 alias hts_tell_func = c_long function(void* fp);
814 
815 /**
816  * @brief File iterator that can handle multiple target regions.
817  * This structure should be considered opaque by end users.
818  * It does both the stepping inside the file and the filtering of alignments.
819  * It can operate in single or multi-region mode, and depending on this,
820  * it uses different fields.
821  *
822  * read_rest (1) - read everything from the current offset, without filtering
823  * finished  (1) - no more iterations
824  * is_cram   (1) - current file has CRAM format
825  * nocoor    (1) - read all unmapped reads
826  *
827  * multi     (1) - multi-region moode
828  * reg_list  - List of target regions
829  * n_reg     - Size of the above list
830  * curr_reg  - List index of the current region of search
831  * curr_intv - Interval index inside the current region; points to a (beg, end)
832  * end       - Used for CRAM files, to preserve the max end coordinate
833  *
834  * multi     (0) - single-region mode
835  * tid       - Reference id of the target region
836  * beg       - Start position of the target region
837  * end       - End position of the target region
838  *
839  * Common fields:
840  * off        - List of file offsets computed from the index
841  * n_off      - Size of the above list
842  * i          - List index of the current file offset
843  * curr_off   - File offset for the next file read
844  * curr_tid   - Reference id of the current alignment
845  * curr_beg   - Start position of the current alignment
846  * curr_end   - End position of the current alignment
847  * nocoor_off - File offset where the unmapped reads start
848  *
849  * readrec    - File specific function that reads an alignment
850  * seek       - File specific function for changing the file offset
851  * tell       - File specific function for indicating the file offset
852  */
853 
854 struct hts_itr_t // @suppress(dscanner.style.phobos_naming_convention)
855 {
856     import std.bitmanip : bitfields;
857 
858     mixin(bitfields!(
859         uint, "read_rest", 1,
860         uint, "finished", 1,
861         uint, "is_cram", 1,
862         uint, "nocoor", 1,
863         uint, "multi", 1,
864         uint, "dummy", 27));
865 
866     int tid;
867     int n_off;
868     int i;
869     int n_reg;
870     hts_pos_t beg;
871     hts_pos_t end;
872     hts_reglist_t* reg_list;
873     int curr_tid;
874     int curr_reg;
875     int curr_intv;
876     hts_pos_t curr_beg;
877     hts_pos_t curr_end;
878     ulong curr_off;
879     ulong nocoor_off;
880     hts_pair64_max_t* off;
881     int function() readrec;
882     int function() seek;
883     long function() tell;
884 
885     struct Bins
886     {
887         int n;
888         int m;
889         int* a;
890     }
891 
892     Bins bins;
893 }
894 /// ? index key
895 struct aux_key_t { // @suppress(dscanner.style.phobos_naming_convention)
896     int key;    /// ???
897     /// ???
898     ulong min_off, max_off;
899 }
900 
901 alias hts_itr_multi_t = hts_itr_t;
902 
903 pragma(inline, true)
904 extern (D) auto hts_bin_first(T)(auto ref T l)
905 {
906     return ((1 << ((l << 1) + l)) - 1) / 7;
907 }
908 
909 pragma(inline, true)
910 extern (D) auto hts_bin_parent(T)(auto ref T b)
911 {
912     return (b - 1) >> 3;
913 }
914 
915 ///////////////////////////////////////////////////////////
916 // Low-level API for building indexes.
917 
918 /// Create a BAI/CSI/TBI type index structure
919 /** @param n          Initial number of targets
920     @param fmt        Format, one of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI
921     @param offset0    Initial file offset
922     @param min_shift  Number of bits for the minimal interval
923     @param n_lvls     Number of levels in the binning index
924     @return An initialised hts_idx_t struct on success; NULL on failure
925 
926 The struct returned by a successful call should be freed via hts_idx_destroy()
927 when it is no longer needed.
928 */
929 hts_idx_t* hts_idx_init(
930     int n,
931     int fmt,
932     ulong offset0,
933     int min_shift,
934     int n_lvls);
935 
936 /// Free a BAI/CSI/TBI type index
937 /** @param idx   Index structure to free
938  */
939 void hts_idx_destroy(hts_idx_t* idx);
940 
941 /// Push an index entry
942 /** @param idx        Index
943     @param tid        Target id
944     @param beg        Range start (zero-based)
945     @param end        Range end (zero-based, half-open)
946     @param offset     File offset
947     @param is_mapped  Range corresponds to a mapped read
948     @return 0 on success; -1 on failure
949 
950 The @p is_mapped parameter is used to update the n_mapped / n_unmapped counts
951 stored in the meta-data bin.
952  */
953 int hts_idx_push(
954     hts_idx_t* idx,
955     int tid,
956     hts_pos_t beg,
957     hts_pos_t end,
958     ulong offset,
959     int is_mapped);
960 
961 /// Finish building an index
962 /** @param idx          Index
963     @param final_offset Last file offset
964     @return 0 on success; non-zero on failure.
965 */
966 int hts_idx_finish(hts_idx_t* idx, ulong final_offset);
967 
968 /// Returns index format
969 /** @param idx   Index
970     @return One of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI
971 */
972 int hts_idx_fmt(hts_idx_t* idx);
973 
974 /// Add name to TBI index meta-data
975 /** @param idx   Index
976     @param tid   Target identifier
977     @param name  Target name
978     @return Index number of name in names list on success; -1 on failure.
979 */
980 int hts_idx_tbi_name(hts_idx_t* idx, int tid, const(char)* name);
981 
982 // Index loading and saving
983 
984 /// Save an index to a file
985 /** @param idx  Index to be written
986     @param fn   Input BAM/BCF/etc filename, to which .bai/.csi/etc will be added
987     @param fmt  One of the HTS_FMT_* index formats
988     @return  0 if successful, or negative if an error occurred.
989 */
990 int hts_idx_save(const(hts_idx_t)* idx, const(char)* fn, int fmt);
991 
992 /// Save an index to a specific file
993 /** @param idx    Index to be written
994     @param fn     Input BAM/BCF/etc filename
995     @param fnidx  Output filename, or NULL to add .bai/.csi/etc to @a fn
996     @param fmt    One of the HTS_FMT_* index formats
997     @return  0 if successful, or negative if an error occurred.
998 */
999 int hts_idx_save_as(
1000     const(hts_idx_t)* idx,
1001     const(char)* fn,
1002     const(char)* fnidx,
1003     int fmt);
1004 
1005 /// Load an index file
1006 /** @param fn   BAM/BCF/etc filename, to which .bai/.csi/etc will be added or
1007                 the extension substituted, to search for an existing index file.
1008                 In case of a non-standard naming, the file name can include the
1009                 name of the index file delimited with HTS_IDX_DELIM.
1010     @param fmt  One of the HTS_FMT_* index formats
1011     @return  The index, or NULL if an error occurred.
1012 
1013 If @p fn contains the string "##idx##" (HTS_IDX_DELIM), the part before
1014 the delimiter will be used as the name of the data file and the part after
1015 it will be used as the name of the index.
1016 
1017 Otherwise, this function tries to work out the index name as follows:
1018 
1019   It will try appending ".csi" to @p fn
1020   It will try substituting an existing suffix (e.g. .bam, .vcf) with ".csi"
1021   Then, if @p fmt is HTS_FMT_BAI:
1022     It will try appending ".bai" to @p fn
1023     To will substituting the existing suffix (e.g. .bam) with ".bai"
1024   else if @p fmt is HTS_FMT_TBI:
1025     It will try appending ".tbi" to @p fn
1026     To will substituting the existing suffix (e.g. .vcf) with ".tbi"
1027 
1028 If the index file is remote (served over a protocol like https), first a check
1029 is made to see is a locally cached copy is available.  This is done for all
1030 of the possible names listed above.  If a cached copy is not available then
1031 the index will be downloaded and stored in the current working directory,
1032 with the same name as the remote index.
1033 
1034     Equivalent to hts_idx_load3(fn, NULL, fmt, HTS_IDX_SAVE_REMOTE);
1035 */
1036 hts_idx_t* hts_idx_load(const(char)* fn, int fmt);
1037 
1038 /// Load a specific index file
1039 /** @param fn     Input BAM/BCF/etc filename
1040     @param fnidx  The input index filename
1041     @return  The index, or NULL if an error occurred.
1042 
1043     Equivalent to hts_idx_load3(fn, fnidx, 0, 0);
1044 
1045     This function will not attempt to save index files locally.
1046 */
1047 hts_idx_t* hts_idx_load2(const(char)* fn, const(char)* fnidx);
1048 
1049 /// Load a specific index file
1050 /** @param fn     Input BAM/BCF/etc filename
1051     @param fnidx  The input index filename
1052     @param fmt    One of the HTS_FMT_* index formats
1053     @param flags  Flags to alter behaviour (see description)
1054     @return  The index, or NULL if an error occurred.
1055 
1056     If @p fnidx is NULL, the index name will be derived from @p fn in the
1057     same way as hts_idx_load().
1058 
1059     If @p fnidx is not NULL, @p fmt is ignored.
1060 
1061     The @p flags parameter can be set to a combination of the following
1062     values:
1063 
1064         HTS_IDX_SAVE_REMOTE   Save a local copy of any remote indexes
1065         HTS_IDX_SILENT_FAIL   Fail silently if the index is not present
1066 
1067     The index struct returned by a successful call should be freed
1068     via hts_idx_destroy() when it is no longer needed.
1069 */
1070 hts_idx_t* hts_idx_load3(
1071     const(char)* fn,
1072     const(char)* fnidx,
1073     int fmt,
1074     int flags);
1075 
1076 /// Flags for hts_idx_load3() ( and also sam_idx_load3(), tbx_idx_load3() )
1077 enum HTS_IDX_FLAG : int 
1078 {
1079     HTS_IDX_SAVE_REMOTE = 1,
1080     HTS_IDX_SILENT_FAIL = 2
1081 }
1082 ///////////////////////////////////////////////////////////
1083 // Functions for accessing meta-data stored in indexes
1084 
1085 alias hts_id2name_f = const(char)* function(void*, int);
1086 
1087 /// Get extra index meta-data
1088 /** @param idx    The index
1089     @param l_meta Pointer to where the length of the extra data is stored
1090     @return Pointer to the extra data if present; NULL otherwise
1091 
1092     Indexes (both .tbi and .csi) made by tabix include extra data about
1093     the indexed file.  The returns a pointer to this data.  Note that the
1094     data is stored exactly as it is in the index.  Callers need to interpret
1095     the results themselves, including knowing what sort of data to expect;
1096     byte swapping etc.
1097 */
1098 ubyte* hts_idx_get_meta(hts_idx_t* idx, uint* l_meta);
1099 
1100 /// Set extra index meta-data
1101 /** @param idx     The index
1102     @param l_meta  Length of data
1103     @param meta    Pointer to the extra data
1104     @param is_copy If not zero, a copy of the data is taken
1105     @return 0 on success; -1 on failure (out of memory).
1106 
1107     Sets the data that is returned by hts_idx_get_meta().
1108 
1109     If is_copy != 0, a copy of the input data is taken.  If not, ownership of
1110     the data pointed to by *meta passes to the index.
1111 */
1112 int hts_idx_set_meta(hts_idx_t* idx, uint l_meta, ubyte* meta, int is_copy);
1113 
1114 /// Get number of mapped and unmapped reads from an index
1115 /** @param      idx      Index
1116     @param      tid      Target ID
1117     @param[out] mapped   Location to store number of mapped reads
1118     @param[out] unmapped Location to store number of unmapped reads
1119     @return 0 on success; -1 on failure (data not available)
1120 
1121     BAI and CSI indexes store information on the number of reads for each
1122     target that were mapped or unmapped (unmapped reads will generally have
1123     a paired read that is mapped to the target).  This function returns this
1124     information if it is available.
1125 
1126     @note Cram CRAI indexes do not include this information.
1127 */
1128 int hts_idx_get_stat(
1129     const(hts_idx_t)* idx,
1130     int tid,
1131     ulong* mapped,
1132     ulong* unmapped);
1133 
1134 /// Return the number of unplaced reads from an index
1135 /** @param idx    Index
1136     @return Unplaced reads count
1137 
1138     Unplaced reads are not linked to any reference (e.g. RNAME is '*' in SAM
1139     files).
1140 */
1141 ulong hts_idx_get_n_no_coor(const(hts_idx_t)* idx);
1142 
1143 /// Return a list of target names from an index
1144 /** @param      idx    Index
1145     @param[out] n      Location to store the number of targets
1146     @param      getid  Callback function to get the name for a target ID
1147     @param      hdr    Header from indexed file
1148     @return An array of pointers to the names on success; NULL on failure
1149 
1150     @note The names are pointers into the header data structure.  When cleaning
1151     up, only the array should be freed, not the names.
1152  */
1153 const(char *)* hts_idx_seqnames(const(hts_idx_t)* idx, int* n, hts_id2name_f getid, void* hdr); // free only the array, not the values
1154 
1155 /// Return the number of targets from an index
1156 /** @param      idx    Index
1157     @return The number of targets
1158  */
1159 int hts_idx_nseq(const(hts_idx_t) *idx);
1160 
1161 ///////////////////////////////////////////////////////////
1162 // Region parsing
1163 
1164 enum HTS_PARSE_FLAGS : int 
1165 {
1166     HTS_PARSE_THOUSANDS_SEP = 1, ///< Ignore ',' separators within numbers
1167     HTS_PARSE_ONE_COORD = 2, ///< chr:pos means chr:pos-pos and not chr:pos-end
1168     HTS_PARSE_LIST = 4, ///< Expect a comma separated list of regions. (Disables HTS_PARSE_THOUSANDS_SEP)
1169 }
1170 
1171 /// Parse a numeric string
1172 /** The number may be expressed in scientific notation, and optionally may
1173     contain commas in the integer part (before any decimal point or E notation).
1174     @param str     String to be parsed
1175     @param strend  If non-NULL, set on return to point to the first character
1176                    in @a str after those forming the parsed number
1177     @param flags   Or'ed-together combination of HTS_PARSE_* flags
1178     @return  Converted value of the parsed number.
1179 
1180     When @a strend is NULL, a warning will be printed (if hts_verbose is HTS_LOG_WARNING
1181     or more) if there are any trailing characters after the number.
1182 */
1183 long hts_parse_decimal(const(char)* str, char** strend, HTS_PARSE_FLAGS flags);
1184 
1185 alias hts_name2id_f = int function(void*, const(char)*) *;
1186 
1187 /// Parse a "CHR:START-END"-style region string
1188 /** @param str  String to be parsed
1189     @param beg  Set on return to the 0-based start of the region
1190     @param end  Set on return to the 1-based end of the region
1191     @return  Pointer to the colon or '\0' after the reference sequence name,
1192              or NULL if @a str could not be parsed.
1193 
1194     NOTE: For compatibility with hts_parse_reg only.
1195     Please use hts_parse_region instead.
1196 */
1197 const(char)* hts_parse_reg64(const(char)* str, hts_pos_t* beg, hts_pos_t* end);
1198 
1199 /// Parse a "CHR:START-END"-style region string
1200 /** @param str  String to be parsed
1201     @param beg  Set on return to the 0-based start of the region
1202     @param end  Set on return to the 1-based end of the region
1203     @return  Pointer to the colon or '\0' after the reference sequence name,
1204              or NULL if @a str could not be parsed.
1205 */
1206 const(char)* hts_parse_reg(const(char)* str, int* beg, int* end);
1207 
1208 /// Parse a "CHR:START-END"-style region string
1209 /** @param str   String to be parsed
1210     @param tid   Set on return (if not NULL) to be reference index (-1 if invalid)
1211     @param beg   Set on return to the 0-based start of the region
1212     @param end   Set on return to the 1-based end of the region
1213     @param getid Function pointer.  Called if not NULL to set tid.
1214     @param hdr   Caller data passed to getid.
1215     @param flags Bitwise HTS_PARSE_* flags listed above.
1216     @return      Pointer to the byte after the end of the entire region
1217                  specifier (including any trailing comma) on success,
1218                  or NULL if @a str could not be parsed.
1219 
1220     A variant of hts_parse_reg which is reference-id aware.  It uses
1221     the iterator name2id callbacks to validate the region tokenisation works.
1222 
1223     This is necessary due to GRCh38 HLA additions which have reference names
1224     like "HLA-DRB1*12:17".
1225 
1226     To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200"
1227     are reference names, quote using curly braces.
1228     Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example.
1229 
1230     Flags are used to control how parsing works, and can be one of the below.
1231 
1232     HTS_PARSE_THOUSANDS_SEP:
1233         Ignore commas in numbers.  For example with this flag 1,234,567
1234         is interpreted as 1234567.
1235 
1236     HTS_PARSE_LIST:
1237         If present, the region is assmed to be a comma separated list and
1238         position parsing will not contain commas (this implicitly
1239         clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal).
1240         On success the return pointer will be the start of the next region, ie
1241         the character after the comma.  (If *ret != '\0' then the caller can
1242         assume another region is present in the list.)
1243 
1244         If not set then positions may contain commas.  In this case the return
1245         value should point to the end of the string, or NULL on failure.
1246 
1247     HTS_PARSE_ONE_COORD:
1248         If present, X:100 is treated as the single base pair region X:100-100.
1249         In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-<end>.
1250         (This is the standard bcftools region convention.)
1251 
1252         When not set X:100 is considered to be X:100-<end> where <end> is
1253         the end of chromosome X (set to INT_MAX here).  X:100- and X:-100 are
1254         invalid.
1255         (This is the standard samtools region convention.)
1256 
1257     Note the supplied string expects 1 based inclusive coordinates, but the
1258     returned coordinates start from 0 and are half open, so pos0 is valid
1259     for use in e.g. "for (pos0 = beg; pos0 < end; pos0++) {...}"
1260 
1261     If NULL is returned, the value in tid mat give additional information
1262     about the error:
1263 
1264         -2   Failed to parse @p hdr; or out of memory
1265         -1   The reference in @p str has mismatched braces, or does not
1266              exist in @p hdr
1267         >= 0 The specified range in @p str could not be parsed
1268 */
1269 const(char)* hts_parse_region(
1270     const(char)* s,
1271     int* tid,
1272     hts_pos_t* beg,
1273     hts_pos_t* end,
1274     hts_name2id_f getid,
1275     void* hdr,
1276     HTS_PARSE_FLAGS flags);
1277 
1278 ///////////////////////////////////////////////////////////
1279 // Generic iterators
1280 //
1281 // These functions provide the low-level infrastructure for iterators.
1282 // Wrappers around these are used to make iterators for specific file types.
1283 // See:
1284 //     htslib/sam.h  for SAM/BAM/CRAM iterators
1285 //     htslib/vcf.h  for VCF/BCF iterators
1286 //     htslib/tbx.h  for files indexed by tabix
1287 
1288 /// Create a single-region iterator
1289 /** @param idx      Index
1290     @param tid      Target ID
1291     @param beg      Start of region
1292     @param end      End of region
1293     @param readrec  Callback to read a record from the input file
1294     @return An iterator on success; NULL on failure
1295 
1296     The iterator struct returned by a successful call should be freed
1297     via hts_itr_destroy() when it is no longer needed.
1298  */
1299 hts_itr_t* hts_itr_query(
1300     const(hts_idx_t)* idx,
1301     int tid,
1302     hts_pos_t beg,
1303     hts_pos_t end,
1304     hts_readrec_func readrec);
1305 
1306 /// Free an iterator
1307 /** @param iter   Iterator to free
1308  */
1309 void hts_itr_destroy(hts_itr_t* iter);
1310 
1311 alias hts_itr_query_func = hts_itr_t* function(const(hts_idx_t)* idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func readrec);
1312 
1313 /// Create a single-region iterator from a text region specification
1314 /** @param idx       Index
1315     @param reg       Region specifier
1316     @param getid     Callback function to return the target ID for a name
1317     @param hdr       Input file header
1318     @param itr_query Callback function returning an iterator for a numeric tid,
1319                      start and end position
1320     @param readrec   Callback to read a record from the input file
1321     @return An iterator on success; NULL on error
1322 
1323     The iterator struct returned by a successful call should be freed
1324     via hts_itr_destroy() when it is no longer needed.
1325  */
1326 hts_itr_t* hts_itr_querys(
1327     const(hts_idx_t)* idx,
1328     const(char)* reg,
1329     hts_name2id_f getid,
1330     void* hdr,
1331     hts_itr_query_func itr_query,
1332     hts_readrec_func readrec);
1333 
1334 /// Return the next record from an iterator
1335 /** @param fp      Input file handle
1336     @param iter    Iterator
1337     @param r       Pointer to record placeholder
1338     @param data    Data passed to the readrec callback
1339     @return >= 0 on success, -1 when there is no more data, < -1 on error
1340  */
1341 int hts_itr_next(BGZF* fp, hts_itr_t* iter, void* r, void* data);
1342 
1343 
1344 /**********************************
1345  * Iterator with multiple regions *
1346  **********************************/
1347 
1348 alias hts_itr_multi_query_func = int function(const(hts_idx_t)* idx, hts_itr_t* itr);
1349 int hts_itr_multi_bam(const(hts_idx_t)* idx, hts_itr_t* iter);
1350 int hts_itr_multi_cram(const(hts_idx_t)* idx, hts_itr_t* iter);
1351 
1352 /// Create a multi-region iterator from a region list
1353 /** @param idx          Index
1354     @param reglist      Region list
1355     @param count        Number of items in region list
1356     @param getid        Callback to convert names to target IDs
1357     @param hdr          Indexed file header (passed to getid)
1358     @param itr_specific Filetype-specific callback function
1359     @param readrec      Callback to read an input file record
1360     @param seek         Callback to seek in the input file
1361     @param tell         Callback to return current input file location
1362     @return An iterator on success; NULL on failure
1363 
1364     The iterator struct returned by a successful call should be freed
1365     via hts_itr_destroy() when it is no longer needed.
1366  */
1367 hts_itr_t* hts_itr_regions(
1368     const(hts_idx_t)* idx,
1369     hts_reglist_t* reglist,
1370     int count,
1371     hts_name2id_f getid,
1372     void* hdr,
1373     hts_itr_multi_query_func itr_specific,
1374     hts_readrec_func readrec,
1375     hts_seek_func seek,
1376     hts_tell_func tell);
1377 
1378 /// Return the next record from an iterator
1379 /** @param fp      Input file handle
1380     @param iter    Iterator
1381     @param r       Pointer to record placeholder
1382     @return >= 0 on success, -1 when there is no more data, < -1 on error
1383  */
1384 int hts_itr_multi_next(htsFile* fd, hts_itr_t* iter, void* r);
1385 
1386 /// Create a region list from a char array
1387 /** @param argv      Char array of target:interval elements, e.g. chr1:2500-3600, chr1:5100, chr2
1388     @param argc      Number of items in the array
1389     @param r_count   Pointer to the number of items in the resulting region list
1390     @param hdr       Header for the sam/bam/cram file
1391     @param getid     Callback to convert target names to target ids.
1392     @return  A region list on success, NULL on failure
1393 
1394     The hts_reglist_t struct returned by a successful call should be freed
1395     via hts_reglist_free() when it is no longer needed.
1396  */
1397 hts_reglist_t* hts_reglist_create(
1398     char** argv,
1399     int argc,
1400     int* r_count,
1401     void* hdr,
1402     hts_name2id_f getid);
1403 
1404 /// Free a region list
1405 /** @param reglist    Region list
1406     @param count      Number of items in the list
1407  */
1408 void hts_reglist_free(hts_reglist_t* reglist, int count);
1409 
1410 /// Free a multi-region iterator
1411 /** @param iter   Iterator to free
1412  */
1413 alias hts_itr_multi_destroy = hts_itr_destroy;
1414 
1415 /**
1416  * hts_file_type() - Convenience function to determine file type
1417  * DEPRECATED:  This function has been replaced by hts_detect_format().
1418  * It and these FT_* macros will be removed in a future HTSlib release.
1419  */
1420 enum FT_UNKN = 0;
1421 enum FT_GZ = 1;
1422 enum FT_VCF = 2;
1423 enum FT_VCF_GZ = FT_GZ | FT_VCF;
1424 enum FT_BCF = 1 << 2;
1425 enum FT_BCF_GZ = FT_GZ | FT_BCF;
1426 enum FT_STDIN = 1 << 3;
1427 int hts_file_type(const(char)* fname);
1428 
1429 /***************************
1430  * Revised MAQ error model *
1431  ***************************/
1432 
1433 struct errmod_t;
1434 
1435 errmod_t* errmod_init(double depcorr);
1436 void errmod_destroy(errmod_t* em);
1437 
1438 /*
1439     n: number of bases
1440     m: maximum base
1441     bases[i]: qual:6, strand:1, base:4
1442     q[i*m+j]: phred-scaled likelihood of (i,j)
1443  */
1444 int errmod_cal(const(errmod_t)* em, int n, int m, ushort* bases, float* q);
1445 
1446 /*****************************************************
1447  * Probabilistic banded glocal alignment             *
1448  * See https://doi.org/10.1093/bioinformatics/btr076 *
1449  *****************************************************/
1450 
1451 struct probaln_par_t
1452 {
1453     float d;
1454     float e;
1455     int bw;
1456 }
1457 
1458 /// Perform probabilistic banded glocal alignment
1459 /** @param      ref     Reference sequence
1460     @param      l_ref   Length of reference
1461     @param      query   Query sequence
1462     @param      l_query Length of query sequence
1463     @param      iqual   Query base qualities
1464     @param      c       Alignment parameters
1465     @param[out] state   Output alignment
1466     @param[out] q    Phred scaled posterior probability of state[i] being wrong
1467     @return     Phred-scaled likelihood score, or INT_MIN on failure.
1468 
1469 The reference and query sequences are coded using integers 0,1,2,3,4 for
1470 bases A,C,G,T,N respectively (N here is for any ambiguity code).
1471 
1472 On output, state and q are arrays of length l_query. The higher 30
1473 bits give the reference position the query base is matched to and the
1474 lower two bits can be 0 (an alignment match) or 1 (an
1475 insertion). q[i] gives the phred scaled posterior probability of
1476 state[i] being wrong.
1477 
1478 On failure, errno will be set to EINVAL if the values of l_ref or l_query
1479 were invalid; or ENOMEM if a memory allocation failed.
1480 */
1481 
1482 int probaln_glocal(
1483     const(ubyte)* ref_,
1484     int l_ref,
1485     const(ubyte)* query,
1486     int l_query,
1487     const(ubyte)* iqual,
1488     const(probaln_par_t)* c,
1489     int* state,
1490     ubyte* q);
1491 
1492 /**********************
1493  * MD5 implementation *
1494  **********************/
1495 
1496 struct hts_md5_context;
1497 
1498 /*! @abstract   Initialises an MD5 context.
1499  *  @discussion
1500  *    The expected use is to allocate an hts_md5_context using
1501  *    hts_md5_init().  This pointer is then passed into one or more calls
1502  *    of hts_md5_update() to compute successive internal portions of the
1503  *    MD5 sum, which can then be externalised as a full 16-byte MD5sum
1504  *    calculation by calling hts_md5_final().  This can then be turned
1505  *    into ASCII via hts_md5_hex().
1506  *
1507  *    To dealloate any resources created by hts_md5_init() call the
1508  *    hts_md5_destroy() function.
1509  *
1510  *  @return     hts_md5_context pointer on success, NULL otherwise.
1511  */
1512 hts_md5_context* hts_md5_init();
1513 
1514 /*! @abstract Updates the context with the MD5 of the data. */
1515 void hts_md5_update(hts_md5_context* ctx, const(void)* data, c_ulong size);
1516 
1517 /*! @abstract Computes the final 128-bit MD5 hash from the given context */
1518 void hts_md5_final(ubyte* digest, hts_md5_context* ctx);
1519 
1520 /*! @abstract Resets an md5_context to the initial state, as returned
1521  *            by hts_md5_init().
1522  */
1523 void hts_md5_reset(hts_md5_context* ctx);
1524 
1525 /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-termninated
1526  *            hex string.
1527  */
1528 void hts_md5_hex(char* hex, const(ubyte)* digest);
1529 
1530 /*! @abstract Deallocates any memory allocated by hts_md5_init. */
1531 void hts_md5_destroy(hts_md5_context* ctx);
1532 
1533 pragma(inline,true)
1534 long hts_reg2bin(hts_pos_t beg, hts_pos_t end, int min_shift, int n_lvls)
1535 {
1536     int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7;
1537     for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l))
1538         if (beg>>s == end>>s) return t + (beg>>s);
1539     return 0;
1540 }
1541 
1542 /// Compute the level of a bin in a binning index
1543 pragma(inline, true)
1544 int hts_bin_level(int bin) {
1545     int l, b;
1546     for (l = 0, b = bin; b; ++l){ b = hts_bin_parent(b);}
1547     return l;
1548 }
1549 
1550 //! Compute the corresponding entry into the linear index of a given bin from
1551 //! a binning index
1552 /*!
1553  *  @param bin    The bin number
1554  *  @param n_lvls The index depth (number of levels - 0 based)
1555  *  @return       The integer offset into the linear index
1556  *
1557  *  Explanation of the return value formula:
1558  *  Each bin on level l covers exp(2, (n_lvls - l)*3 + min_shift) base pairs.
1559  *  A linear index entry covers exp(2, min_shift) base pairs.
1560  */
1561 
1562 // compute the level of bin
1563 pragma(inline, true)
1564 int hts_bin_bot(int bin, int n_lvls)
1565 {
1566     int l = hts_bin_level(bin);
1567     return (bin - hts_bin_first(l)) << (n_lvls - l) * 3;
1568 }
1569 
1570 /**************
1571  * Endianness *
1572  **************/
1573 pragma(inline, true)
1574 int ed_is_big()
1575 {
1576     long one= 1;
1577     return !(*(cast(char *)(&one)));
1578 }
1579 pragma(inline, true)
1580 ushort ed_swap_2(ushort v)
1581 {
1582     return cast(ushort)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8));
1583 }
1584 pragma(inline, true)
1585 void *ed_swap_2p(void *x)
1586 {
1587     *cast(ushort*)x = ed_swap_2(*cast(ushort*)x);
1588     return x;
1589 }
1590 pragma(inline, true)
1591 uint ed_swap_4(uint v)
1592 {
1593     v = ((v & 0x0000FFFFU) << 16) | (v >> 16);
1594     return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8);
1595 }
1596 pragma(inline, true)
1597 void *ed_swap_4p(void *x)
1598 {
1599     *cast(uint*)x = ed_swap_4(*cast(uint*)x);
1600     return x;
1601 }
1602 pragma(inline, true)
1603 ulong ed_swap_8(ulong v)
1604 {
1605     v = ((v & 0x00000000FFFFFFFFLU) << 32) | (v >> 32);
1606     v = ((v & 0x0000FFFF0000FFFFLU) << 16) | ((v & 0xFFFF0000FFFF0000LU) >> 16);
1607     return ((v & 0x00FF00FF00FF00FFLU) << 8) | ((v & 0xFF00FF00FF00FF00LU) >> 8);
1608 }
1609 pragma(inline, true)
1610 void *ed_swap_8p(void *x)
1611 {
1612     *cast(ulong*)x = ed_swap_8(*cast(ulong*)x);
1613     return x;
1614 }
1615