1 // htslib-1.9 hts.h as D module 2 // Changes include: 3 // In D, const on either LHS or RHS of function declaration applies to the function, not return value, unless parents included: 4 // changed ^const <type> <fnname> to ^const(<type>) <fnname> 5 /* * aliased typedef'd function pointers */ 6 /* * changed C-style arrays (eg line 339, extern const char seq_nt16_str[];) to char[] seq_nt16_str */ 7 /* * as update to above, seq_nt16_str needs to be char[16], as C and D style char[] imply different things */ 8 module htslib.hts; 9 10 import std.bitmanip; 11 12 extern (C): 13 /// @file htslib/hts.h 14 /// Format-neutral I/O, indexing, and iterator API functions. 15 /* 16 Copyright (C) 2012-2019 Genome Research Ltd. 17 Copyright (C) 2010, 2012 Broad Institute. 18 Portions copyright (C) 2003-2006, 2008-2010 by Heng Li <lh3@live.co.uk> 19 20 Author: Heng Li <lh3@sanger.ac.uk> 21 22 Permission is hereby granted, free of charge, to any person obtaining a copy 23 of this software and associated documentation files (the "Software"), to deal 24 in the Software without restriction, including without limitation the rights 25 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 26 copies of the Software, and to permit persons to whom the Software is 27 furnished to do so, subject to the following conditions: 28 29 The above copyright notice and this permission notice shall be included in 30 all copies or substantial portions of the Software. 31 32 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 33 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 34 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 35 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 36 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 37 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 38 DEALINGS IN THE SOFTWARE. */ 39 40 //#include <stddef.h> 41 import core.stdc.stdint; 42 /+ 43 #include "hts_defs.h" 44 #include "hts_log.h" 45 +/ 46 47 // Separator used to split HTS_PATH (for plugins); REF_PATH (cram references) 48 version(Windows) 49 { 50 enum HTS_PATH_SEPARATOR_CHAR = ';'; 51 enum HTS_PATH_SEPARATOR_STR = ";"; 52 } 53 else 54 { 55 enum HTS_PATH_SEPARATOR_CHAR = ':'; 56 enum HTS_PATH_SEPARATOR_STR = ":"; 57 } 58 59 import htslib.bgzf; 60 61 /// see cram.h, sam.h, sam.d 62 struct cram_fd; // @suppress(dscanner.style.phobos_naming_convention) 63 /// see hfile.d 64 //struct hFILE; // @suppress(dscanner.style.phobos_naming_convention) 65 import htslib.hfile: hFILE; 66 /// see thread_pool.d 67 struct hts_tpool; // @suppress(dscanner.style.phobos_naming_convention) 68 import htslib.sam : sam_hdr_t; 69 70 import htslib.kstring: kstring_t; 71 72 /+ 73 #ifndef kroundup32 74 #define kroundup32(x) (--(x), (x)|=(x)>>1, (x)|=(x)>>2, (x)|=(x)>>4, (x)|=(x)>>8, (x)|=(x)>>16, ++(x)) 75 #endif 76 77 /** 78 * @hideinitializer 79 * Deprecated macro to expand a dynamic array of a given type 80 * 81 * @param type_t The type of the array elements 82 * @param[in] n Requested number of elements of type type_t 83 * @param[in,out] m Size of memory allocated 84 * @param[in,out] ptr Pointer to the array 85 * 86 * @discussion 87 * Do not use this macro. Use hts_resize() instead as allows allocation 88 * failures to be handled more gracefully. 89 * 90 * The array *ptr will be expanded if necessary so that it can hold @p n 91 * or more elements. If the array is expanded then the new size will be 92 * written to @p m and the value in @p ptr may change. 93 * 94 * It must be possible to take the address of @p ptr and @p m must be usable 95 * as an lvalue. 96 * 97 * @bug 98 * If the memory allocation fails, this will call exit(1). This is 99 * not ideal behaviour in a library. 100 */ 101 #define hts_expand(type_t, n, m, ptr) do { \ 102 if ((n) > (m)) { \ 103 size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ 104 int, void **, const char *); \ 105 (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \ 106 sizeof(type_t), 0, \ 107 (void **)&(ptr), __func__); \ 108 } \ 109 } while (0) 110 111 /** 112 * @hideinitializer 113 * Macro to expand a dynamic array, zeroing any newly-allocated memory 114 * 115 * @param type_t The type of the array elements 116 * @param[in] n Requested number of elements of type type_t 117 * @param[in,out] m Size of memory allocated 118 * @param[in,out] ptr Pointer to the array 119 * 120 * @discussion 121 * Do not use this macro. Use hts_resize() instead as allows allocation 122 * failures to be handled more gracefully. 123 * 124 * As for hts_expand(), except the bytes that make up the array elements 125 * between the old and new values of @p m are set to zero using memset(). 126 * 127 * @bug 128 * If the memory allocation fails, this will call exit(1). This is 129 * not ideal behaviour in a library. 130 */ 131 132 133 #define hts_expand0(type_t, n, m, ptr) do { \ 134 if ((n) > (m)) { \ 135 size_t hts_realloc_or_die(size_t, size_t, size_t, size_t, \ 136 int, void **, const char *); \ 137 (m) = hts_realloc_or_die((n) >= 1 ? (n) : 1, (m), sizeof(m), \ 138 sizeof(type_t), 1, \ 139 (void **)&(ptr), __func__); \ 140 } \ 141 } while (0) 142 +/ 143 144 // For internal use (by hts_resize()) only 145 int hts_resize_array_(size_t, size_t, size_t, void *, void **, int, 146 const(char) *); 147 148 enum HTS_RESIZE_CLEAR = 1; 149 150 /** 151 * @hideinitializer 152 * Template fn to expand a dynamic array of a given type 153 * 154 * @param T The type of the array elements 155 * @param[in] num Requested number of elements of type T 156 * @param[in,out] size Pointer to where the size (in elements) of the 157 array is stored. 158 * @param[in,out] ptr Location of the pointer to the array 159 * @param[in] flags Option flags 160 * 161 * @return 0 for success, or negative if an error occurred. 162 * 163 * @discussion 164 * The array *ptr will be expanded if necessary so that it can hold @p num 165 * or more elements. If the array is expanded then the new size will be 166 * written to @p *size_ptr and the value in @p *ptr may change. 167 * 168 * If ( @p flags & HTS_RESIZE_CLEAR ) is set, any newly allocated memory will 169 * be cleared. 170 */ 171 172 pragma(inline,true) 173 int hts_resize(T)(size_t num, ref size_t size, T* ptr, int flags) 174 { 175 return (num > size) 176 ? hts_resize_array_(T.sizeof, num, size_t.sizeof, &size, cast(void **)&ptr, flags, __FUNCTION__) 177 : 0; 178 } 179 180 /** 181 * Wrapper function for free(). Enables memory deallocation across DLL 182 * boundary. Should be used by all applications, which are compiled 183 * with a different standard library than htslib and call htslib 184 * methods that return dynamically allocated data. 185 */ 186 void hts_free(void *ptr); 187 188 /************ 189 * File I/O * 190 ************/ 191 192 // Add new entries only at the end (but before the *_maximum entry) 193 // of these enums, as their numbering is part of the htslib ABI. 194 195 /// Broad format category (sequence data, variant data, index, regions, etc.) 196 enum htsFormatCategory { // @suppress(dscanner.style.phobos_naming_convention) 197 unknown_category, 198 sequence_data, // Sequence data -- SAM, BAM, CRAM, etc 199 variant_data, // Variant calling data -- VCF, BCF, etc 200 index_file, // Index file associated with some data file 201 region_list, // Coordinate intervals or regions -- BED, etc 202 category_maximum = 32_767 203 } 204 205 /// Specific format (SAM, BAM, CRAM, BCF, VCF, TBI, BED, etc.) 206 enum htsExactFormat { // @suppress(dscanner.style.phobos_naming_convention) 207 unknown_format, 208 binary_format, text_format, 209 sam, bam, bai, cram, crai, vcf, bcf, csi, gzi, tbi, bed, 210 htsget, 211 //deprecated("Use htsExactFormat 'htsget' instead") json = htsget, 212 empty_format, // File is empty (or empty after decompression) 213 fasta_format, fastq_format, fai_format, fqi_format, 214 format_maximum = 32_767 215 } 216 217 /// Compression type 218 enum htsCompression { // @suppress(dscanner.style.phobos_naming_convention) 219 no_compression, gzip, bgzf, custom, bzip2_compression, 220 compression_maximum = 32_767 221 } 222 223 /// hts file complete file format information 224 // NB: version is a reserved keyword in D -- changed to "vers" 225 struct htsFormat { // @suppress(dscanner.style.phobos_naming_convention) 226 htsFormatCategory category; /// Broad format category (sequence data, variant data, index, regions, etc.) 227 htsExactFormat format; /// Specific format (SAM, BAM, CRAM, BCF, VCF, TBI, BED, etc.) 228 /// format version 229 struct Vers { short major, minor; } // @suppress(dscanner.style.undocumented_declaration) 230 Vers v; /// format version 231 htsCompression compression; /// Compression type 232 short compression_level;/// currently unused 233 void *specific; /// format specific options; see struct hts_opt. 234 } 235 236 /// index data (opaque) 237 struct __hts_idx_t; 238 alias hts_idx_t = __hts_idx_t; 239 240 // Maintainers note htsFile cannot be an opaque structure because some of its 241 // fields are part of libhts.so's ABI (hence these fields must not be moved): 242 // - fp is used in the public sam_itr_next()/etc macros 243 // - is_bin is used directly in samtools <= 1.1 and bcftools <= 1.1 244 // - is_write and is_cram are used directly in samtools <= 1.1 245 // - fp is used directly in samtools (up to and including current develop) 246 // - line is used directly in bcftools (up to and including current develop) 247 /// Data and metadata for an hts file; part of public and private ABI 248 struct htsFile { // @suppress(dscanner.style.phobos_naming_convention) 249 pragma(msg, "htsFile: bitfield order assumed starting with LSB"); 250 //uint32_t is_bin:1, is_write:1, is_be:1, is_cram:1, is_bgzf:1, dummy:27; 251 mixin(bitfields!( 252 bool, "is_bin", 1, 253 bool, "is_write", 1, 254 bool, "is_be", 1, 255 bool, "is_cram", 1, 256 bool, "is_bgzf", 1, 257 uint, "padding27", 27 )); 258 int64_t lineno; /// uncompressed(?) file line no. 259 kstring_t line; /// buffer to hold line 260 char *fn; /// filename 261 char *fn_aux; /// auxillary (i.e, index) file name 262 /// hFile plus any needed bgzf or CRAM (if applicable) structure data 263 union FP { 264 BGZF *bgzf; /// see bgzf.d 265 cram_fd *cram; /// see cram.d 266 hFILE *hfile; /// see hfile.d 267 } 268 FP fp; /// hFile plus any needed bgzf or CRAM (if applicable) structure data 269 void *state; /// format specific state information 270 htsFormat format; /// hts file complete file format information 271 hts_idx_t *idx; 272 const(char) *fnidx; 273 sam_hdr_t *bam_header; 274 } 275 276 /// A combined thread pool and queue allocation size. 277 /// The pool should already be defined, but qsize may be zero to 278 /// indicate an appropriate queue size is taken from the pool. 279 /// 280 /// Reasons for explicitly setting it could be where many more file 281 /// descriptors are in use than threads, so keeping memory low is 282 /// important. 283 struct htsThreadPool { // @suppress(dscanner.style.phobos_naming_convention) 284 hts_tpool *pool;/// The shared thread pool itself 285 int qsize; /// Size of I/O queue to use for this fp 286 } 287 288 /// REQUIRED_FIELDS 289 enum sam_fields { // @suppress(dscanner.style.phobos_naming_convention) 290 SAM_QNAME = 0x00000001, 291 SAM_FLAG = 0x00000002, 292 SAM_RNAME = 0x00000004, 293 SAM_POS = 0x00000008, 294 SAM_MAPQ = 0x00000010, 295 SAM_CIGAR = 0x00000020, 296 SAM_RNEXT = 0x00000040, 297 SAM_PNEXT = 0x00000080, 298 SAM_TLEN = 0x00000100, 299 SAM_SEQ = 0x00000200, 300 SAM_QUAL = 0x00000400, 301 SAM_AUX = 0x00000800, 302 SAM_RGAUX = 0x00001000, 303 } 304 305 /// Mostly CRAM only, but this could also include other format options 306 enum hts_fmt_option { // @suppress(dscanner.style.phobos_naming_convention) 307 // CRAM specific 308 CRAM_OPT_DECODE_MD, 309 CRAM_OPT_PREFIX, 310 CRAM_OPT_VERBOSITY, /// obsolete, use hts_set_log_level() instead 311 CRAM_OPT_SEQS_PER_SLICE, 312 CRAM_OPT_SLICES_PER_CONTAINER, 313 CRAM_OPT_RANGE, 314 CRAM_OPT_VERSION, /// rename to cram_version? 315 CRAM_OPT_EMBED_REF, 316 CRAM_OPT_IGNORE_MD5, 317 CRAM_OPT_REFERENCE, // make general 318 CRAM_OPT_MULTI_SEQ_PER_SLICE, 319 CRAM_OPT_NO_REF, 320 CRAM_OPT_USE_BZIP2, 321 CRAM_OPT_SHARED_REF, 322 CRAM_OPT_NTHREADS, /// deprecated, use HTS_OPT_NTHREADS 323 CRAM_OPT_THREAD_POOL,/// make general 324 CRAM_OPT_USE_LZMA, 325 CRAM_OPT_USE_RANS, 326 CRAM_OPT_REQUIRED_FIELDS, 327 CRAM_OPT_LOSSY_NAMES, 328 CRAM_OPT_BASES_PER_SLICE, 329 CRAM_OPT_STORE_MD, 330 CRAM_OPT_STORE_NM, 331 332 // General purpose 333 HTS_OPT_COMPRESSION_LEVEL = 100, 334 HTS_OPT_NTHREADS, 335 HTS_OPT_THREAD_POOL, 336 HTS_OPT_CACHE_SIZE, 337 HTS_OPT_BLOCK_SIZE, 338 } 339 340 /// For backwards compatibility 341 alias cram_option = hts_fmt_option; 342 343 /// Options for cache, (de)compression, threads, CRAM, etc. 344 struct hts_opt { // @suppress(dscanner.style.phobos_naming_convention) 345 char *arg; /// string form, strdup()ed 346 hts_fmt_option opt; /// tokenised key 347 /// option value 348 union VAL { /// ... and value 349 int i; /// int value 350 char *s; /// string value 351 } 352 VAL val; /// value 353 hts_opt *next; /// next option (linked list) 354 } 355 356 //#define HTS_FILE_OPTS_INIT {{0},0} 357 // Not apparently used in htslib-1.7 358 359 /** 360 * Explicit index file name delimiter, see below 361 */ 362 enum HTS_IDX_DELIM = "##idx##"; 363 364 365 /********************** 366 * Exported functions * 367 **********************/ 368 369 /** 370 * Parses arg and appends it to the option list. 371 * 372 * Returns 0 on success; 373 * -1 on failure. 374 */ 375 int hts_opt_add(hts_opt **opts, const(char) *c_arg); 376 377 /** 378 * Applies an hts_opt option list to a given htsFile. 379 * 380 * Returns 0 on success 381 * -1 on failure 382 */ 383 int hts_opt_apply(htsFile *fp, hts_opt *opts); 384 385 /** 386 * Frees an hts_opt list. 387 */ 388 void hts_opt_free(hts_opt *opts); 389 390 /** 391 * Accepts a string file format (sam, bam, cram, vcf, bam) optionally 392 * followed by a comma separated list of key=value options and splits 393 * these up into the fields of htsFormat struct. 394 * 395 * Returns 0 on success 396 * -1 on failure. 397 */ 398 int hts_parse_format(htsFormat *opt, const(char) *str); 399 400 /** 401 * Tokenise options as (key(=value)?,)*(key(=value)?)? 402 * NB: No provision for ',' appearing in the value! 403 * Add backslashing rules? 404 * 405 * This could be used as part of a general command line option parser or 406 * as a string concatenated onto the file open mode. 407 * 408 * Returns 0 on success 409 * -1 on failure. 410 */ 411 int hts_parse_opt_list(htsFormat *opt, const(char) *str); 412 413 /**! @abstract Table for converting a nucleotide character to 4-bit encoding. 414 The input character may be either an IUPAC ambiguity code, '=' for 0, or 415 '0'/'1'/'2'/'3' for a result of 1/2/4/8. The result is encoded as 1/2/4/8 416 for A/C/G/T or combinations of these bits for ambiguous bases. 417 */ 418 419 version(Windows){ 420 const (char)[256] seq_nt16_table = [ 421 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 422 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 423 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 424 1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15, 425 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, 426 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, 427 15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15, 428 15,15, 5, 6, 8,15, 7, 9, 15,10,15,15, 15,15,15,15, 429 430 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 431 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 432 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 433 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 434 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 435 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 436 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15, 437 15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15 438 ]; 439 }else{ 440 extern const(char)[256] seq_nt16_table; 441 } 442 443 /**! @abstract Table for converting a 4-bit encoded nucleotide to an IUPAC 444 ambiguity code letter (or '=' when given 0). 445 */ 446 447 version(Windows) __gshared const (char)[16] seq_nt16_str = ['=','A','C','M','G','R','S','V','T','W','Y','H','K','D','B','N']; 448 else extern __gshared const(char)[16] seq_nt16_str; 449 /**! @abstract Table for converting a 4-bit encoded nucleotide to about 2 bits. 450 Returns 0/1/2/3 for 1/2/4/8 (i.e., A/C/G/T), or 4 otherwise (0 or ambiguous). 451 */ 452 version(Windows) const (int)[16] seq_nt16_int = [ 4, 0, 1, 4, 2, 4, 4, 4, 3, 4, 4, 4, 4, 4, 4, 4 ]; 453 else extern const int[] seq_nt16_int; 454 /**! 455 @abstract Get the htslib version number 456 @return For released versions, a string like "N.N[.N]"; or git describe 457 output if using a library built within a Git repository. 458 */ 459 const(char) *hts_version(); 460 461 /*! 462 @abstract Compile-time HTSlib version number, for use in #if checks 463 @return For released versions X.Y[.Z], an integer of the form XYYYZZ; 464 useful for preprocessor conditionals such as 465 #if HTS_VERSION >= 101000 // Check for v1.10 or later 466 */ 467 // Maintainers: Bump this in the final stage of preparing a new release. 468 // Immediately after release, bump ZZ to 90 to distinguish in-development 469 // Git repository builds from the release; you may wish to increment this 470 // further when significant features are merged. 471 enum HTS_VERSION = 101000; // @suppress(dscanner.style.number_literals) 472 473 /**! 474 @abstract Determine format by peeking at the start of a file 475 @param fp File opened for reading, positioned at the beginning 476 @param fmt Format structure that will be filled out on return 477 @return 0 for success, or negative if an error occurred. 478 */ 479 int hts_detect_format(hFILE *fp, htsFormat *fmt); 480 481 /**! 482 @abstract Get a human-readable description of the file format 483 @param fmt Format structure holding type, version, compression, etc. 484 @return Description string, to be freed by the caller after use. 485 */ 486 char *hts_format_description(const(htsFormat) *format); 487 488 /**! 489 @abstract Open a sequence data (SAM/BAM/CRAM) or variant data (VCF/BCF) 490 or possibly-compressed textual line-orientated file 491 @param fn The file name or "-" for stdin/stdout. For indexed files 492 with a non-standard naming, the file name can include the 493 name of the index file delimited with HTS_IDX_DELIM 494 @param mode Mode matching / [rwa][bceguxz0-9]* / 495 @discussion 496 With 'r' opens for reading; any further format mode letters are ignored 497 as the format is detected by checking the first few bytes or BGZF blocks 498 of the file. With 'w' or 'a' opens for writing or appending, with format 499 specifier letters: 500 b binary format (BAM, BCF, etc) rather than text (SAM, VCF, etc) 501 c CRAM format 502 g gzip compressed 503 u uncompressed 504 z bgzf compressed 505 [0-9] zlib compression level 506 and with non-format option letters (for any of 'r'/'w'/'a'): 507 e close the file on exec(2) (opens with O_CLOEXEC, where supported) 508 x create the file exclusively (opens with O_EXCL, where supported) 509 Note that there is a distinction between 'u' and '0': the first yields 510 plain uncompressed output whereas the latter outputs uncompressed data 511 wrapped in the zlib format. 512 @example 513 [rw]b .. compressed BCF, BAM, FAI 514 [rw]bu .. uncompressed BCF 515 [rw]z .. compressed VCF 516 [rw] .. uncompressed VCF 517 */ 518 htsFile *hts_open(const(char) *fn, const(char) *mode); 519 520 /**! 521 @abstract Open a SAM/BAM/CRAM/VCF/BCF/etc file 522 @param fn The file name or "-" for stdin/stdout 523 @param mode Open mode, as per hts_open() 524 @param fmt Optional format specific parameters 525 @discussion 526 See hts_open() for description of fn and mode. 527 // TODO Update documentation for s/opts/fmt/ 528 Opts contains a format string (sam, bam, cram, vcf, bcf) which will, 529 if defined, override mode. Opts also contains a linked list of hts_opt 530 structures to apply to the open file handle. These can contain things 531 like pointers to the reference or information on compression levels, 532 block sizes, etc. 533 */ 534 htsFile *hts_open_format(const(char) *fn, const(char) *mode, const(htsFormat) *fmt); 535 536 /**! 537 @abstract Open an existing stream as a SAM/BAM/CRAM/VCF/BCF/etc file 538 @param fn The already-open file handle 539 @param mode Open mode, as per hts_open() 540 */ 541 htsFile *hts_hopen(hFILE *fp, const(char) *fn, const(char) *mode); 542 543 /**! 544 @abstract Close a file handle, flushing buffered data for output streams 545 @param fp The file handle to be closed 546 @return 0 for success, or negative if an error occurred. 547 */ 548 int hts_close(htsFile *fp); 549 550 /**! 551 @abstract Returns the file's format information 552 @param fp The file handle 553 @return Read-only pointer to the file's htsFormat. 554 */ 555 const(htsFormat *) hts_get_format(htsFile *fp); 556 557 /**! 558 @ abstract Returns a string containing the file format extension. 559 @ param format Format structure containing the file type. 560 @ return A string ("sam", "bam", etc) or "?" for unknown formats. 561 */ 562 const(char *) hts_format_file_extension(const(htsFormat) *format); 563 564 /**! 565 @abstract Sets a specified CRAM option on the open file handle. 566 @param fp The file handle open the open file. 567 @param opt The CRAM_OPT_* option. 568 @param ... Optional arguments, dependent on the option used. 569 @return 0 for success, or negative if an error occurred. 570 */ 571 int hts_set_opt(htsFile *fp, hts_fmt_option opt, ...); 572 573 /// ?Get line as string from line-oriented flat file (undocumented in hts.h) 574 int hts_getline(htsFile *fp, int delimiter, kstring_t *str); 575 /// ?Get _n lines into buffer from line-oriented flat file; sets _n as number read (undocumented in hts.h) 576 char **hts_readlines(const(char) *fn, int *_n); 577 578 /**! 579 @abstract Parse comma-separated list or read list from a file 580 @param list File name or comma-separated list 581 @param is_file 582 @param _n Size of the output array (number of items read) 583 @return NULL on failure or pointer to newly allocated array of 584 strings 585 */ 586 char **hts_readlist(const(char) *fn, int is_file, int *_n); 587 588 /**! 589 @abstract Create extra threads to aid compress/decompression for this file 590 @param fp The file handle 591 @param n The number of worker threads to create 592 @return 0 for success, or negative if an error occurred. 593 @notes This function creates non-shared threads for use solely by fp. 594 The hts_set_thread_pool function is the recommended alternative. 595 */ 596 int hts_set_threads(htsFile *fp, int n); 597 598 /**! 599 @abstract Create extra threads to aid compress/decompression for this file 600 @param fp The file handle 601 @param p A pool of worker threads, previously allocated by hts_create_threads(). 602 @return 0 for success, or negative if an error occurred. 603 */ 604 int hts_set_thread_pool(htsFile *fp, htsThreadPool *p); 605 606 /**! 607 @abstract Adds a cache of decompressed blocks, potentially speeding up seeks. 608 This may not work for all file types (currently it is bgzf only). 609 @param fp The file handle 610 @param n The size of cache, in bytes 611 */ 612 void hts_set_cache_size(htsFile *fp, int n); 613 614 /**! 615 @abstract Set .fai filename for a file opened for reading 616 @return 0 for success, negative on failure 617 @discussion 618 Called before *_hdr_read(), this provides the name of a .fai file 619 used to provide a reference list if the htsFile contains no @SQ headers. 620 */ 621 int hts_set_fai_filename(htsFile *fp, const(char) *fn_aux); 622 623 624 /**! 625 @abstract Determine whether a given htsFile contains a valid EOF block 626 @return 3 for a non-EOF checkable filetype; 627 2 for an unseekable file type where EOF cannot be checked; 628 1 for a valid EOF block; 629 0 for if the EOF marker is absent when it should be present; 630 -1 (with errno set) on failure 631 @discussion 632 Check if the BGZF end-of-file (EOF) marker is present 633 */ 634 int hts_check_EOF(htsFile *fp); 635 636 /************ 637 * Indexing * 638 ************/ 639 640 /*! 641 These HTS_IDX_* macros are used as special tid values for hts_itr_query()/etc, 642 producing iterators operating as follows: 643 - HTS_IDX_NOCOOR iterates over unmapped reads sorted at the end of the file 644 - HTS_IDX_START iterates over the entire file 645 - HTS_IDX_REST iterates from the current position to the end of the file 646 - HTS_IDX_NONE always returns "no more alignment records" 647 When one of these special tid values is used, beg and end are ignored. 648 When REST or NONE is used, idx is also ignored and may be NULL. 649 */ 650 enum HTS_IDX_NOCOOR = (-2); /// iterates over unmapped reads sorted at the end of the file 651 enum HTS_IDX_START = (-3); /// iterates over the entire file 652 enum HTS_IDX_REST = (-4); /// iterates from the current position to the end of the file 653 enum HTS_IDX_NONE = (-5); /// always returns "no more alignment records" 654 655 enum int HTS_FMT_CSI = 0; /// coordinate-sorted index (new) 656 enum int HTS_FMT_BAI = 1; /// BAM index (old) 657 enum int HTS_FMT_TBI = 2; /// Tabix index 658 enum int HTS_FMT_CRAI= 3; /// CRAM index (not sure if superceded by CSI?) 659 660 // Almost INT64_MAX, but when cast into a 32-bit int it's 661 // also INT_MAX instead of -1. This avoids bugs with old code 662 // using the new hts_pos_t data type. 663 enum HTS_POS_MAX = (((cast(int64_t)int.max)<<32)|int.max); 664 enum HTS_POS_MIN = int64_t.min; 665 //enum PRIhts_pos = PRId64; 666 alias hts_pos_t = int64_t; 667 668 // For comparison with previous release: 669 // 670 // #define HTS_POS_MAX INT_MAX 671 // #define HTS_POS_MIN INT_MIN 672 // #define PRIhts_pos PRId32 673 // typedef int32_t hts_pos_t; 674 675 struct hts_pair_pos_t { 676 hts_pos_t beg, end; 677 } 678 679 alias hts_pair32_t = hts_pair_pos_t; // For backwards compatibility 680 681 struct hts_pair64_t { // @suppress(dscanner.style.phobos_naming_convention) 682 /// start, end coordinates (64-bit) 683 ulong u, v; 684 } 685 686 /// 64-bit start, end coordinate pair tracking max (internally used in hts.c) 687 struct hts_pair64_max_t { // @suppress(dscanner.style.phobos_naming_convention) 688 /// ? 689 ulong u, v; 690 /// ? 691 ulong max; 692 } 693 694 /// Region list used in iterators (NB: apparently confined to single contig/tid) 695 struct hts_reglist_t { // @suppress(dscanner.style.phobos_naming_convention) 696 const(char) *reg; /// Region string 697 hts_pair_pos_t *intervals; /// (start,end) intervals 698 int tid; /// Contig id 699 uint32_t count; /// How many intervals 700 /// absolute bounds 701 hts_pos_t min_beg, max_end; 702 } 703 704 //typedef int hts_readrec_func(BGZF *fp, void *data, void *r, int *tid, int *beg, int *end); 705 alias hts_readrec_func = int function(BGZF *fp, void *data, void *r, int *tid, hts_pos_t *beg, hts_pos_t *end); 706 707 //typedef int hts_seek_func(void *fp, int64_t offset, int where); 708 alias hts_seek_func = int function(void *fp, ulong offset, int where); 709 710 //typedef int64_t hts_tell_func(void *fp); 711 alias hts_tell_func = long function(void *fp); 712 713 /// iterator 714 struct hts_itr_t { // @suppress(dscanner.style.phobos_naming_convention) 715 pragma(msg, "hts_itr_t: bitfield order assumed starting with LSB"); 716 // uint32_t read_rest:1, finished:1, is_cram:1, dummy:29; 717 mixin(bitfields!( 718 bool, "read_rest", 1, 719 bool, "finished", 1, 720 bool, "is_cram", 1, 721 bool, "nocoor", 1, 722 bool, "multi", 1, 723 uint, "dummy27", 27)); 724 /// iterator position data 725 int tid, n_off, i, n_reg; 726 hts_pos_t beg, end; 727 hts_reglist_t *reg_list; 728 int curr_tid, curr_reg, curr_intv; 729 hts_pos_t curr_beg, curr_end; 730 uint64_t curr_off, nocoor_off; 731 hts_pair64_max_t *off; /// ? (start, end) offset 732 hts_readrec_func *readrec; /// record parsing fn pointer 733 hts_seek_func *seek; 734 hts_tell_func *tell; 735 /// ??? 736 struct Bins { 737 /// ??? 738 int n, m; 739 int *a; /// ??? 740 } 741 Bins bins; /// ??? 742 } 743 744 /// ? index key 745 struct aux_key_t { // @suppress(dscanner.style.phobos_naming_convention) 746 int key; /// ??? 747 /// ??? 748 ulong min_off, max_off; 749 } 750 751 alias hts_itr_multi_t = hts_itr_t; 752 753 pragma(inline, true) 754 { 755 /// ??? 756 auto hts_bin_first(T)(T l) { return (((1<<(((l)<<1) + (l))) - 1) / 7); } // #define hts_bin_first(l) (((1<<(((l)<<1) + (l))) - 1) / 7) 757 /// ??? 758 auto hts_bin_parent(T)(T l){ return (((l) - 1) >> 3); } // #define hts_bin_parent(l) (((l) - 1) >> 3) 759 } 760 761 /+ 762 /// Initialize index 763 hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls); 764 /// Destroy index 765 void hts_idx_destroy(hts_idx_t *idx); 766 /// Add to index 767 int hts_idx_push(hts_idx_t *idx, int tid, int beg, int end, uint64_t offset, int is_mapped); 768 /// ?finalize index 769 void hts_idx_finish(hts_idx_t *idx, uint64_t final_offset); 770 +/ 771 /////////////////////////////////////////////////////////// 772 // Low-level API for building indexes. 773 774 /// Create a BAI/CSI/TBI type index structure 775 /** @param n Initial number of targets 776 @param fmt Format, one of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI 777 @param offset0 Initial file offset 778 @param min_shift Number of bits for the minimal interval 779 @param n_lvls Number of levels in the binning index 780 @return An initialised hts_idx_t struct on success; NULL on failure 781 782 The struct returned by a successful call should be freed via hts_idx_destroy() 783 when it is no longer needed. 784 */ 785 hts_idx_t *hts_idx_init(int n, int fmt, uint64_t offset0, int min_shift, int n_lvls); 786 787 /// Free a BAI/CSI/TBI type index 788 /** @param idx Index structure to free 789 */ 790 void hts_idx_destroy(hts_idx_t *idx); 791 792 /// Push an index entry 793 /** @param idx Index 794 @param tid Target id 795 @param beg Range start (zero-based) 796 @param end Range end (zero-based, half-open) 797 @param offset File offset 798 @param is_mapped Range corresponds to a mapped read 799 @return 0 on success; -1 on failure 800 801 The @p is_mapped parameter is used to update the n_mapped / n_unmapped counts 802 stored in the meta-data bin. 803 */ 804 int hts_idx_push(hts_idx_t *idx, int tid, hts_pos_t beg, hts_pos_t end, uint64_t offset, int is_mapped); 805 806 /// Finish building an index 807 /** @param idx Index 808 @param final_offset Last file offset 809 @return 0 on success; non-zero on failure. 810 */ 811 int hts_idx_finish(hts_idx_t *idx, uint64_t final_offset); 812 813 /// Returns index format 814 /** @param idx Index 815 @return One of HTS_FMT_CSI, HTS_FMT_BAI or HTS_FMT_TBI 816 */ 817 int hts_idx_fmt(hts_idx_t *idx); 818 819 /// Add name to TBI index meta-data 820 /** @param idx Index 821 @param tid Target identifier 822 @param name Target name 823 @return Index number of name in names list on success; -1 on failure. 824 */ 825 int hts_idx_tbi_name(hts_idx_t *idx, int tid, const(char) *name); 826 827 // Index loading and saving 828 829 830 831 /// Save an index to a file 832 /** @param idx Index to be written 833 @param fn Input BAM/BCF/etc filename, to which .bai/.csi/etc will be added 834 @param fmt One of the HTS_FMT_* index formats 835 @return 0 if successful, or negative if an error occurred. 836 */ 837 int hts_idx_save(const(hts_idx_t) *idx, const(char) *fn, int fmt); 838 839 /// Save an index to a specific file 840 /** @param idx Index to be written 841 @param fn Input BAM/BCF/etc filename 842 @param fnidx Output filename, or NULL to add .bai/.csi/etc to @a fn 843 @param fmt One of the HTS_FMT_* index formats 844 @return 0 if successful, or negative if an error occurred. 845 */ 846 int hts_idx_save_as(const(hts_idx_t) *idx, const(char) *fn, const(char) *fnidx, int fmt); 847 848 849 /// Load an index file 850 /** @param fn BAM/BCF/etc filename, to which .bai/.csi/etc will be added or 851 the extension substituted, to search for an existing index file. 852 In case of a non-standard naming, the file name can include the 853 name of the index file delimited with HTS_IDX_DELIM. 854 855 @param fmt One of the HTS_FMT_* index formats 856 @return The index, or NULL if an error occurred. 857 858 If @p fn contains the string "##idx##" (HTS_IDX_DELIM), the part before 859 the delimiter will be used as the name of the data file and the part after 860 it will be used as the name of the index. 861 862 Otherwise, this function tries to work out the index name as follows: 863 864 It will try appending ".csi" to @p fn 865 It will try substituting an existing suffix (e.g. .bam, .vcf) with ".csi" 866 Then, if @p fmt is HTS_FMT_BAI: 867 It will try appending ".bai" to @p fn 868 To will substituting the existing suffix (e.g. .bam) with ".bai" 869 else if @p fmt is HTS_FMT_TBI: 870 It will try appending ".tbi" to @p fn 871 To will substituting the existing suffix (e.g. .vcf) with ".tbi" 872 873 If the index file is remote (served over a protocol like https), first a check 874 is made to see is a locally cached copy is available. This is done for all 875 of the possible names listed above. If a cached copy is not available then 876 the index will be downloaded and stored in the current working directory, 877 with the same name as the remote index. 878 879 Equivalent to hts_idx_load3(fn, NULL, fmt, HTS_IDX_SAVE_REMOTE); 880 */ 881 hts_idx_t *hts_idx_load(const(char) *fn, int fmt); 882 883 /// Load a specific index file 884 /** @param fn Input BAM/BCF/etc filename 885 @param fnidx The input index filename 886 @return The index, or NULL if an error occurred. 887 888 Equivalent to hts_idx_load3(fn, fnidx, 0, 0); 889 890 This function will not attempt to save index files locally. 891 */ 892 hts_idx_t *hts_idx_load2(const(char) *fn, const(char) *fnidx); 893 894 /// Load a specific index file 895 /** @param fn Input BAM/BCF/etc filename 896 @param fnidx The input index filename 897 @param fmt One of the HTS_FMT_* index formats 898 @param flags Flags to alter behaviour (see description) 899 @return The index, or NULL if an error occurred. 900 901 If @p fnidx is NULL, the index name will be derived from @p fn in the 902 same way as hts_idx_load(). 903 904 If @p fnidx is not NULL, @p fmt is ignored. 905 906 The @p flags parameter can be set to a combination of the following 907 values: 908 909 HTS_IDX_SAVE_REMOTE Save a local copy of any remote indexes 910 HTS_IDX_SILENT_FAIL Fail silently if the index is not present 911 912 The index struct returned by a successful call should be freed 913 via hts_idx_destroy() when it is no longer needed. 914 */ 915 hts_idx_t *hts_idx_load3(const(char) *fn, const(char) *fnidx, int fmt, HTS_IDX_FLAG flags); 916 917 /// Flags for hts_idx_load3() ( and also sam_idx_load3(), tbx_idx_load3() ) 918 enum HTS_IDX_FLAG : int { 919 HTS_IDX_SAVE_REMOTE = 1, 920 HTS_IDX_SILENT_FAIL = 2 921 } 922 923 /////////////////////////////////////////////////////////// 924 // Functions for accessing meta-data stored in indexes 925 926 /// Get extra index meta-data 927 /** @param idx The index 928 @param l_meta Pointer to where the length of the extra data is stored 929 @return Pointer to the extra data if present; NULL otherwise 930 931 Indexes (both .tbi and .csi) made by tabix include extra data about 932 the indexed file. The returns a pointer to this data. Note that the 933 data is stored exactly as it is in the index. Callers need to interpret 934 the results themselves, including knowing what sort of data to expect; 935 byte swapping etc. 936 */ 937 uint8_t *hts_idx_get_meta(hts_idx_t *idx, uint32_t *l_meta); 938 939 /// Set extra index meta-data 940 /** @param idx The index 941 @param l_meta Length of data 942 @param meta Pointer to the extra data 943 @param is_copy If not zero, a copy of the data is taken 944 @return 0 on success; -1 on failure (out of memory). 945 946 Sets the data that is returned by hts_idx_get_meta(). 947 948 If is_copy != 0, a copy of the input data is taken. If not, ownership of 949 the data pointed to by *meta passes to the index. 950 */ 951 int hts_idx_set_meta(hts_idx_t *idx, uint32_t l_meta, uint8_t *meta, int is_copy); 952 953 /// Get number of mapped and unmapped reads from an index 954 /** @param idx Index 955 @param tid Target ID 956 @param[out] mapped Location to store number of mapped reads 957 @param[out] unmapped Location to store number of unmapped reads 958 @return 0 on success; -1 on failure (data not available) 959 960 BAI and CSI indexes store information on the number of reads for each 961 target that were mapped or unmapped (unmapped reads will generally have 962 a paired read that is mapped to the target). This function returns this 963 infomation if it is available. 964 965 @note Cram CRAI indexes do not include this information. 966 */ 967 int hts_idx_get_stat(const(hts_idx_t)* idx, int tid, uint64_t* mapped, uint64_t* unmapped); 968 969 /// Return the number of unplaced reads from an index 970 /** @param idx Index 971 @return Unplaced reads count 972 973 Unplaced reads are not linked to any reference (e.g. RNAME is '*' in SAM 974 files). 975 */ 976 uint64_t hts_idx_get_n_no_coor(const(hts_idx_t)* idx); 977 978 /////////////////////////////////////////////////////////// 979 // Region parsing 980 981 enum HTS_PARSE_FLAGS : int { 982 HTS_PARSE_THOUSANDS_SEP = 1, ///< Ignore ',' separators within numbers 983 HTS_PARSE_ONE_COORD = 2, ///< chr:pos means chr:pos-pos and not chr:pos-end 984 HTS_PARSE_LIST = 4 ///< Expect a comma separated list of regions. (Disables HTS_PARSE_THOUSANDS_SEP) 985 } 986 987 /// Parse a numeric string 988 /** The number may be expressed in scientific notation, and optionally may 989 contain commas in the integer part (before any decimal point or E notation). 990 @param str String to be parsed 991 @param strend If non-NULL, set on return to point to the first character 992 in @a str after those forming the parsed number 993 @param flags Or'ed-together combination of HTS_PARSE_* flags 994 @return Converted value of the parsed number. 995 996 When @a strend is NULL, a warning will be printed (if hts_verbose is HTS_LOG_WARNING 997 or more) if there are any trailing characters after the number. 998 */ 999 long hts_parse_decimal(const(char) *str, char **strend, HTS_PARSE_FLAGS flags); 1000 1001 alias hts_name2id_f = int function(void*, const(char)*); 1002 alias hts_id2name_f = const(char)* function(void*, int); 1003 1004 /// Parse a "CHR:START-END"-style region string 1005 /** @param str String to be parsed 1006 @param beg Set on return to the 0-based start of the region 1007 @param end Set on return to the 1-based end of the region 1008 @return Pointer to the colon or '\0' after the reference sequence name, 1009 or NULL if @a str could not be parsed. 1010 1011 NOTE: For compatibility with hts_parse_reg only. 1012 Please use hts_parse_region instead. 1013 */ 1014 const(char) *hts_parse_reg64(const(char) *str, hts_pos_t *beg, hts_pos_t *end); 1015 1016 /// Parse a "CHR:START-END"-style region string 1017 /** @param str String to be parsed 1018 @param beg Set on return to the 0-based start of the region 1019 @param end Set on return to the 1-based end of the region 1020 @return Pointer to the colon or '\0' after the reference sequence name, 1021 or NULL if @a str could not be parsed. 1022 */ 1023 const(char) *hts_parse_reg(const(char) *str, int *beg, int *end); 1024 1025 /// Parse a "CHR:START-END"-style region string 1026 /** @param str String to be parsed 1027 @param tid Set on return (if not NULL) to be reference index (-1 if invalid) 1028 @param beg Set on return to the 0-based start of the region 1029 @param end Set on return to the 1-based end of the region 1030 @param getid Function pointer. Called if not NULL to set tid. 1031 @param hdr Caller data passed to getid. 1032 @param flags Bitwise HTS_PARSE_* flags listed above. 1033 @return Pointer to the byte after the end of the entire region 1034 specifier (including any trailing comma) on success, 1035 or NULL if @a str could not be parsed. 1036 1037 A variant of hts_parse_reg which is reference-id aware. It uses 1038 the iterator name2id callbacks to validate the region tokenisation works. 1039 1040 This is necessary due to GRCh38 HLA additions which have reference names 1041 like "HLA-DRB1*12:17". 1042 1043 To work around ambiguous parsing issues, eg both "chr1" and "chr1:100-200" 1044 are reference names, quote using curly braces. 1045 Thus "{chr1}:100-200" and "{chr1:100-200}" disambiguate the above example. 1046 1047 Flags are used to control how parsing works, and can be one of the below. 1048 1049 HTS_PARSE_THOUSANDS_SEP: 1050 Ignore commas in numbers. For example with this flag 1,234,567 1051 is interpreted as 1234567. 1052 1053 HTS_PARSE_LIST: 1054 If present, the region is assmed to be a comma separated list and 1055 position parsing will not contain commas (this implicitly 1056 clears HTS_PARSE_THOUSANDS_SEP in the call to hts_parse_decimal). 1057 On success the return pointer will be the start of the next region, ie 1058 the character after the comma. (If *ret != '\0' then the caller can 1059 assume another region is present in the list.) 1060 1061 If not set then positions may contain commas. In this case the return 1062 value should point to the end of the string, or NULL on failure. 1063 1064 HTS_PARSE_ONE_COORD: 1065 If present, X:100 is treated as the single base pair region X:100-100. 1066 In this case X:-100 is shorthand for X:1-100 and X:100- is X:100-<end>. 1067 (This is the standard bcftools region convention.) 1068 1069 When not set X:100 is considered to be X:100-<end> where <end> is 1070 the end of chromosome X (set to INT_MAX here). X:100- and X:-100 are 1071 invalid. 1072 (This is the standard samtools region convention.) 1073 1074 Note the supplied string expects 1 based inclusive coordinates, but the 1075 returned coordinates start from 0 and are half open, so pos0 is valid 1076 for use in e.g. "for (pos0 = beg; pos0 < end; pos0++) {...}" 1077 1078 If NULL is returned, the value in tid mat give additional information 1079 about the error: 1080 1081 -2 Failed to parse @p hdr; or out of memory 1082 -1 The reference in @p str has mismatched braces, or does not 1083 exist in @p hdr 1084 >= 0 The specified range in @p str could not be parsed 1085 */ 1086 const(char) *hts_parse_region(const(char)*s, int *tid, hts_pos_t *beg, 1087 hts_pos_t *end, hts_name2id_f getid, void *hdr, 1088 HTS_PARSE_FLAGS flags); 1089 1090 /////////////////////////////////////////////////////////// 1091 // Generic iterators 1092 // 1093 // These functions provide the low-level infrastructure for iterators. 1094 // Wrappers around these are used to make iterators for specific file types. 1095 // See: 1096 // htslib/sam.h for SAM/BAM/CRAM iterators 1097 // htslib/vcf.h for VCF/BCF iterators 1098 // htslib/tbx.h for files indexed by tabix 1099 1100 /// Create a single-region iterator 1101 /** @param idx Index 1102 @param tid Target ID 1103 @param beg Start of region 1104 @param end End of region 1105 @param readrec Callback to read a record from the input file 1106 @return An iterator on success; NULL on failure 1107 1108 The iterator struct returned by a successful call should be freed 1109 via hts_itr_destroy() when it is no longer needed. 1110 */ 1111 hts_itr_t *hts_itr_query(const(hts_idx_t)*idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func readrec); 1112 1113 /// Free an iterator 1114 /** @param iter Iterator to free 1115 */ 1116 void hts_itr_destroy(hts_itr_t *iter); 1117 1118 alias hts_itr_query_func = 1119 hts_itr_t * function(const(hts_idx_t)*idx, int tid, hts_pos_t beg, hts_pos_t end, hts_readrec_func readrec); 1120 1121 /// Create a single-region iterator from a text region specification 1122 /** @param idx Index 1123 @param reg Region specifier 1124 @param getid Callback function to return the target ID for a name 1125 @param hdr Input file header 1126 @param itr_query Callback function returning an iterator for a numeric tid, 1127 start and end position 1128 @param readrec Callback to read a record from the input file 1129 @return An iterator on success; NULL on error 1130 1131 The iterator struct returned by a successful call should be freed 1132 via hts_itr_destroy() when it is no longer needed. 1133 */ 1134 hts_itr_t *hts_itr_querys(const(hts_idx_t) *idx, const(char) *reg, hts_name2id_f getid, void *hdr, 1135 hts_itr_query_func itr_query, hts_readrec_func readrec); 1136 1137 /// Return the next record from an iterator 1138 /** @param fp Input file handle 1139 @param iter Iterator 1140 @param r Pointer to record placeholder 1141 @param data Data passed to the readrec callback 1142 @return >= 0 on success, -1 when there is no more data, < -1 on error 1143 */ 1144 int hts_itr_next(BGZF *fp, hts_itr_t *iter, void *r, void *data); 1145 1146 /// Return a list of target names from an index 1147 /** @param idx Index 1148 @param[out] n Location to store the number of targets 1149 @param getid Callback function to get the name for a target ID 1150 @param hdr Header from indexed file 1151 @return An array of pointers to the names on success; NULL on failure 1152 1153 @note The names are pointers into the header data structure. When cleaning 1154 up, only the array should be freed, not the names. 1155 */ 1156 const(char)** hts_idx_seqnames(const(hts_idx_t) *idx, int *n, hts_id2name_f getid, void *hdr); // free only the array, not the values 1157 1158 /********************************** 1159 * Iterator with multiple regions * 1160 **********************************/ 1161 1162 alias hts_itr_multi_query_func = int function(const(hts_idx_t) *idx, hts_itr_t *itr); 1163 int hts_itr_multi_bam(const(hts_idx_t) *idx, hts_itr_t *iter); 1164 int hts_itr_multi_cram(const(hts_idx_t) *idx, hts_itr_t *iter); 1165 1166 /// Create a multi-region iterator from a region list 1167 /** @param idx Index 1168 @param reglist Region list 1169 @param count Number of items in region list 1170 @param getid Callback to convert names to target IDs 1171 @param hdr Indexed file header (passed to getid) 1172 @param itr_specific Filetype-specific callback function 1173 @param readrec Callback to read an input file record 1174 @param seek Callback to seek in the input file 1175 @param tell Callback to return current input file location 1176 @return An iterator on success; NULL on failure 1177 1178 The iterator struct returned by a successful call should be freed 1179 via hts_itr_destroy() when it is no longer needed. 1180 */ 1181 hts_itr_t *hts_itr_regions(const(hts_idx_t) *idx, hts_reglist_t *reglist, int count, hts_name2id_f getid, void *hdr, 1182 hts_itr_multi_query_func itr_specific, hts_readrec_func readrec, hts_seek_func seek, hts_tell_func tell); 1183 1184 /// Return the next record from an iterator 1185 /** @param fp Input file handle 1186 @param iter Iterator 1187 @param r Pointer to record placeholder 1188 @return >= 0 on success, -1 when there is no more data, < -1 on error 1189 */ 1190 int hts_itr_multi_next(htsFile *fd, hts_itr_t *iter, void *r); 1191 1192 /// Create a region list from a char array 1193 /** @param argv Char array of target:interval elements, e.g. chr1:2500-3600, chr1:5100, chr2 1194 @param argc Number of items in the array 1195 @param r_count Pointer to the number of items in the resulting region list 1196 @param hdr Header for the sam/bam/cram file 1197 @param getid Callback to convert target names to target ids. 1198 @return A region list on success, NULL on failure 1199 1200 The hts_reglist_t struct returned by a successful call should be freed 1201 via hts_reglist_free() when it is no longer needed. 1202 */ 1203 hts_reglist_t *hts_reglist_create(char **argv, int argc, int *r_count, void *hdr, hts_name2id_f getid); 1204 1205 /// Free a region list 1206 /** @param reglist Region list 1207 @param count Number of items in the list 1208 */ 1209 void hts_reglist_free(hts_reglist_t *reglist, int count); 1210 1211 /// Free a multi-region iterator 1212 /** @param iter Iterator to free 1213 */ 1214 alias hts_itr_multi_destroy = hts_itr_destroy; 1215 1216 /** 1217 * hts_file_type() - Convenience function to determine file type 1218 * DEPRECATED: This function has been replaced by hts_detect_format(). 1219 * It and these FT_* macros will be removed in a future HTSlib release. 1220 */ 1221 enum FT_UNKN = 0; 1222 enum FT_GZ = 1; /// ditto 1223 enum FT_VCF = 2; /// ditto 1224 enum FT_VCF_GZ = (FT_GZ|FT_VCF); /// ditto 1225 enum FT_BCF = (1<<2); /// ditto 1226 enum FT_BCF_GZ = (FT_GZ|FT_BCF); /// ditto 1227 enum FT_STDIN = (1<<3); /// ditto 1228 deprecated("This function has been replaced by hts_detect_format(). " 1229 ~ "It and these FT_* macros will be removed in a future HTSlib release.") 1230 int hts_file_type(const(char) *fname); 1231 1232 /+ 1233 /*************************** 1234 * Revised MAQ error model * 1235 ***************************/ 1236 1237 struct errmod_t; 1238 typedef struct errmod_t errmod_t; 1239 1240 errmod_t *errmod_init(double depcorr); 1241 void errmod_destroy(errmod_t *em); 1242 1243 /* 1244 n: number of bases 1245 m: maximum base 1246 bases[i]: qual:6, strand:1, base:4 1247 q[i*m+j]: phred-scaled likelihood of (i,j) 1248 */ 1249 int errmod_cal(const errmod_t *em, int n, int m, uint16_t *bases, float *q); 1250 1251 1252 /***************************************************** 1253 * Probabilistic banded glocal alignment * 1254 * See https://doi.org/10.1093/bioinformatics/btr076 * 1255 *****************************************************/ 1256 1257 typedef struct probaln_par_t { 1258 float d, e; 1259 int bw; 1260 } probaln_par_t; 1261 1262 /// Perform probabilistic banded glocal alignment 1263 /** @param ref Reference sequence 1264 @param l_ref Length of reference 1265 @param query Query sequence 1266 @param l_query Length of query sequence 1267 @param iqual Query base qualities 1268 @param c Alignment parameters 1269 @param[out] state Output alignment 1270 @param[out] q Phred scaled posterior probability of state[i] being wrong 1271 @return Phred-scaled likelihood score, or INT_MIN on failure. 1272 1273 The reference and query sequences are coded using integers 0,1,2,3,4 for 1274 bases A,C,G,T,N respectively (N here is for any ambiguity code). 1275 1276 On output, state and q are arrays of length l_query. The higher 30 1277 bits give the reference position the query base is matched to and the 1278 lower two bits can be 0 (an alignment match) or 1 (an 1279 insertion). q[i] gives the phred scaled posterior probability of 1280 state[i] being wrong. 1281 1282 On failure, errno will be set to EINVAL if the values of l_ref or l_query 1283 were invalid; or ENOMEM if a memory allocation failed. 1284 */ 1285 1286 int probaln_glocal(const uint8_t *ref, int l_ref, const uint8_t *query, int l_query, const uint8_t *iqual, const probaln_par_t *c, int *state, uint8_t *q); 1287 1288 1289 /********************** 1290 * MD5 implementation * 1291 **********************/ 1292 1293 struct hts_md5_context; 1294 typedef struct hts_md5_context hts_md5_context; 1295 1296 /*! @abstract Intialises an MD5 context. 1297 * @discussion 1298 * The expected use is to allocate an hts_md5_context using 1299 * hts_md5_init(). This pointer is then passed into one or more calls 1300 * of hts_md5_update() to compute successive internal portions of the 1301 * MD5 sum, which can then be externalised as a full 16-byte MD5sum 1302 * calculation by calling hts_md5_final(). This can then be turned 1303 * into ASCII via hts_md5_hex(). 1304 * 1305 * To dealloate any resources created by hts_md5_init() call the 1306 * hts_md5_destroy() function. 1307 * 1308 * @return hts_md5_context pointer on success, NULL otherwise. 1309 */ 1310 hts_md5_context *hts_md5_init(void); 1311 1312 /*! @abstract Updates the context with the MD5 of the data. */ 1313 void hts_md5_update(hts_md5_context *ctx, const void *data, unsigned long size); 1314 1315 /*! @abstract Computes the final 128-bit MD5 hash from the given context */ 1316 void hts_md5_final(unsigned char *digest, hts_md5_context *ctx); 1317 1318 /*! @abstract Resets an md5_context to the initial state, as returned 1319 * by hts_md5_init(). 1320 */ 1321 void hts_md5_reset(hts_md5_context *ctx); 1322 1323 /*! @abstract Converts a 128-bit MD5 hash into a 33-byte nul-termninated 1324 * hex string. 1325 */ 1326 void hts_md5_hex(char *hex, const unsigned char *digest); 1327 1328 /*! @abstract Deallocates any memory allocated by hts_md5_init. */ 1329 void hts_md5_destroy(hts_md5_context *ctx); 1330 +/ 1331 1332 pragma(inline,true) 1333 long hts_reg2bin(hts_pos_t beg, hts_pos_t end, int min_shift, int n_lvls) 1334 { 1335 int l, s = min_shift, t = ((1<<((n_lvls<<1) + n_lvls)) - 1) / 7; 1336 for (--end, l = n_lvls; l > 0; --l, s += 3, t -= 1<<((l<<1)+l)) 1337 if (beg>>s == end>>s) return t + (beg>>s); 1338 return 0; 1339 } 1340 /+ 1341 static inline int hts_bin_bot(int bin, int n_lvls) 1342 { 1343 int l, b; 1344 for (l = 0, b = bin; b; ++l, b = hts_bin_parent(b)); // compute the level of bin 1345 return (bin - hts_bin_first(l)) << (n_lvls - l) * 3; 1346 } 1347 1348 /************** 1349 * Endianness * 1350 **************/ 1351 1352 static inline int ed_is_big(void) 1353 { 1354 long one= 1; 1355 return !(*((char *)(&one))); 1356 } 1357 static inline uint16_t ed_swap_2(uint16_t v) 1358 { 1359 return (uint16_t)(((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8)); 1360 } 1361 static inline void *ed_swap_2p(void *x) 1362 { 1363 *(uint16_t*)x = ed_swap_2(*(uint16_t*)x); 1364 return x; 1365 } 1366 static inline uint32_t ed_swap_4(uint32_t v) 1367 { 1368 v = ((v & 0x0000FFFFU) << 16) | (v >> 16); 1369 return ((v & 0x00FF00FFU) << 8) | ((v & 0xFF00FF00U) >> 8); 1370 } 1371 static inline void *ed_swap_4p(void *x) 1372 { 1373 *(uint32_t*)x = ed_swap_4(*(uint32_t*)x); 1374 return x; 1375 } 1376 static inline uint64_t ed_swap_8(uint64_t v) 1377 { 1378 v = ((v & 0x00000000FFFFFFFFLLU) << 32) | (v >> 32); 1379 v = ((v & 0x0000FFFF0000FFFFLLU) << 16) | ((v & 0xFFFF0000FFFF0000LLU) >> 16); 1380 return ((v & 0x00FF00FF00FF00FFLLU) << 8) | ((v & 0xFF00FF00FF00FF00LLU) >> 8); 1381 } 1382 static inline void *ed_swap_8p(void *x) 1383 { 1384 *(uint64_t*)x = ed_swap_8(*(uint64_t*)x); 1385 return x; 1386 } 1387 1388 +/