1 // htslib-1.9 faidx.h as D module 2 module dhtslib.htslib.faidx; 3 import dhtslib.htslib.bgzf : BGZF; 4 extern (C): 5 6 // @file htslib/faidx.h 7 // FASTA random access. 8 /* 9 Copyright (C) 2008, 2009, 2013, 2014, 2016, 2017-2018 Genome Research Ltd. 10 11 Author: Heng Li <lh3@sanger.ac.uk> 12 13 Permission is hereby granted, free of charge, to any person obtaining 14 a copy of this software and associated documentation files (the 15 "Software"), to deal in the Software without restriction, including 16 without limitation the rights to use, copy, modify, merge, publish, 17 distribute, sublicense, and/or sell copies of the Software, and to 18 permit persons to whom the Software is furnished to do so, subject to 19 the following conditions: 20 21 The above copyright notice and this permission notice shall be 22 included in all copies or substantial portions of the Software. 23 24 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 25 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 26 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 27 NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 28 BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 29 ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 30 CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 31 SOFTWARE. 32 */ 33 34 /** @file 35 36 Index FASTA or FASTQ files and extract subsequence. 37 38 The fai file index columns for FASTA are: 39 - chromosome name 40 - chromosome length: number of bases 41 - offset: number of bytes to skip to get to the first base 42 from the beginning of the file, including the length 43 of the sequence description string (`>chr ..\n`) 44 - line length: number of bases per line (excluding `\n`) 45 - binary line length: number of bytes, including `\n` 46 47 The index for FASTQ is similar to above: 48 - chromosome name 49 - chromosome length: number of bases 50 - sequence offset: number of bytes to skip to get to the first base 51 from the beginning of the file, including the length 52 of the sequence description string (`@chr ..\n`) 53 - line length: number of bases per line (excluding `\n`) 54 - binary line length: number of bytes, including `\n` 55 - quality offset: number of bytes to skip from the beginning of the file 56 to get to the first quality value in the indexed entry. 57 58 The FASTQ version of the index uses line length and binary line length 59 for both the sequence and the quality values, so they must be line 60 wrapped in the same way. 61 */ 62 63 /// Opaque structure representing FASTA index 64 struct __faidx_t 65 { 66 BGZF *bgzf; 67 }; // @suppress(dscanner.style.phobos_naming_convention) 68 /// ditto 69 alias faidx_t = __faidx_t; 70 71 /// File format to be dealing with. 72 enum fai_format_options { // @suppress(dscanner.style.phobos_naming_convention) 73 FAI_NONE, 74 FAI_FASTA, 75 FAI_FASTQ 76 } 77 78 /// Build index for a FASTA or FASTQ or bgzip-compressed FASTA or FASTQ file. 79 /** @param fn FASTA/FASTQ file name 80 @param fnfai Name of .fai file to build. 81 @param fngzi Name of .gzi file to build (if fn is bgzip-compressed). 82 @return 0 on success; or -1 on failure 83 84 If fnfai is NULL, ".fai" will be appended to fn to make the FAI file name. 85 If fngzi is NULL, ".gzi" will be appended to fn for the GZI file. The GZI 86 file will only be built if fn is bgzip-compressed. 87 */ 88 int fai_build3(const(char) *fn, const(char) *fnfai, const(char) *fngzi); 89 90 /// Build index for a FASTA or FASTQ or bgzip-compressed FASTA or FASTQ file. 91 /** @param fn FASTA/FASTQ file name @return 0 on success; or -1 on failure 92 93 File "fn.fai" will be generated. This function is equivalent to 94 fai_build3(fn, NULL, NULL); 95 */ 96 int fai_build(const(char) *fn); 97 98 /// Destroy a faidx_t struct 99 void fai_destroy(faidx_t *fai); 100 101 /// Options for fai_load functions 102 enum fai_load_options { // @suppress(dscanner.style.phobos_naming_convention) 103 FAI_CREATE = 0x01, 104 } 105 106 /// Load FASTA indexes. 107 /** @param fn File name of the FASTA file (can be compressed with bgzip). 108 @param fnfai File name of the FASTA index. 109 @param fngzi File name of the bgzip index. 110 @param flags Option flags to control index file caching and creation. 111 @return Pointer to a faidx_t struct on success, NULL on failure. 112 113 If fnfai is NULL, ".fai" will be appended to fn to make the FAI file name. 114 If fngzi is NULL, ".gzi" will be appended to fn for the bgzip index name. 115 The bgzip index is only needed if fn is compressed. 116 117 If (flags & FAI_CREATE) is true, the index files will be built using 118 fai_build3() if they are not already present. 119 */ 120 faidx_t *fai_load3(const(char) *fn, const(char) *fnfai, const(char) *fngzi, 121 int flags); 122 123 /// Load index from "fn.fai". 124 /** @param fn File name of the FASTA file 125 @return Pointer to a faidx_t struct on success, NULL on failure. 126 127 This function is equivalent to fai_load3(fn, NULL, NULL, FAI_CREATE|FAI_CACHE); 128 */ 129 faidx_t *fai_load(const(char) *fn); 130 131 /// Load FASTA or FASTQ indexes. 132 /** @param fn File name of the FASTA/FASTQ file (can be compressed with bgzip). 133 @param fnfai File name of the FASTA/FASTQ index. 134 @param fngzi File name of the bgzip index. 135 @param flags Option flags to control index file caching and creation. 136 @param format FASTA or FASTQ file format 137 @return Pointer to a faidx_t struct on success, NULL on failure. 138 139 If fnfai is NULL, ".fai" will be appended to fn to make the FAI file name. 140 If fngzi is NULL, ".gzi" will be appended to fn for the bgzip index name. 141 The bgzip index is only needed if fn is compressed. 142 143 If (flags & FAI_CREATE) is true, the index files will be built using 144 fai_build3() if they are not already present. 145 */ 146 faidx_t *fai_load3_format(const(char) *fn, const(char) *fnfai, const(char) *fngzi, 147 int flags, fai_format_options format); 148 149 /// Load index from "fn.fai". 150 /** @param fn File name of the FASTA/FASTQ file 151 @param format FASTA or FASTQ file format 152 @return Pointer to a faidx_t struct on success, NULL on failure. 153 154 This function is equivalent to fai_load3_format(fn, NULL, NULL, FAI_CREATE|FAI_CACHE, format); 155 */ 156 faidx_t *fai_load_format(const(char) *fn, fai_format_options format); 157 158 /// Fetch the sequence in a region 159 /** @param fai Pointer to the faidx_t struct 160 @param reg Region in the format "chr2:20,000-30,000" 161 @param len Length of the region; -2 if seq not present, -1 general error 162 @return Pointer to the sequence; `NULL` on failure 163 164 The returned sequence is allocated by `malloc()` family and should be destroyed 165 by end users by calling `free()` on it. 166 */ 167 char *fai_fetch(const(faidx_t) *fai, const(char) *reg, int *len); 168 169 /// Fetch the quality string for a region for FASTQ files 170 /** @param fai Pointer to the faidx_t struct 171 @param reg Region in the format "chr2:20,000-30,000" 172 @param len Length of the region; -2 if seq not present, -1 general error 173 @return Pointer to the quality string; null on failure 174 175 The returned quality string is allocated by `malloc()` family and should be destroyed 176 by end users by calling `free()` on it. 177 */ 178 char *fai_fetchqual(const(faidx_t) *fai, const(char) *reg, int *len); 179 180 /// Fetch the number of sequences 181 /** @param fai Pointer to the faidx_t struct 182 @return The number of sequences 183 */ 184 ///int faidx_fetch_nseq(const faidx_t *fai) HTS_DEPRECATED("Please use faidx_nseq instead"); 185 186 /// Fetch the sequence in a region 187 /** @param fai Pointer to the faidx_t struct 188 @param c_name Region name 189 @param p_beg_i Beginning position number (zero-based) 190 @param p_end_i End position number (zero-based) 191 @param len Length of the region; -2 if c_name not present, -1 general error 192 @return Pointer to the sequence; null on failure 193 194 The returned sequence is allocated by `malloc()` family and should be destroyed 195 by end users by calling `free()` on it. 196 */ 197 char *faidx_fetch_seq(const(faidx_t) *fai, const(char) *c_name, int p_beg_i, int p_end_i, int *len); 198 199 /// Fetch the quality string in a region for FASTQ files 200 /** @param fai Pointer to the faidx_t struct 201 @param c_name Region name 202 @param p_beg_i Beginning position number (zero-based) 203 @param p_end_i End position number (zero-based) 204 @param len Length of the region; -2 if c_name not present, -1 general error 205 @return Pointer to the sequence; null on failure 206 207 The returned sequence is allocated by `malloc()` family and should be destroyed 208 by end users by calling `free()` on it. 209 */ 210 char *faidx_fetch_qual(const(faidx_t) *fai, const(char) *c_name, int p_beg_i, int p_end_i, int *len); 211 212 /// Query if sequence is present 213 /** @param fai Pointer to the faidx_t struct 214 @param seq Sequence name 215 @return 1 if present or 0 if absent 216 */ 217 int faidx_has_seq(const(faidx_t) *fai, const(char) *seq); 218 219 /// Return number of sequences in fai index 220 int faidx_nseq(const(faidx_t) *fai); 221 222 /// Return name of i-th sequence 223 const(char) *faidx_iseq(const(faidx_t) *fai, int i); 224 225 /// Return sequence length, -1 if not present 226 int faidx_seq_len(const(faidx_t) *fai, const(char) *seq);