1 // htslib-1.9 faidx.h as D module
2 module dhtslib.htslib.faidx;
3 import dhtslib.htslib.bgzf : BGZF;
4 extern (C):
5 
6 // @file htslib/faidx.h
7 // FASTA random access.
8 /*
9    Copyright (C) 2008, 2009, 2013, 2014, 2016, 2017-2018 Genome Research Ltd.
10 
11    Author: Heng Li <lh3@sanger.ac.uk>
12 
13    Permission is hereby granted, free of charge, to any person obtaining
14    a copy of this software and associated documentation files (the
15    "Software"), to deal in the Software without restriction, including
16    without limitation the rights to use, copy, modify, merge, publish,
17    distribute, sublicense, and/or sell copies of the Software, and to
18    permit persons to whom the Software is furnished to do so, subject to
19    the following conditions:
20 
21    The above copyright notice and this permission notice shall be
22    included in all copies or substantial portions of the Software.
23 
24    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
25    EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
26    MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
27    NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
28    BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
29    ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
30    CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31    SOFTWARE.
32 */
33 
34 /** @file
35 
36   Index FASTA or FASTQ files and extract subsequence.
37 
38   The fai file index columns for FASTA are:
39     - chromosome name
40     - chromosome length: number of bases
41     - offset: number of bytes to skip to get to the first base
42         from the beginning of the file, including the length
43         of the sequence description string (`>chr ..\n`)
44     - line length: number of bases per line (excluding `\n`)
45     - binary line length: number of bytes, including `\n`
46 
47    The index for FASTQ is similar to above:
48     - chromosome name
49     - chromosome length: number of bases
50     - sequence offset: number of bytes to skip to get to the first base
51         from the beginning of the file, including the length
52         of the sequence description string (`@chr ..\n`)
53     - line length: number of bases per line (excluding `\n`)
54     - binary line length: number of bytes, including `\n`
55     - quality offset: number of bytes to skip from the beginning of the file
56         to get to the first quality value in the indexed entry.
57 
58     The FASTQ version of the index uses line length and binary line length
59     for both the sequence and the quality values, so they must be line
60     wrapped in the same way.
61  */
62 
/**
    Opaque structure representing a FASTA index.

    Only the `bgzf` member is declared in this binding.
    NOTE(review): the C-side definition presumably carries further members
    after `bgzf`, so use this type only through pointers -- confirm against
    the htslib sources.
*/
struct __faidx_t // @suppress(dscanner.style.phobos_naming_convention)
{
    BGZF* bgzf;
}

/// ditto
alias faidx_t = __faidx_t;
70 
/// Which file format is being dealt with.
enum fai_format_options // @suppress(dscanner.style.phobos_naming_convention)
{
    FAI_NONE = 0,
    FAI_FASTA = 1, /// FASTA file
    FAI_FASTQ = 2 /// FASTQ file
}
77 
/**
    Build the index for a FASTA or FASTQ file, plain or bgzip-compressed.

    @param  fn     FASTA/FASTQ file name
    @param  fnfai  Name of the .fai file to build
    @param  fngzi  Name of the .gzi file to build (if fn is bgzip-compressed)
    @return        0 on success; -1 on failure

    A NULL fnfai means fn with ".fai" appended is used as the FAI file name.
    A NULL fngzi means fn with ".gzi" appended is used for the GZI file.
    The GZI file is only built when fn is bgzip-compressed.
*/
int fai_build3(const(char)* fn, const(char)* fnfai, const(char)* fngzi);
89 
/**
    Build the index for a FASTA or FASTQ file, plain or bgzip-compressed.

    @param  fn  FASTA/FASTQ file name
    @return     0 on success; -1 on failure

    Generates the file "fn.fai".  Calling this is equivalent to
    fai_build3(fn, NULL, NULL).
*/
int fai_build(const(char)* fn);
97 
/**
    Destroy a faidx_t struct.

    @param  fai  Pointer to the faidx_t struct to destroy
*/
void fai_destroy(faidx_t* fai);
100 
/// Option flags accepted by the fai_load functions.
enum fai_load_options // @suppress(dscanner.style.phobos_naming_convention)
{
    /// Build the index files with fai_build3() if they are not already present.
    FAI_CREATE = 1 << 0,
}
105 
/**
    Load FASTA indexes.

    @param  fn     File name of the FASTA file (can be compressed with bgzip)
    @param  fnfai  File name of the FASTA index
    @param  fngzi  File name of the bgzip index
    @param  flags  Option flags to control index file caching and creation
    @return        Pointer to a faidx_t struct on success, NULL on failure

    A NULL fnfai means fn with ".fai" appended is used as the FAI file name.
    A NULL fngzi means fn with ".gzi" appended is used as the bgzip index
    name.  The bgzip index is only needed if fn is compressed.

    When (flags & FAI_CREATE) is true, index files that are not already
    present are built using fai_build3().
*/
faidx_t* fai_load3(
    const(char)* fn,
    const(char)* fnfai,
    const(char)* fngzi,
    int flags);
122 
/**
    Load the index from "fn.fai".

    @param  fn  File name of the FASTA file
    @return     Pointer to a faidx_t struct on success, NULL on failure

    Calling this is equivalent to fai_load3(fn, NULL, NULL, FAI_CREATE).
    (FAI_CREATE is the only flag declared in fai_load_options.)
*/
faidx_t* fai_load(const(char)* fn);
130 
/**
    Load FASTA or FASTQ indexes.

    @param  fn      File name of the FASTA/FASTQ file (can be compressed with bgzip)
    @param  fnfai   File name of the FASTA/FASTQ index
    @param  fngzi   File name of the bgzip index
    @param  flags   Option flags to control index file caching and creation
    @param  format  FASTA or FASTQ file format
    @return         Pointer to a faidx_t struct on success, NULL on failure

    A NULL fnfai means fn with ".fai" appended is used as the FAI file name.
    A NULL fngzi means fn with ".gzi" appended is used as the bgzip index
    name.  The bgzip index is only needed if fn is compressed.

    When (flags & FAI_CREATE) is true, index files that are not already
    present are built using fai_build3().
*/
faidx_t* fai_load3_format(
    const(char)* fn,
    const(char)* fnfai,
    const(char)* fngzi,
    int flags,
    fai_format_options format);
148 
/**
    Load the index from "fn.fai".

    @param  fn      File name of the FASTA/FASTQ file
    @param  format  FASTA or FASTQ file format
    @return         Pointer to a faidx_t struct on success, NULL on failure

    Calling this is equivalent to
    fai_load3_format(fn, NULL, NULL, FAI_CREATE, format).
    (FAI_CREATE is the only flag declared in fai_load_options.)
*/
faidx_t* fai_load_format(const(char)* fn, fai_format_options format);
157 
/**
    Fetch the sequence in a region.

    @param  fai  Pointer to the faidx_t struct
    @param  reg  Region in the format "chr2:20,000-30,000"
    @param  len  Length of the region; -2 if seq not present, -1 general error
    @return      Pointer to the sequence; `NULL` on failure

    The returned sequence comes from the `malloc()` family; the end user is
    responsible for destroying it by calling `free()` on it.
*/
char* fai_fetch(const(faidx_t)* fai, const(char)* reg, int* len);
168 
/**
    Fetch the quality string for a region (FASTQ files).

    @param  fai  Pointer to the faidx_t struct
    @param  reg  Region in the format "chr2:20,000-30,000"
    @param  len  Length of the region; -2 if seq not present, -1 general error
    @return      Pointer to the quality string; null on failure

    The returned quality string comes from the `malloc()` family; the end
    user is responsible for destroying it by calling `free()` on it.
*/
char* fai_fetchqual(const(faidx_t)* fai, const(char)* reg, int* len);
179 
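// Fetch the number of sequences (deprecated declaration, kept for reference only).
// Written as ordinary comments so Ddoc does not attach it to the next declaration.
/*  @param  fai  Pointer to the faidx_t struct
    @return      The number of sequences
*/
// int faidx_fetch_nseq(const faidx_t *fai) HTS_DEPRECATED("Please use faidx_nseq instead");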
185 
/**
    Fetch the sequence in a region.

    @param  fai      Pointer to the faidx_t struct
    @param  c_name   Region name
    @param  p_beg_i  Beginning position number (zero-based)
    @param  p_end_i  End position number (zero-based)
    @param  len      Length of the region; -2 if c_name not present, -1 general error
    @return          Pointer to the sequence; null on failure

    The returned sequence comes from the `malloc()` family; the end user is
    responsible for destroying it by calling `free()` on it.
*/
char* faidx_fetch_seq(
    const(faidx_t)* fai,
    const(char)* c_name,
    int p_beg_i,
    int p_end_i,
    int* len);
198 
/**
    Fetch the quality string in a region (FASTQ files).

    @param  fai      Pointer to the faidx_t struct
    @param  c_name   Region name
    @param  p_beg_i  Beginning position number (zero-based)
    @param  p_end_i  End position number (zero-based)
    @param  len      Length of the region; -2 if c_name not present, -1 general error
    @return          Pointer to the quality string; null on failure

    The returned string comes from the `malloc()` family; the end user is
    responsible for destroying it by calling `free()` on it.
*/
char* faidx_fetch_qual(
    const(faidx_t)* fai,
    const(char)* c_name,
    int p_beg_i,
    int p_end_i,
    int* len);
211 
/**
    Query whether a sequence is present.

    @param  fai  Pointer to the faidx_t struct
    @param  seq  Sequence name
    @return      1 if present or 0 if absent
*/
int faidx_has_seq(const(faidx_t)* fai, const(char)* seq);
218 
/**
    Return the number of sequences in the fai index.

    @param  fai  Pointer to the faidx_t struct
*/
int faidx_nseq(const(faidx_t)* fai);
221 
/**
    Return the name of the i-th sequence.

    @param  fai  Pointer to the faidx_t struct
    @param  i    Index of the sequence whose name is requested
*/
const(char)* faidx_iseq(const(faidx_t)* fai, int i);
224 
/**
    Return the length of a sequence.

    @param  fai  Pointer to the faidx_t struct
    @param  seq  Sequence name
    @return      The sequence length, -1 if not present
*/
int faidx_seq_len(const(faidx_t)* fai, const(char)* seq);