1 /// @file htslib/hfile.h
2 /// Buffered low-level input/output streams.
3 /*
4     Copyright (C) 2013-2020 Genome Research Ltd.
5 
6     Author: John Marshall <jm18@sanger.ac.uk>
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 DEALINGS IN THE SOFTWARE.  */
25 module htslib.hfile;
26 
27 import core.sys.posix.sys.types;
28 import core.stdc.string : memcpy, strlen;
29 
30 import htslib.kstring : kstring_t;
31 
32 @system:
33 nothrow:
34 @nogc:
35 
36 extern (C):
37 
// Signed counterparts of the C <sys/types.h> types.  The functions declared
// in this module report failure through negative off_t/ssize_t results, so
// both aliases must be signed; with an unsigned alias a caller's `< 0` error
// check can never be true.
alias off_t = long;        // NOTE(review): assumes a 64-bit off_t in the linked C library — confirm
alias ssize_t = ptrdiff_t; // signed integer with the same width as size_t
41 
/// Opaque backend descriptor (internal).  Only declared here, never defined,
/// so that hFILE can hold a pointer to it.
struct hFILE_backend; // @suppress(dscanner.style.phobos_naming_convention)
44 
/// Low-level input/output stream handle
/** The fields of this structure are declared here solely for the benefit
of the hFILE-related inline functions.  They may change in future releases.
User code should not use them directly; you should imagine that hFILE is an
opaque incomplete type.

NOTE(review): this is an `extern (C)` declaration, so presumably the field
order and sizes must stay layout-compatible with the C definition of hFILE;
confirm against the C header before changing anything here.
*/
struct hFILE
{
    import std.bitmanip : bitfields;

    // @cond internal
    char* buffer; // start of the buffer; htell() measures `begin` relative to it
    char* begin;  // current position: next byte taken by hgetc()/hread() or stored by hputc()/hputs()/hwrite()
    char* end;    // the inline readers consume bytes while `begin < end`
    char* limit;  // the inline writers store bytes while `begin < limit`
    const(hFILE_backend)* backend; // opaque; not touched by the inline functions in this module
    off_t offset; // htell() adds this to `begin - buffer` to form the stream offset

    // Three one-bit flags followed by 5 unnamed padding bits (8 bits total).
    // `mobile` is consulted by hread() and hwrite(); `at_eof` and `readonly`
    // are not used by the inline functions in this module.
    mixin(bitfields!(
        uint, "at_eof", 1,
        uint, "mobile", 1,
        uint, "readonly", 1,
        uint, "", 5));

    int has_errno; // error indicator: returned by herrno(), zeroed by hclearerr()
    // @endcond
}
72 
/// Open the named file or URL as a stream
/** @param filename  The file name or URL to open
    @param mode      An `fopen(3)`-style mode string, as described below
    @return An hFILE pointer, or `NULL` (with _errno_ set) if an error occurred.

The usual `fopen(3)` _mode_ letters are supported: one of
`r` (read), `w` (write), `a` (append), optionally followed by any of
`+` (update), `e` (close on `exec(2)`), `x` (create exclusively),
`:` (indicates scheme-specific variable arguments follow).
*/
hFILE* hopen(const(char)* filename, const(char)* mode, ...);
82 
/// Associate a stream with an existing open file descriptor
/** @param fd    The already-open file descriptor
    @param mode  The mode string for the new stream
    @return An hFILE pointer, or `NULL` (with _errno_ set) if an error occurred.

Note that the file must be opened in binary mode, or else
there will be problems on platforms that make a difference
between text and binary mode.

For socket descriptors (on Windows), _mode_ should contain `s`.
*/
hFILE* hdopen(int fd, const(char)* mode);
93 
/// Report whether the file name or URL denotes remote storage
/** @param filename  The file name or URL to classify
    @return  0 if local, 1 if remote.

"Remote" means involving e.g. explicit network access, with the implication
that callers may wish to cache such files' contents locally.
*/
int hisremote(const(char)* filename);
101 
/// Append an extension or replace an existing extension
/** @param buffer     The kstring to be used to store the modified filename
    @param filename   The filename to be (copied and) adjusted
    @param replace    If non-zero, one extension (if any) is removed first
    @param extension  The extension to be added (e.g. ".csi")
    @return  The modified filename (i.e., `buffer.s`), or NULL on error.
    @since   1.10

If _filename_ is a URL, alters extensions at the end of the `hier-part`,
leaving any trailing `?query` or `#fragment` unchanged.
*/
char* haddextension(
    kstring_t* buffer,
    const(char)* filename,
    int replace,
    const(char)* extension);
118 
/// Flush (for output streams) and close the stream
/** @param fp  The stream to close
    @return  0 if successful, or `EOF` (with _errno_ set) if an error occurred.
*/
int hclose(hFILE* fp);
123 
/// Close the stream, without flushing or propagating errors
/** For use while cleaning up after an error only.  Preserves _errno_.

    @param fp  The stream to close
*/
void hclose_abruptly(hFILE* fp);
128 
/// Return the stream's error indicator
/** @param fp  The stream to query; it is dereferenced without a null check.
    @return  Non-zero (in fact, an _errno_ value) if an error has occurred.

The natural name would be `herror()`, returning true/false like `ferror(3)`,
but a networking-related `herror(3)` function already exists.
*/
pragma(inline, true)
int herrno(hFILE* fp)
{
    const int code = (*fp).has_errno;
    return code;
}
140 
/// Clear the stream's error indicator
/** @param fp  The stream whose indicator is reset to zero; it is dereferenced
               without a null check.
*/
pragma(inline, true)
void hclearerr(hFILE* fp)
{
    (*fp).has_errno = 0;
}
147 
/// Reposition the read/write stream offset
/** @param fp      The stream to reposition
    @param offset  The offset to apply, interpreted according to _whence_
    @param whence  The reference point for _offset_ (as per `lseek(2)`)
    @return  The resulting offset within the stream (as per `lseek(2)`),
    or negative if an error occurred.
*/
off_t hseek(hFILE* fp, off_t offset, int whence);
153 
/// Report the current stream offset
/** @param fp  The stream to query
    @return  The offset within the stream, starting from zero.

Computed as the stream's recorded offset plus the distance of the current
position from the start of the buffer.
*/
pragma(inline, true)
off_t htell(hFILE* fp)
{
    const ptrdiff_t consumed = fp.begin - fp.buffer;
    return fp.offset + consumed;
}
162 
/// Read one character from the stream
/** @param fp  The stream to read from
    @return  The character read, or `EOF` on end-of-file or error.

Buffered bytes are returned directly (as an unsigned byte value); when
nothing is buffered the request is delegated to hgetc2().
*/
pragma(inline, true)
int hgetc(hFILE* fp)
{
    if (fp.begin < fp.end)
    {
        const ubyte next = cast(ubyte) *fp.begin;
        ++fp.begin;
        return next;
    }
    return hgetc2(fp);
}
/// ditto
int hgetc2(hFILE* );
174 
/// Read from the stream until the delimiter, up to a maximum length
/** @param buffer  The buffer into which bytes will be written
    @param size    The size of the buffer
    @param delim   The delimiter (interpreted as an `unsigned char`)
    @param fp      The file stream
    @return  The number of bytes read, or negative on error.
    @since   1.4

Bytes will be read into the buffer up to and including a delimiter, until
EOF is reached, or _size-1_ bytes have been written, whichever comes first.
The string will then be terminated with a NUL byte (`\0`).

See hgetln() for the common case of a `\n` delimiter.
*/
ssize_t hgetdelim(char* buffer, size_t size, int delim, hFILE* fp);
188 
/// Read a line from the stream, up to a maximum length
/** @param buffer  The buffer into which bytes will be written
    @param size    The size of the buffer
    @param fp      The file stream
    @return  The number of bytes read, or negative on error.
    @since   1.4

Forwards to hgetdelim() with `\n` as the delimiter.
*/
pragma(inline, true)
ssize_t hgetln(char* buffer, size_t size, hFILE* fp)
{
    enum int lineDelim = '\n';
    return hgetdelim(buffer, size, lineDelim, fp);
}
203 
/// Read a line from the stream, up to a maximum length
/** @param buffer  The buffer into which bytes will be written
    @param size    The size of the buffer (must be > 1 to be useful);
                   note that this is an `int`, unlike hgetln()'s `size_t`
    @param fp      The file stream
    @return  _buffer_ on success, or `NULL` if an error occurred.
    @since   1.4

This function can be used as a replacement for `fgets(3)`, or together with
kstring's `kgetline()` to read arbitrarily-long lines into a _kstring_t_.
*/
char* hgets(char* buffer, int size, hFILE* fp);
215 
/// Peek at characters to be read without removing them from buffers
/** @param fp      The file stream
    @param buffer  The buffer to which the peeked bytes will be written
    @param nbytes  The number of bytes to peek at; limited by the size of the
                   internal buffer, which could be as small as 4K.
    @return  The number of bytes peeked, which may be less than _nbytes_
             if EOF is encountered; or negative, if there was an I/O error.

The characters peeked at remain in the stream's internal buffer, and will be
returned by later hread() etc. calls.
*/
ssize_t hpeek(hFILE* fp, void* buffer, size_t nbytes);
228 
/// Read a block of characters from the file
/** @param fp      The stream to read from
    @param buffer  Destination for the bytes read
    @param nbytes  The number of bytes requested
    @return  The number of bytes read, or negative if an error occurred.

The full _nbytes_ requested will be returned, except as limited by EOF
or I/O errors.
*/
pragma(inline, true)
ssize_t hread(hFILE* fp, void* buffer, size_t nbytes)
{
    // Serve as much as possible from the bytes that are already buffered.
    const size_t buffered = fp.end - fp.begin;
    const size_t taken = (buffered < nbytes) ? buffered : nbytes;
    memcpy(buffer, fp.begin, taken);
    fp.begin += taken;

    // Only a short read on a "mobile" stream is handed on to hread2().
    if (taken != nbytes && fp.mobile)
        return hread2(fp, buffer, nbytes, taken);
    return cast(ssize_t) taken;
}
/// ditto
ssize_t hread2(hFILE* , void* , size_t, size_t);
246 
/// Write a character to the stream
/** @param c   The character to write
    @param fp  The stream to write to
    @return  The character written, or `EOF` if an error occurred.

The byte is stored directly while there is room before `limit`; otherwise
the write is delegated to hputc2().
*/
pragma(inline, true)
int hputc(int c, hFILE* fp)
{
    if (fp.begin >= fp.limit)
        return hputc2(c, fp);

    *fp.begin = cast(char) c;
    ++fp.begin;
    return c;
}
/// ditto
int hputc2(int, hFILE* );
259 
/// Write a string to the stream
/** @param text  The NUL-terminated string to write
    @param fp    The stream to write to
    @return  0 if successful, or `EOF` if an error occurred.

As much of the string as fits is copied into the buffer; any remainder is
handed to hputs2().
*/
pragma(inline, true)
int hputs(const(char)* text, hFILE* fp)
{
    const size_t nbytes = strlen(text);
    const size_t room = fp.limit - fp.begin;
    const size_t copied = (room > nbytes) ? nbytes : room;

    memcpy(fp.begin, text, copied);
    fp.begin += copied;

    if (copied == nbytes)
        return 0;
    return hputs2(text, nbytes, copied, fp);
}
/// ditto
int hputs2(const (char)*, size_t, size_t, hFILE* );
275 
/// Write a block of characters to the file
/** @param fp      The stream to write to
    @param buffer  The bytes to write
    @param nbytes  The number of bytes to write
    @return  Either _nbytes_, or negative if an error occurred.

In the absence of I/O errors, the full _nbytes_ will be written.
*/
pragma(inline, true)
ssize_t hwrite(hFILE* fp, const(void)* buffer, size_t nbytes)
{
    // A non-"mobile" stream without enough room asks for a larger block size
    // first; the result of that request is not checked here.
    if (!fp.mobile)
    {
        const size_t room = fp.limit - fp.begin;
        if (room < nbytes)
        {
            hfile_set_blksize(fp, fp.limit - fp.buffer + nbytes);
            fp.end = fp.limit;
        }
    }

    const size_t avail = fp.limit - fp.begin;

    // Go straight to hwrite2 if the buffer is empty and the request
    // won't fit.
    if (fp.begin == fp.buffer && nbytes >= avail)
        return hwrite2(fp, buffer, nbytes, 0);

    // Otherwise copy what fits and let hwrite2 deal with any remainder.
    const size_t chunk = (avail > nbytes) ? nbytes : avail;
    memcpy(fp.begin, buffer, chunk);
    fp.begin += chunk;

    if (chunk == nbytes)
        return cast(ssize_t) chunk;
    return hwrite2(fp, buffer, nbytes, chunk);
}
/// ditto
ssize_t hwrite2(hFILE* , const(void)* , size_t, size_t);
/// set hfile blocksize
int hfile_set_blksize(hFILE* fp, size_t bufsiz);
312 
/// For writing streams, flush buffered output to the underlying stream
/** @param fp  The stream to flush
    @return  0 if successful, or `EOF` if an error occurred.

This includes low-level flushing such as via `fdatasync(2)`.
*/
int hflush(hFILE* fp);
319 
/// For hfile_mem: get the internal buffer and its size from a hfile
/** @param file    The hFILE to query
    @param length  Presumably receives the size of the buffer (TODO confirm)
    @return  buffer if successful, or NULL if an error occurred

The buffer returned should not be freed as this will happen when the
hFILE is closed.
*/
char* hfile_mem_get_buffer(hFILE* file, size_t* length);
327 
/// For hfile_mem: get the internal buffer and its size from a hfile.
/** @param file    The hFILE to query
    @param length  Presumably receives the size of the buffer (TODO confirm)
    @return  buffer if successful, or NULL if an error occurred

This is similar to hfile_mem_get_buffer except that ownership of the
buffer is granted to the caller, who now has responsibility for freeing
it.  From this point onwards, the hFILE should not be used for any
purpose other than closing.
*/
char* hfile_mem_steal_buffer(hFILE* file, size_t* length);
337 
/// Fills out sc_list[] with the list of known URL schemes.
/**
 * @param plugin   [in]     Restricts schemes to only those from 'plugin'.
 * @param sc_list  [out]    Filled out with the scheme names
 * @param nschemes [in/out] Size of sc_list (in) and number returned (out)
 *
 * Plugin may be passed in as NULL in which case all schemes are returned.
 * Use plugin "built-in" to list the built in schemes.
 * The size of sc_list is determined by the input value of *nschemes.
 * This is updated to return the output size.  It is up to the caller to
 * determine whether to call again with a larger number if this is too small.
 *
 * The return value represents the total number found matching plugin, which
 * may be larger than *nschemes if too small a value was specified.
 *
 * @return the number of schemes found on success.
 *         -1 on failure
 */
int hfile_list_schemes(
    const(char)* plugin,
    const(char)** sc_list,
    int* nschemes);
360 
/// Fills out plist[] with the list of known hFILE plugins.
/**
 * @param plist    [out]    Filled out with the plugin names
 * @param nplugins [in/out] Size of plist (in) and number returned (out)
 *
 * The size of plist is determined by the input value of *nplugins.
 * This is updated to return the output size.  It is up to the caller to
 * determine whether to call again with a larger number if this is too small.
 *
 * The return value represents the total number found, which may be
 * larger than *nplugins if too small a value was specified.
 *
 * @return the number of plugins found on success.
 *         -1 on failure
 */
int hfile_list_plugins(const(char)** plist, int* nplugins);
377 
/// Tests for the presence of a specific hFILE plugin.
/**
 * @param name     The name of the plugin to query.
 *
 * @return 1 if found, 0 otherwise.
 */
int hfile_has_plugin(const(char)* name);
385