1 // htslib-1.9 bgzf.h as D module
2 // Changes include:
3 // Removed if(n)defs
4 // Changed #defines to const/immutable
5 // Removed all HTS_RESULT_USED (__attribute__ ((__warn_unused_result__)))
6 // HTS_DEPRECATED(message) to deprecated("message")
7 // Do not #include "hts_defs.h"
8 // Change numeric #defines to enum int
9 // typedef struct to alias
10 // modified bitfields in struct and aligned(1)
11 // removed redundant struct declarations when declaring struct pointers
12 // replace local definition with import kstring_t
13 // const TYPE * to const(TYPE) *
14 module htslib.bgzf;
15 
16 import std.bitmanip;
17 import htslib.kstring;
18 
19 extern (C):
20 
21 // @file htslib/bgzf.h
22 // Low-level routines for direct BGZF operations.
23 /*
24    Copyright (c) 2008 Broad Institute / Massachusetts Institute of Technology
25                  2011, 2012 Attractive Chaos <attractor@live.co.uk>
26    Copyright (C) 2009, 2013, 2014, 2017, 2018-2019 Genome Research Ltd
27    Permission is hereby granted, free of charge, to any person obtaining a copy
28    of this software and associated documentation files (the "Software"), to deal
29    in the Software without restriction, including without limitation the rights
30    to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
31    copies of the Software, and to permit persons to whom the Software is
32    furnished to do so, subject to the following conditions:
33 
34    The above copyright notice and this permission notice shall be included in
35    all copies or substantial portions of the Software.
36 
37    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
38    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
39    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
40    AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
41    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
42    OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
43    THE SOFTWARE.
44 */
45 
46 /* The BGZF library was originally written by Bob Handsaker from the Broad
47  * Institute. It was later improved by the SAMtools developers. */
48 
49 import core.stdc.stdint;
50 import core.stdc.stdio;
51 import core.sys.posix.sys.types;
52 
// core.sys.posix.sys.types provides no ssize_t on Windows builds, so supply
// one here: a signed, pointer-sized integer (int on Win32, long on Win64).
// NOTE(review): off_t (used by bgzf_useek/bgzf_utell) presumably is not
// provided on Windows builds either -- confirm.
version (Windows)
{
    alias ssize_t = ptrdiff_t;
}
62 
/// Block size; keep compressBound(BGZF_BLOCK_SIZE) < BGZF_MAX_BLOCK_SIZE
enum int BGZF_BLOCK_SIZE = 0xFF00;
/// Upper bound referred to by the constraint on BGZF_BLOCK_SIZE
enum int BGZF_MAX_BLOCK_SIZE = 1 << 16;
65 
// Error codes; each value is a distinct single bit.
/// zlib error
enum int BGZF_ERR_ZLIB   = 1 << 0;
/// header format error
enum int BGZF_ERR_HEADER = 1 << 1;
/// io error
enum int BGZF_ERR_IO     = 1 << 2;
/// misuse error: (a) writeable file (b) where not SEEK_SET (c) GZ (rather than BGZ) file
enum int BGZF_ERR_MISUSE = 1 << 3;
/// stream cannot be multi-threaded
enum int BGZF_ERR_MT     = 1 << 4;
/// returned by inflate_block() when bgzf_uncompress() has CRC error
enum int BGZF_ERR_CRC    = 1 << 5;
72 
// Opaque types: this module only ever refers to them through pointers.

/// hFILE stream handle; see hts.d
struct hFILE; // @suppress(dscanner.style.phobos_naming_convention)
/// Thread pool handle; see thread_pool.d
struct hts_tpool; // @suppress(dscanner.style.phobos_naming_convention)
// kstring_t (klib kstring) is not declared here; it comes from `import htslib.kstring`.
/// Memory pool for bgzf_job structs, to avoid many malloc/free, see htslib/bgzf.c
struct bgzf_mtaux_t; // @suppress(dscanner.style.phobos_naming_convention)
/// BGZF index (underlying struct tag)
struct __bgzidx_t; // @suppress(dscanner.style.phobos_naming_convention)
/// BGZF index (name used throughout this module)
alias bgzidx_t = __bgzidx_t;
/// BGZF block cache
struct bgzf_cache_t; // @suppress(dscanner.style.phobos_naming_convention)
/// Stand-in for zlib's z_stream (NOTE(review): confirm this corresponds to zlib::z_stream)
struct z_stream_s; // @suppress(dscanner.style.phobos_naming_convention)
88 
/// Handle for a Block Gzipped File (BGZF)
struct BGZF {
    // Mirrors the C bit-field declarations below; the widths total 32 bits.
    // Reserved bits should be written as 0 and treated as "don't care" on read.
    //   unsigned errcode:16, reserved:1, is_write:1, no_eof_block:1, is_be:1;
    //   signed   compress_level:9;
    //   unsigned last_block_eof:1, is_compressed:1, is_gzip:1;
    mixin(bitfields!(
        uint, "errcode",        16,
        bool, "reserved",        1,
        bool, "is_write",        1,
        bool, "no_eof_block",    1,
        bool, "is_be",           1,
        int,  "compress_level",  9,
        bool, "last_block_eof",  1,
        bool, "is_compressed",   1,
        bool, "is_gzip",         1));

    /// cache size in bytes
    int cache_size;
    /// presumably the length of the current uncompressed block -- TODO confirm against bgzf.c
    int block_length;
    /// presumably the length of the current compressed block -- TODO confirm against bgzf.c
    int block_clength;
    /// supplies the low 16 bits of the virtual offset returned by bgzf_tell()
    int block_offset;
    /// supplies the upper bits (shifted left by 16) of the virtual offset returned by bgzf_tell()
    int64_t block_address;
    /// presumably the position in the uncompressed stream -- TODO confirm against bgzf.c
    int64_t uncompressed_address;
    /// data ptr (uncompressed block)
    void* uncompressed_block;
    /// data ptr (compressed block)
    void* compressed_block;
    /// cache
    bgzf_cache_t* cache;
    /// actual file handle
    hFILE* fp;
    /// only used for multi-threading
    bgzf_mtaux_t* mt;
    /// BGZF index
    bgzidx_t* idx;
    /// build index on the fly, set by bgzf_index_build_init()
    int idx_build_otf;
    /// for gzip-compressed files
    z_stream_s* gz_stream;
    /// virtual offset of last seek
    int64_t seeked;
}
121 
122     /******************
123      * Basic routines *
124      ******************/
125 
126     /**
127      * Open an existing file descriptor for reading or writing.
128      *
129      * @param fd    file descriptor
130      *              Note that the file must be opened in binary mode, or else
131      *              there will be problems on platforms that make a difference
132      *              between text and binary mode.
133      * @param mode  mode matching /[rwag][u0-9]+/: 'r' for reading, 'w' for
134      *              writing, 'a' for appending, 'g' for gzip rather than BGZF
135      *              compression (with 'w' only), and digit specifies the zlib
136      *              compression level.
137      *              Note that there is a distinction between 'u' and '0': the
138      *              first yields plain uncompressed output whereas the latter
139      *              outputs uncompressed data wrapped in the zlib format.
140      * @return      BGZF file handler; 0 on error
141      */
142     BGZF* bgzf_dopen(int fd, const(char) *mode);
143 
144     /// ditto
145     pragma(inline, true)
146     BGZF* bgzf_fdopen(int fd, const(char) *mode) { return bgzf_dopen(fd, mode); }    // for backward compatibility
147     
148     /**
149      * Open the specified file for reading or writing.
150      */
151     BGZF* bgzf_open(const(char)* path, const(char) *mode);
152 
153     /**
154      * Open an existing hFILE stream for reading or writing.
155      */
156     BGZF* bgzf_hopen(hFILE *fp, const(char) *mode);
157 
158     /**
159      * Close the BGZF and free all associated resources.
160      *
161      * @param fp    BGZF file handler
162      * @return      0 on success and -1 on error
163      */
164     int bgzf_close(BGZF *fp);
165 
166     /**
167      * Read up to _length_ bytes from the file storing into _data_.
168      *
169      * @param fp     BGZF file handler
170      * @param data   data array to read into
171      * @param length size of data to read
172      * @return       number of bytes actually read; 0 on end-of-file and -1 on error
173      */
174     ssize_t bgzf_read(BGZF *fp, void *data, size_t length);
175 
176     /**
177      * Write _length_ bytes from _data_ to the file.  If no I/O errors occur,
178      * the complete _length_ bytes will be written (or queued for writing).
179      *
180      * @param fp     BGZF file handler
181      * @param data   data array to write
182      * @param length size of data to write
183      * @return       number of bytes written (i.e., _length_); negative on error
184      */
185     ssize_t bgzf_write(BGZF *fp, const(void) *data, size_t length);
186 
187     /**
188      * Write _length_ bytes from _data_ to the file, the index will be used to
189      * decide the amount of uncompressed data to be writen to each bgzip block.
190      * If no I/O errors occur, the complete _length_ bytes will be written (or
191      * queued for writing).
192      * @param fp     BGZF file handler
193      * @param data   data array to write
194      * @param length size of data to write
195      * @return       number of bytes written (i.e., _length_); negative on error
196      */
197     ssize_t bgzf_block_write(BGZF *fp, const(void) *data, size_t length);
198 
199     /**
200      * Returns the next byte in the file without consuming it.
201      * @param fp     BGZF file handler
202      * @return       -1 on EOF,
203      *               -2 on error,
204      *               otherwise the unsigned byte value.
205      */
206     int bgzf_peek(BGZF *fp);
207 
208     /**
209      * Read up to _length_ bytes directly from the underlying stream without
210      * decompressing.  Bypasses BGZF blocking, so must be used with care in
211      * specialised circumstances only.
212      *
213      * @param fp     BGZF file handler
214      * @param data   data array to read into
215      * @param length number of raw bytes to read
216      * @return       number of bytes actually read; 0 on end-of-file and -1 on error
217      */
218     ssize_t bgzf_raw_read(BGZF *fp, void *data, size_t length);
219 
220     /**
221      * Write _length_ bytes directly to the underlying stream without
222      * compressing.  Bypasses BGZF blocking, so must be used with care
223      * in specialised circumstances only.
224      *
225      * @param fp     BGZF file handler
226      * @param data   data array to write
227      * @param length number of raw bytes to write
228      * @return       number of bytes actually written; -1 on error
229      */
230     ssize_t bgzf_raw_write(BGZF *fp, const(void) *data, size_t length);
231 
232     /**
233      * Write the data in the buffer to the file.
234      *
235      * @param fp     BGZF file handle
236      * @return       0 on success and -1 on error
237      */
238     int bgzf_flush(BGZF *fp);
239 
240     /**
241      * Return a virtual file pointer to the current location in the file.
242      * No interpretation of the value should be made, other than a subsequent
243      * call to bgzf_seek can be used to position the file at the same point.
244      * Return value is non-negative on success.
245      */
246     pragma(inline, true)
247     ulong bgzf_tell(BGZF *fp) { return ((*fp).block_address << 16) | ((*fp).block_offset & 0xFFFF); }
248     
249     /**
250      * Set the file to read from the location specified by _pos_.
251      *
252      * @param fp     BGZF file handler
253      * @param pos    virtual file offset returned by bgzf_tell()
254      * @param whence must be SEEK_SET
255      * @return       0 on success and -1 on error
256      * 
257      * @note It is not permitted to seek on files open for writing,
258      * or files compressed with gzip (as opposed to bgzip).
259      */
260     int64_t bgzf_seek(BGZF *fp, int64_t pos, int whence);
261 
262     /**
263      * Check if the BGZF end-of-file (EOF) marker is present
264      *
265      * @param fp    BGZF file handler opened for reading
266      * @return      1 if the EOF marker is present and correct;
267      *              2 if it can't be checked, e.g., because fp isn't seekable;
268      *              0 if the EOF marker is absent;
269      *              -1 (with errno set) on error
270      */
271     int bgzf_check_EOF(BGZF *fp);
272 
273     /** Return the file's compression format
274      *
275      * @param fp  BGZF file handle
276      * @return    A small integer matching the corresponding
277      *            `enum htsCompression` value:
278      *   - 0 / `no_compression` if the file is uncompressed
279      *   - 1 / `gzip` if the file is plain GZIP-compressed
280      *   - 2 / `bgzf` if the file is BGZF-compressed
281      * @since 1.4
282      */
283     int bgzf_compression(BGZF *fp);
284 
285     /**
286      * Check if a file is in the BGZF format
287      *
288      * @param fn    file name
289      * @return      1 if _fn_ is BGZF; 0 if not or on I/O error
290      */
291     deprecated("Use bgzf_compression() or hts_detect_format() instead")
292     int bgzf_is_bgzf(const(char) *fn);
293 
294     /*********************
295      * Advanced routines *
296      *********************/
297 
298     /**
299      * Set the cache size. Only effective when compiled with -DBGZF_CACHE.
300      *
301      * @param fp    BGZF file handler
302      * @param size  size of cache in bytes; 0 to disable caching (default)
303      */
304     void bgzf_set_cache_size(BGZF *fp, int size);
305 
306     /**
307      * Flush the file if the remaining buffer size is smaller than _size_
308      * @return      0 if flushing succeeded or was not needed; negative on error
309      */
310     int bgzf_flush_try(BGZF *fp, ssize_t size);
311 
312     /**
313      * Read one byte from a BGZF file. It is faster than bgzf_read()
314      * @param fp     BGZF file handler
315      * @return       byte read; -1 on end-of-file or error
316      */
317     int bgzf_getc(BGZF *fp);
318 
319     /**
320      * Read one line from a BGZF file. It is faster than bgzf_getc()
321      *
322      * @param fp     BGZF file handler
323      * @param delim  delimitor
324      * @param str    string to write to; must be initialized
325      * @return       length of the string; -1 on end-of-file; <= -2 on error
326      */
327     int bgzf_getline(BGZF *fp, int delim, kstring_t *str);
328 
329     /**
330      * Read the next BGZF block.
331      */
332     int bgzf_read_block(BGZF *fp);
333 
334     /**
335      * Enable multi-threading (when compiled with -DBGZF_MT) via a shared
336      * thread pool.  This means both encoder and decoder can balance
337      * usage across a single pool of worker jobs.
338      *
339      * @param fp          BGZF file handler; must be opened for writing
340      * @param pool        The thread pool (see hts_create_threads)
341      */
342     int bgzf_thread_pool(BGZF *fp, hts_tpool *pool, int qsize);
343 
344     /**
345      * Enable multi-threading (only effective when the library was compiled
346      * with -DBGZF_MT)
347      *
348      * @param fp          BGZF file handler; must be opened for writing
349      * @param n_threads   #threads used for writing
350      * @param n_sub_blks  #blocks processed by each thread; a value 64-256 is recommended
351      */
352     int bgzf_mt(BGZF *fp, int n_threads, int n_sub_blks);
353 
354     /**
355      * Compress a single BGZF block.
356      *
357      * @param dst    output buffer (must have size >= BGZF_MAX_BLOCK_SIZE)
358      * @param dlen   size of output buffer; updated on return to the number
359      *               of bytes actually written to dst
360      * @param src    buffer to be compressed
361      * @param slen   size of data to compress (must be <= BGZF_BLOCK_SIZE)
362      * @param level  compression level
363      * @return       0 on success and negative on error
364      */
365     int bgzf_compress(void *dst, size_t *dlen, const(void) *src, size_t slen, int level);
366 
367     /*******************
368      * bgzidx routines *
369      *******************/
370 
371     /**
372      *  Position BGZF at the uncompressed offset
373      *
374      *  @param fp           BGZF file handler; must be opened for reading
375      *  @param uoffset      file offset in the uncompressed data
376      *  @param where        must be SEEK_SET
377      *
378      *  Returns 0 on success and -1 on error.
379      *
380      *  @note It is not permitted to seek on files open for writing,
381      *  or files compressed with gzip (as opposed to bgzip).
382      */
383     int bgzf_useek(BGZF *fp, off_t uoffset, int where);
384 
385     /**
386      *  Position in uncompressed BGZF
387      *
388      *  @param fp           BGZF file handler; must be opened for reading
389      *
390      *  Returns the current offset on success and -1 on error.
391      */
392     off_t bgzf_utell(BGZF *fp);
393 
394     /**
395      * Tell BGZF to build index while compressing.
396      *
397      * @param fp          BGZF file handler; can be opened for reading or writing.
398      *
399      * Returns 0 on success and -1 on error.
400      *
401      * @note This function must be called before any data has been read or
402      * written, and in particular before calling bgzf_mt() on the same
403      * file handle (as threads may start reading data before the index
404      * has been set up).
405      */
406     int bgzf_index_build_init(BGZF *fp);
407 
408     /// Load BGZF index
409     /**
410      * @param fp          BGZF file handler
411      * @param bname       base name
412      * @param suffix      suffix to add to bname (can be NULL)
413      * @return 0 on success and -1 on error.
414      */
415     int bgzf_index_load(BGZF *fp,
416                         const(char) *bname, const(char) *suffix);
417 
418     /// Load BGZF index from an hFILE
419     /**
420      * @param fp   BGZF file handle
421      * @param idx  hFILE to read from
422      * @param name file name (for error reporting only; can be NULL)
423      * @return 0 on success and -1 on error.
424      *
425      * Populates @p fp with index data read from the hFILE handle @p idx.
426      * The file pointer to @idx should point to the start of the index
427      * data when this function is called.
428      *
429      * The file name can optionally be passed in the @p name parameter.  This
430      * is only used for printing error messages; if NULL the word "index" is
431      * used instead.
432      */
433     int bgzf_index_load_hfile(BGZF *fp, hFILE *idx,
434                               const(char) *name);
435 
436     /// Save BGZF index
437     /**
438      * @param fp          BGZF file handler
439      * @param bname       base name
440      * @param suffix      suffix to add to bname (can be NULL)
441      * @return 0 on success and -1 on error.
442      */
443     int bgzf_index_dump(BGZF *fp,
444                         const(char) *bname, const(char) *suffix);
445 
446     /// Write a BGZF index to an hFILE
447     /**
448      * @param fp     BGZF file handle
449      * @param idx    hFILE to write to
450      * @param name   file name (for error reporting only, can be NULL)
451      * @return 0 on success and -1 on error.
452      *
453      * Write index data from @p fp to the file @p idx.
454      *
455      * The file name can optionally be passed in the @p name parameter.  This
456      * is only used for printing error messages; if NULL the word "index" is
457      * used instead.
458      */
459 
460     int bgzf_index_dump_hfile(BGZF *fp, hFILE *idx,
461                               const(char) *name);