htslib.hts_endian source code

1 /// @file hts_endian.h
2 /// Byte swapping and unaligned access functions.
3 /*
4    Copyright (C) 2017 Genome Research Ltd.
5 
6     Author: Rob Davies <rmd@sanger.ac.uk>
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 DEALINGS IN THE SOFTWARE.  */
25 module htslib.hts_endian;
26 
27 import core.stdc.config;
28 
29 @system:
30 @nogc:
31 
32 /*
33  * Compile-time endianness tests.
34  *
35  * Note that these tests may fail.  They should only be used to enable
36  * faster versions of endian-neutral implementations.  The endian-neutral
37  * version should always be available as a fall-back.
38  *
39  * See https://sourceforge.net/p/predef/wiki/Endianness/
40  */
41 
/* Save typing as both the endian and the unaligned-access tests want to know
 * whether the target is an x86 or x86_64 platform.
 *
 * The three cases form a single else-chain so that exactly one definition of
 * HTS_x86 is emitted for every target (an unchained second test would define
 * it a second time, as false, on 32-bit x86).
 */
version(X86) enum HTS_x86 = true;
else version(X86_64) enum HTS_x86 = true;
else enum HTS_x86 = false;
47 
/* Endianness flags.
 *
 * HTS_LITTLE_ENDIAN  true if the platform is known to be little-endian
 * HTS_BIG_ENDIAN     true if the platform is known to be big-endian
 *
 * Compiling with -version=HTS_ENDIAN_NEUTRAL disables all endian-specific
 * code by forcing both flags to false.  Each flag is defined exactly once,
 * whichever versions are set, so the neutral override cannot clash with the
 * platform-derived definitions.
 */
version(HTS_ENDIAN_NEUTRAL)
{
    /* Disable all endian-specific code. */
    enum HTS_LITTLE_ENDIAN = false;
    enum HTS_BIG_ENDIAN = false;
}
else
{
    version(LittleEndian) enum HTS_LITTLE_ENDIAN = true;
    else enum HTS_LITTLE_ENDIAN = false;

    version(BigEndian) enum HTS_BIG_ENDIAN = true;
    else enum HTS_BIG_ENDIAN = false;
}
72 
/** Controls use of unaligned memory access.
 *
 * When true (and the platform is also little-endian) the conversion
 * functions replace shift-and-or with simple pointer casts.  When false,
 * shift-and-or is always used.
 *
 * The flag follows HTS_x86: it is enabled only on x86 / x86_64, which
 * tolerate unaligned access.
 */
enum HTS_ALLOW_UNALIGNED = HTS_x86;
85 
// Consider using AX_CHECK_ALIGNED_ACCESS_REQUIRED in autoconf.

// Integer types used when dereferencing a possibly unaligned byte pointer.
// This prevents problems with gcc's vectoriser generating the wrong
// instructions for unaligned data.
//
// uint64_u must be exactly 64 bits wide, so it aliases ulong rather than
// c_ulong (c_ulong is only 32 bits on Windows and on 32-bit targets, which
// would make the 64-bit fast path touch just four bytes).
//
// NOTE(review): confirm that align(1) has an effect inside an alias
// declaration with the compilers this binding targets.

static if(HTS_ALLOW_UNALIGNED){
    alias uint16_u = align(1) ushort;
    alias uint32_u = align(1) uint;
    alias uint64_u = align(1) ulong;
}else{
    alias uint16_u = ushort;
    alias uint32_u = uint;
    alias uint64_u = ulong;
}
100 
/// Basically just a byte.
/// This wrapper is a workaround to avoid integer promotion issues: every
/// arithmetic result is truncated back to 8 bits instead of widening to int.
/// Implementation derived from this post on the dlang forums:
/// https://forum.dlang.org/post/r0imk7$14b7$3@digitalmars.com
private struct int8_t
{
    byte _val;
    alias _val this;

    /// Wrap a raw byte.
    pragma(inline, true)
    this(byte x) @nogc
    {
        _val = x;
    }

    /// `this op other`, truncated to 8 bits.
    pragma(inline, true)
    int8_t opBinary(string op, T)(T other)
        if ((is(T == int8_t) || is(T == byte) || is(T == ubyte)))
    {
        return mixin("int8_t(cast(byte)(_val " ~ op ~ " cast(byte)other))");
    }

    /// `other op this`, truncated to 8 bits.
    /// The wrapped value is the RIGHT-hand operand here, so `other` must come
    /// first; this matters for non-commutative operators such as - and /.
    pragma(inline, true)
    int8_t opBinaryRight(string op, T)(T other)
        if ((is(T == int8_t) || is(T == byte) || is(T == ubyte)))
    {
        return mixin("int8_t(cast(byte)(cast(byte)other " ~ op ~ " _val))");
    }

    /// Two's complement negation: invert all bits, then add one.
    pragma(inline, true)
    int8_t opUnary(string op: "-")()
    {
        return (cast(int8_t) _val ^ cast(ubyte) 0xFF) + cast(ubyte) 1;
    }
}

unittest
{
    assert(-int8_t(cast(byte) 8) == int8_t(cast(byte) -8));

    // Operand order must be respected on both sides of a subtraction.
    ubyte ten = 10;
    ubyte three = 3;
    assert((int8_t(cast(byte) 10) - three) == int8_t(cast(byte) 7));
    assert((ten - int8_t(cast(byte) 3)) == int8_t(cast(byte) 7));
}
141 
/// Basically just a short.
/// This wrapper is a workaround to avoid integer promotion issues: every
/// arithmetic result is truncated back to 16 bits instead of widening to int.
/// Implementation derived from this post on the dlang forums:
/// https://forum.dlang.org/post/r0imk7$14b7$3@digitalmars.com
private struct int16_t
{
    short _val;
    alias _val this;

    /// Wrap a raw short.
    pragma(inline, true)
    this(short x) @nogc
    {
        _val = x;
    }

    /// `this op other`, truncated to 16 bits.
    pragma(inline, true)
    int16_t opBinary(string op, T)(T other)
        if ((is(T == int16_t) || is(T == short) || is(T == ushort)))
    {
        return mixin("int16_t(cast(short)(_val " ~ op ~ " cast(short)other))");
    }

    /// `other op this`, truncated to 16 bits.
    /// The wrapped value is the RIGHT-hand operand here, so `other` must come
    /// first; this matters for non-commutative operators such as - and /.
    pragma(inline, true)
    int16_t opBinaryRight(string op, T)(T other)
        if ((is(T == int16_t) || is(T == short) || is(T == ushort)))
    {
        return mixin("int16_t(cast(short)(cast(short)other " ~ op ~ " _val))");
    }

    /// Two's complement negation: invert all bits, then add one.
    pragma(inline, true)
    int16_t opUnary(string op: "-")()
    {
        return (cast(int16_t) _val ^ cast(ushort) 0xFFFF) + cast(ushort) 1;
    }
}

unittest
{
    assert(-int16_t(cast(short) 8) == int16_t(cast(short) -8));

    // Operand order must be respected on both sides of a subtraction.
    ushort ten = 10;
    ushort three = 3;
    assert((int16_t(cast(short) 10) - three) == int16_t(cast(short) 7));
    assert((ten - int16_t(cast(short) 3)) == int16_t(cast(short) 7));
}
182 
183 pragma(inline, true):
184 @nogc:
185 @system:
/// Decode a little-endian ushort from an unsigned byte array
/** @param buf Pointer to the first of two source bytes, may be unaligned
 *  @return A 16 bit unsigned integer
 *  buf[0] supplies the low byte and buf[1] the high byte.
 */
ushort le_to_u16(const(ubyte)* buf)
{
    static if (HTS_LITTLE_ENDIAN && HTS_ALLOW_UNALIGNED)
    {
        // Memory layout already matches: load the value in one access.
        return *(cast(uint16_u*) buf);
    }
    else
    {
        // Portable path: assemble the value one byte at a time.
        return cast(ushort)(buf[0] | (buf[1] << 8));
    }
}
198 
/// Decode a little-endian uint from an unsigned byte array
/** @param buf Pointer to the first of four source bytes, may be unaligned
 *  @return A 32 bit unsigned integer
 *  buf[0] is the least significant byte, buf[3] the most significant.
 */
uint le_to_u32(const(ubyte)* buf)
{
    static if (HTS_LITTLE_ENDIAN && HTS_ALLOW_UNALIGNED)
    {
        // Memory layout already matches: load the value in one access.
        return *(cast(uint32_u*) buf);
    }
    else
    {
        // Portable path: low half from bytes 0-1, high half from bytes 2-3.
        return le_to_u16(buf) | (cast(uint) le_to_u16(buf + 2) << 16);
    }
}
214 
/// Decode a little-endian ulong from an unsigned byte array
/** @param buf Pointer to the first of eight source bytes, may be unaligned
 *  @return A 64 bit unsigned integer
 *  buf[0] is the least significant byte, buf[7] the most significant.
 */
ulong le_to_u64(const(ubyte)* buf)
{
    static if (HTS_LITTLE_ENDIAN && HTS_ALLOW_UNALIGNED)
    {
        // Memory layout already matches: load the value in one access.
        return *(cast(uint64_u*) buf);
    }
    else
    {
        // Portable path: low half from bytes 0-3, high half from bytes 4-7.
        return le_to_u32(buf) | (cast(ulong) le_to_u32(buf + 4) << 32);
    }
}
234 
/// Write a ushort to a byte array in little-endian byte order
/** @param val The value to store
 *  @param buf Destination for two bytes (may be unaligned)
 */
void u16_to_le(ushort val, ubyte* buf)
{
    static if (HTS_LITTLE_ENDIAN && HTS_ALLOW_UNALIGNED)
    {
        // Memory layout already matches: store the value in one access.
        *(cast(uint16_u*) buf) = val;
    }
    else
    {
        // Portable path: low byte first, then high byte.
        buf[0] = cast(ubyte) val;
        buf[1] = cast(ubyte)(val >> 8);
    }
}
248 
/// Write a uint to a byte array in little-endian byte order
/** @param val The value to store
 *  @param buf Destination for four bytes (may be unaligned)
 */
void u32_to_le(uint val, ubyte* buf)
{
    static if (HTS_LITTLE_ENDIAN && HTS_ALLOW_UNALIGNED)
    {
        // Memory layout already matches: store the value in one access.
        *(cast(uint32_u*) buf) = val;
    }
    else
    {
        // Portable path: low 16 bits into bytes 0-1, high 16 bits into 2-3.
        u16_to_le(cast(ushort) val, buf);
        u16_to_le(cast(ushort)(val >> 16), buf + 2);
    }
}
264 
/// Write a ulong to a byte array in little-endian byte order
/** @param val The value to store
 *  @param buf Destination for eight bytes (may be unaligned)
 */
void u64_to_le(ulong val, ubyte* buf)
{
    static if (HTS_LITTLE_ENDIAN && HTS_ALLOW_UNALIGNED)
    {
        // Memory layout already matches: store the value in one access.
        *(cast(uint64_u*) buf) = val;
    }
    else
    {
        // Portable path: low 32 bits into bytes 0-3, high 32 bits into 4-7.
        u32_to_le(cast(uint) val, buf);
        u32_to_le(cast(uint)(val >> 32), buf + 4);
    }
}
284 
285 /* Signed values.  Grab the data as unsigned, then convert to signed without
286  * triggering undefined behaviour.  On any sensible platform, the conversion
287  * should optimise away to nothing.
288  */
289 
/// Get a signed 8 bit value from an unsigned byte array
/** @param buf Pointer to the source byte
 *  @return A 8 bit signed integer
 *  The input data is interpreted as 2's complement representation.
 *
 *  A ubyte -> byte cast in D reinterprets the bit pattern, so no
 *  branch-and-negate arithmetic is needed to stay within defined behaviour.
 */
byte le_to_i8(const(ubyte)* buf)
{
    return cast(byte) *buf;
}
299 
/// Get a short value from an unsigned byte array
/** @param buf Pointer to source byte array, may be unaligned
 *  @return A 16 bit signed integer
 *  The input data is interpreted as 2's complement representation in
 *  little-endian byte order.
 *
 *  The bytes are read as a ushort and then cast: a ushort -> short cast in D
 *  reinterprets the bit pattern, so no branch-and-negate arithmetic is
 *  needed to stay within defined behaviour.
 */
short le_to_i16(const(ubyte)* buf)
{
    return cast(short) le_to_u16(buf);
}
311 
/// Decode a little-endian int from an unsigned byte array
/** @param buf Pointer to the first of four source bytes, may be unaligned
 *  @return A 32 bit signed integer
 *  The bytes are read in little-endian order and interpreted as a 2's
 *  complement value.
 */
int le_to_i32(const(ubyte)* buf)
{
    const uint raw = le_to_u32(buf);
    // Sign bit clear: the value is non-negative and converts directly.
    // Sign bit set: ~raw is the distance below -1, so negate it and step
    // down one more.
    return (raw & 0x80000000U) == 0 ? cast(int) raw : -(cast(int) ~raw) - 1;
}
323 
/// Decode a little-endian long from an unsigned byte array
/** @param buf Pointer to the first of eight source bytes, may be unaligned
 *  @return A 64 bit signed integer
 *  The bytes are read in little-endian order and interpreted as a 2's
 *  complement value.
 */
long le_to_i64(const(ubyte)* buf)
{
    const ulong raw = le_to_u64(buf);
    // Sign bit clear: the value is non-negative and converts directly.
    // Sign bit set: ~raw is the distance below -1, so negate it and step
    // down one more.
    return (raw & 0x8000000000000000UL) == 0
        ? cast(long) raw
        : -(cast(long) ~raw) - 1;
}
336 
337 // Converting the other way is easier as signed -> unsigned is well defined.
338 
/// Write a short to a byte array in little-endian byte order
/** @param val The signed value to store
 *  @param buf Destination for two bytes (may be unaligned)
 *  The value is converted to ushort and written by u16_to_le.
 */
void i16_to_le(short val, ubyte* buf)
{
    u16_to_le(cast(ushort) val, buf);
}
347 
/// Write an int to a byte array in little-endian byte order
/** @param val The signed value to store
 *  @param buf Destination for four bytes (may be unaligned)
 *  The value is converted to uint and written by u32_to_le.
 */
void i32_to_le(int val, ubyte* buf)
{
    u32_to_le(cast(uint) val, buf);
}
356 
/// Write a long to a byte array in little-endian byte order
/** @param val The signed value to store
 *  @param buf Destination for eight bytes (may be unaligned)
 *  The value is converted to ulong and written by u64_to_le.
 */
void i64_to_le(long val, ubyte* buf)
{
    u64_to_le(cast(ulong) val, buf);
}
365 
366 /* Floating point.  Assumptions:
367  *  Platform uses IEEE 754 format
368  *  sizeof(float) == sizeof(uint)
369  *  sizeof(double) == sizeof(ulong)
370  *  Endian-ness is the same for both floating point and integer
371  *  Type-punning via a union is allowed
372  */
373 
/// Decode a little-endian float from an unsigned byte array
/** @param buf Pointer to the first of four source bytes, may be unaligned
 *  @return A 32 bit floating point value
 *  The four bytes are read as a little-endian uint whose bit pattern is then
 *  reinterpreted as a float through a union.
 */
float le_to_float(const(ubyte)* buf)
{
    // Overlay of a float and the integer holding its bit pattern.
    union Bits
    {
        uint raw;
        float value;
    }

    Bits bits;
    bits.raw = le_to_u32(buf);
    return bits.value;
}
392 
/// Decode a little-endian double from an unsigned byte array
/** @param buf Pointer to the first of eight source bytes, may be unaligned
 *  @return A 64 bit floating point value
 *  The eight bytes are read as a little-endian ulong whose bit pattern is
 *  then reinterpreted as a double through a union.
 */
double le_to_double(const(ubyte)* buf)
{
    // Overlay of a double and the integer holding its bit pattern.
    union Bits
    {
        ulong raw;
        double value;
    }

    Bits bits;
    bits.raw = le_to_u64(buf);
    return bits.value;
}
410 
/// Write a float to a byte array in little-endian byte order
/** @param val The value to store
 *  @param buf Destination for four bytes (may be unaligned)
 *  The float's bit pattern is obtained through a union and written as a
 *  little-endian uint.
 */
void float_to_le(float val, ubyte* buf)
{
    // Overlay of a float and the integer holding its bit pattern.
    union Bits
    {
        uint raw;
        float value;
    }

    Bits bits;
    bits.value = val;
    u32_to_le(bits.raw, buf);
}
426 
/// Write a double to a byte array in little-endian byte order
/** @param val The value to store
 *  @param buf Destination for eight bytes (may be unaligned)
 *  The double's bit pattern is obtained through a union and written as a
 *  little-endian ulong.
 */
void double_to_le(double val, ubyte* buf)
{
    // Overlay of a double and the integer holding its bit pattern.
    union Bits
    {
        ulong raw;
        double value;
    }

    Bits bits;
    bits.value = val;
    u64_to_le(bits.raw, buf);
}
442 
443 /* HTS_ENDIAN_H */