1 module dhtslib.vcf;
2 
3 public import dhtslib.vcf.record;
4 public import dhtslib.vcf.reader;
5 public import dhtslib.vcf.header;
6 public import dhtslib.vcf.writer;
7 
8 import std.meta : AliasSeq;
9 import htslib.vcf;
10 
/// Kind of a header line, i.e. the key underlined below.
///
/// ##INFO=<...>
///   ====
///
/// Replacement for htslib BCF_HL_*
enum HeaderRecordType
{
    /// literal -1; not taken from a BCF_HL_* constant
    None = -1,

    /// header line: FILTER
    Filter = BCF_HL_FLT,

    /// header line: INFO
    Info = BCF_HL_INFO,

    /// header line: FORMAT
    Format = BCF_HL_FMT,

    /// header line: contig
    Contig = BCF_HL_CTG,

    /// header line: structured header line TAG=<A=..,B=..>
    Struct = BCF_HL_STR,

    /// header line: generic header line
    Generic = BCF_HL_GEN,
}
27 
/// Strings for HeaderRecordType, one entry per member in declaration
/// order. HeaderRecordType.None has no entry.
static immutable HeaderRecordTypeStrings = [
    "FILTER",
    "INFO",
    "FORMAT",
    "contig",
    "struct",
    "generic",
];
30 
/// Value type named by the Type= field of a header line (underlined below).
///
/// ##INFO=<Number=A, Type=Integer>
///                       =======
///
/// Replacement for htslib BCF_HT_*
enum HeaderTypes
{
    /// literal -1; not taken from a BCF_HT_* constant
    None = -1,

    /// header type: FLAG
    Flag = BCF_HT_FLAG,

    /// header type: INTEGER
    Integer = BCF_HT_INT,

    /// header type: REAL
    Float = BCF_HT_REAL,

    /// header type: STRING
    String = BCF_HT_STR,

    /// literal 4; not taken from a BCF_HT_* constant
    Character = 4,

    /// BCF_HT_INT, but for int64_t values; VCF only!
    Long = BCF_HT_LONG,
}
47 
/// Strings for HeaderTypes: the member names of HeaderTypes in declaration
/// order, with the leading "None" sliced off so that "Flag" sits at index 0.
///
/// Alternatives that were tried:
///
/// doesn't work as needs compile time
/// enum HeaderTypesStrings = __traits(allMembers, HeaderTypes);
///
/// works but includes "None" which is throwing off indexing
/// enum HeaderTypesStrings = [__traits(allMembers, HeaderTypes)];
///
/// Declared `static immutable` (like the other string tables in this module)
/// rather than as an `enum` manifest constant: an enum array literal is
/// re-created at every point of use, whereas an immutable array is stored
/// once and is still readable at compile time.
///
/// NOTE(review): the table is positional. Before indexing it with a
/// HeaderTypes value, confirm that the member's numeric value equals its
/// position here -- in particular for Long, whose value comes from
/// BCF_HT_LONG and is not visible in this module.
static immutable HeaderTypesStrings = [__traits(allMembers, HeaderTypes)][1..$];
56 
57 
/// Length class named by the Number= field of a header line
/// (underlined below).
///
/// ##INFO=<Number=A, Type=Integer>
///               =
///
/// if FIXED
/// ##INFO=<Number=2, Type=Integer>
///               =
///
/// Replacement for htslib BCF_VL_*
enum HeaderLengths
{
    /// literal -1; not taken from a BCF_VL_* constant
    None = -1,

    /// variable length: fixed length
    Fixed = BCF_VL_FIXED,

    /// variable length: variable
    Variable = BCF_VL_VAR,

    /// variable length: one field per alt allele
    OnePerAltAllele = BCF_VL_A,

    /// variable length: one field per genotype
    OnePerGenotype = BCF_VL_G,

    /// variable length: one field per allele including ref
    OnePerAllele = BCF_VL_R,
}
77 
/// Strings for HeaderLengths, one entry per member in declaration order.
/// HeaderLengths.None has no entry.
static immutable HeaderLengthsStrings = [
    "FIXED",
    ".",
    "A",
    "G",
    "R",
];
80 
/// Index into the `id` field of bcf_hdr_t, which has type bcf_idpair_t*[3].
///
/// Example, as used from VCFRecord where this.vcfheader is a VCFHeader:
/// this.vcfheader.hdr.id[HeaderDictTypes.Id]
///
/// Replacement for htslib BCF_DT_*
enum HeaderDictTypes
{
    /// dictionary type: ID
    Id = BCF_DT_ID,

    /// dictionary type: Contig
    Contig = BCF_DT_CTG,

    /// dictionary type: SAMPLE
    Sample = BCF_DT_SAMPLE,
}
93 
/// Tag used by InfoField (bcf_info_t) and FormatField (bcf_fmt_t) to
/// identify the type and size of the underlying htslib/bcf1_t info and
/// format data. That data is stored in ubyte arrays.
///
/// Replacement for htslib BCF_BT_*
enum BcfRecordType
{
    /// null
    Null = 0,

    /// int8
    Int8 = BCF_BT_INT8,

    /// int16
    Int16 = BCF_BT_INT16,

    /// int32
    Int32 = BCF_BT_INT32,

    /// Unofficial, for internal use only per htslib headers
    Int64 = BCF_BT_INT64,

    /// float (32?)
    Float = BCF_BT_FLOAT,

    /// char (8 bit)
    Char = BCF_BT_CHAR,
}
110 
/// Byte sizes for BcfRecordType, stored positionally.
/// NOTE(review): presumably indexed by the numeric BcfRecordType value, with
/// the 0 entries as placeholders -- confirm against the BCF_BT_* values.
static immutable RecordTypeSizes = [
    0,
    byte.sizeof,
    short.sizeof,
    int.sizeof,
    long.sizeof,
    float.sizeof,
    0,
    char.sizeof,
];

/// D types laid out in the same eight positions as RecordTypeSizes;
/// `null` fills the positions whose size entry is 0.
alias RecordTypeToDType = AliasSeq!(
    null,
    byte,
    short,
    int,
    long,
    float,
    null,
    string,
);
114 
/// Variant classes.
///
/// Replacement for htslib VCF_*
enum VariantType
{
    /// ref (e.g. in a gVCF)
    Ref = VCF_REF,

    /// SNP
    Snp = VCF_SNP,

    /// MNP
    Mnp = VCF_MNP,

    /// INDEL
    Indel = VCF_INDEL,

    /// other (e.g. SV)
    Other = VCF_OTHER,

    /// breakend
    Breakend = VCF_BND,

    /// overlapping deletion, ALT=*
    Overlap = VCF_OVERLAP,
}
126 
/// Level identifiers for unpacking the underlying variable-length data in
/// the bcf1_t. Values are inclusive, i.e. UnpackLevel.AltAllele unpacks all
/// data before and including the ALT allele.
///
/// Replacement for htslib BCF_UN_*
enum UnpackLevel
{
    /// literal 0; not taken from a BCF_UN_* constant
    None = 0,

    /// up to ALT inclusive
    AltAllele = BCF_UN_STR,

    /// up to Filter
    Filter = BCF_UN_FLT,

    /// up to Info
    Info = BCF_UN_INFO,

    /// all shared information
    SharedFields = BCF_UN_SHR,

    /// unpack format and each sample
    Format = BCF_UN_FMT,

    /// a synonym of Format
    IndividualFields = BCF_UN_IND,

    /// everything
    All = BCF_UN_ALL,
}