1 /// @file htslib/vcfutils.h
2 /// Allele-related utility functions.
3 /*
4     Copyright (C) 2012, 2013, 2015-2016 Genome Research Ltd.
5 
6     Author: Petr Danecek <pd3@sanger.ac.uk>
7 
8 Permission is hereby granted, free of charge, to any person obtaining a copy
9 of this software and associated documentation files (the "Software"), to deal
10 in the Software without restriction, including without limitation the rights
11 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12 copies of the Software, and to permit persons to whom the Software is
13 furnished to do so, subject to the following conditions:
14 
15 The above copyright notice and this permission notice shall be included in
16 all copies or substantial portions of the Software.
17 
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
21 THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
23 FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
24 DEALINGS IN THE SOFTWARE.  */
25 module htslib.vcfutils;
26 
27 import htslib.vcf;
28 
29 @system:
30 nothrow:
31 @nogc:
32 
33 extern (C):
34 
/// Opaque bitset type. This module only refers to it through a pointer
/// (see the rm_set parameter of bcf_remove_allele_set).
struct kbitset_t;
36 
/**
 *  bcf_trim_alleles() - remove ALT alleles unused in genotype fields
 *  @header:  for access to BCF_DT_ID dictionary
 *  @line:    VCF line obtained from vcf_parse1
 *
 *  Returns the number of removed alleles on success or a negative
 *  value on error:
 *      -1 .. some allele index is out of bounds
 *      -2 .. could not remove alleles
 */
int bcf_trim_alleles(const(bcf_hdr_t)* header, bcf1_t* line);
48 
/**
 *  bcf_remove_alleles() - remove ALT alleles according to bitmask @mask
 *  @header:  for access to BCF_DT_ID dictionary
 *  @line:    VCF line obtained from vcf_parse1
 *  @mask:    alleles to remove
 *
 *  If you have more than 31 alleles, the integer bit mask will
 *  overflow; use bcf_remove_allele_set instead.
 *
 *  Returns 0 on success, <0 on error.
 *
 *  NOTE(review): the upstream C header appears to mark this function as
 *  deprecated in favour of bcf_remove_allele_set -- confirm, and consider
 *  mirroring that here.
 */
int bcf_remove_alleles(const(bcf_hdr_t)* header, bcf1_t* line, int mask);
60 
/**
 *  bcf_remove_allele_set() - remove ALT alleles according to bitset @rm_set
 *  @header:  for access to BCF_DT_ID dictionary
 *  @line:    VCF line obtained from vcf_parse1
 *  @rm_set:  pointer to kbitset_t object with bits set for allele
 *            indexes to remove
 *
 *  Number=A,R,G INFO and FORMAT fields will be updated accordingly.
 *
 *  Returns 0 on success or -1 on failure.
 */
int bcf_remove_allele_set(
    const(bcf_hdr_t)* header,
    bcf1_t* line,
    const(kbitset_t)* rm_set);
76 
/**
 *  bcf_calc_ac() - calculate the number of REF and ALT alleles
 *  @header:  for access to BCF_DT_ID dictionary
 *  @line:    VCF line obtained from vcf_parse1
 *  @ac:      array of length line->n_allele
 *  @which:   determines whether INFO/AN,AC and indv fields may be used
 *
 *  Returns 1 if the call succeeded, or 0 if the value could not
 *  be determined.
 *
 *  The value of @which determines if existing INFO/AC,AN can be
 *  used (BCF_UN_INFO) and if indv fields can be split (BCF_UN_FMT).
 */
int bcf_calc_ac(const(bcf_hdr_t)* header, bcf1_t* line, int* ac, int which);
91 
// Genotype type codes returned by bcf_gt_type().
enum GT_HOM_RR = 0; // note: the actual value of GT_* matters, used in dosage r2 calculation
enum GT_HOM_AA = 1;
enum GT_HET_RA = 2;
enum GT_HET_AA = 3;
enum GT_HAPL_R = 4;
enum GT_HAPL_A = 5;
enum GT_UNKN = 6;

/**
 * bcf_gt_type() - determines type of the genotype
 * @fmt_ptr:  the GT format field as set for example by set_fmt_ptr
 * @isample:  sample index (starting from 0)
 * @ial:      index of the 1st non-reference allele (starting from 1)
 * @jal:      index of the 2nd non-reference allele (starting from 1)
 *
 * Returns the type of the genotype (one of GT_HOM_RR, GT_HET_RA,
 * GT_HOM_AA, GT_HET_AA, GT_HAPL_R, GT_HAPL_A or GT_UNKN). If @ial
 * is not null and the genotype has one or more non-reference
 * alleles, @ial will be set. In case of GT_HET_AA, @ial is the
 * position of the allele which appeared first in ALT. If @jal is
 * not null and the genotype is GT_HET_AA, @jal will be set and is
 * the position of the second allele in ALT.
 */
int bcf_gt_type(bcf_fmt_t* fmt_ptr, int isample, int* ial, int* jal);
115 
/**
 * bcf_acgt2int() - map a nucleotide character to a 0-based index
 * @c:  nucleotide character, upper or lower case
 *
 * Returns 0, 1, 2 or 3 for A, C, G or T respectively, and -1 for
 * any other character.
 */
pragma(inline, true)
int bcf_acgt2int(char c)
{
    // Anything above code 96 is shifted down by 32, which folds
    // 'a'..'z' onto 'A'..'Z' before the lookup.
    const char base = (cast(int)c > 96) ? cast(char)(c - 32) : c;

    switch (base)
    {
        case 'A': return 0;
        case 'C': return 1;
        case 'G': return 2;
        case 'T': return 3;
        default:  return -1;
    }
}
126 
/**
 * bcf_int2acgt() - map a 0-based index to a nucleotide character
 * @i:  index into the string "ACGT"
 *
 * Returns the character of "ACGT" at position @i.
 */
extern (D) auto bcf_int2acgt(T)(auto ref T i)
{
    enum string bases = "ACGT";
    return bases[i];
}
131 
/**
  * bcf_ij2G() - convert a pair of allele indexes to a Number=G index (diploid)
  * @i,j:  allele indexes, 0-based, i<=j
  *
  * Returns the index into the Number=G diploid array, computed as
  * j*(j+1)/2 + i.
  */
extern (D) auto bcf_ij2G(T0, T1)(auto ref T0 i, auto ref T1 j)
{
    // Offset contributed by j: the j-th triangular number.
    auto triangular = j * (j + 1) / 2;
    return triangular + i;
}