/// @file htslib/vcfutils.h
/// Allele-related utility functions.
/*
    Copyright (C) 2012, 2013, 2015-2016 Genome Research Ltd.

    Author: Petr Danecek <pd3@sanger.ac.uk>

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE. */
module htslib.vcfutils;

import htslib.vcf;

@system:
nothrow:
@nogc:

extern (C):

/// Bit-set type declared without a body; this module only passes it by pointer.
struct kbitset_t;

/**
 *  bcf_trim_alleles() - drop ALT alleles that no genotype field refers to
 *  @header:  for access to BCF_DT_ID dictionary
 *  @line:    VCF line obtained from vcf_parse1
 *
 *  Returns the number of removed alleles on success, or a negative
 *  value on error:
 *      -1 .. some allele index is out of bounds
 *      -2 .. could not remove alleles
 */
int bcf_trim_alleles(const(bcf_hdr_t)* header, bcf1_t* line);

/**
 *  bcf_remove_alleles() - drop the ALT alleles selected by bitmask @mask
 *  @header:  for access to BCF_DT_ID dictionary
 *  @line:    VCF line obtained from vcf_parse1
 *  @mask:    alleles to remove
 *
 *  The integer bit mask overflows once there are more than 31 alleles;
 *  use bcf_remove_allele_set in that case.
 *  Returns 0 on success, <0 on error
 */
int bcf_remove_alleles(const(bcf_hdr_t)* header, bcf1_t* line, int mask);

/**
 *  bcf_remove_allele_set() - drop the ALT alleles selected by bitset @rm_set
 *  @header:  for access to BCF_DT_ID dictionary
 *  @line:    VCF line obtained from vcf_parse1
 *  @rm_set:  pointer to kbitset_t object with bits set for allele
 *            indexes to remove
 *
 *  Returns 0 on success or -1 on failure
 *
 *  Number=A,R,G INFO and FORMAT fields will be updated accordingly.
 */
int bcf_remove_allele_set(const(bcf_hdr_t)* header, bcf1_t* line, const(kbitset_t)* rm_set);

/**
 *  bcf_calc_ac() - count the REF and ALT alleles
 *  @header:  for access to BCF_DT_ID dictionary
 *  @line:    VCF line obtained from vcf_parse1
 *  @ac:      array of length line->n_allele
 *  @which:   determine if INFO/AN,AC and indv fields be used
 *
 *  Returns 1 if the call succeeded, or 0 if the value could not
 *  be determined.
 *
 *  @which controls whether existing INFO/AC,AN may be used
 *  (BCF_UN_INFO) and whether indv fields may be split (BCF_UN_FMT).
 */
int bcf_calc_ac(const(bcf_hdr_t)* header, bcf1_t* line, int* ac, int which);

/// Genotype classes returned by bcf_gt_type().
/// note: the actual value of GT_* matters, used in dosage r2 calculation
enum
{
    GT_HOM_RR = 0,
    GT_HOM_AA = 1,
    GT_HET_RA = 2,
    GT_HET_AA = 3,
    GT_HAPL_R = 4,
    GT_HAPL_A = 5,
    GT_UNKN = 6
}

/**
 *  bcf_gt_type() - determines type of the genotype
 *  @fmt_ptr:  the GT format field as set for example by set_fmt_ptr
 *  @isample:  sample index (starting from 0)
 *  @ial:      index of the 1st non-reference allele (starting from 1)
 *  @jal:      index of the 2nd non-reference allele (starting from 1)
 *
 *  Returns the type of the genotype (one of GT_HOM_RR, GT_HET_RA,
 *  GT_HOM_AA, GT_HET_AA, GT_HAPL_R, GT_HAPL_A or GT_UNKN). If @ial
 *  is not NULL and the genotype has one or more non-reference
 *  alleles, @ial will be set. In case of GT_HET_AA, @ial is the
 *  position of the allele which appeared first in ALT. If @jal is
 *  not null and the genotype is GT_HET_AA, @jal will be set and is
 *  the position of the second allele in ALT.
 */
int bcf_gt_type(bcf_fmt_t* fmt_ptr, int isample, int* ial, int* jal);

/**
 *  bcf_acgt2int() - map a nucleotide letter to its 0..3 code
 *  @c:  character to translate
 *
 *  Returns 0, 1, 2 or 3 for A, C, G or T (upper or lower case) and -1
 *  for any other character.
 */
pragma(inline, true)
int bcf_acgt2int(char c)
{
    // Every code above 96 is shifted down by 32, which folds a-z onto A-Z;
    // other shifted values never land on A, C, G or T.
    immutable char folded = (cast(int) c > 96) ? cast(char)(c - 32) : c;

    switch (folded)
    {
        case 'A': return 0;
        case 'C': return 1;
        case 'G': return 2;
        case 'T': return 3;
        default:  return -1;
    }
}

/**
 *  bcf_int2acgt() - map a 0..3 code back to its nucleotide letter
 *  @i:  index into "ACGT"
 *
 *  Returns the character of "ACGT" at position @i.
 */
extern (D) auto bcf_int2acgt(T)(auto ref T i)
{
    enum string bases = "ACGT";
    return bases[i];
}

/**
 *  bcf_ij2G() - common task: allele indexes to Number=G index (diploid)
 *  @i,j:  allele indexes, 0-based, i<=j
 *
 *  Returns index to the Number=G diploid array
 */
extern (D) auto bcf_ij2G(T0, T1)(auto ref T0 i, auto ref T1 j)
{
    // j*(j+1)/2 is the offset of the first genotype whose larger allele is j.
    auto rowStart = j * (j + 1) / 2;
    return rowStart + i;
}