module dhtslib.bed.record;

import std.range : inputRangeObject, InputRangeObject;
import std.algorithm: splitter, map;
import std.range: drop, enumerate;
import std.conv: to;
import std.array: join, split;

import dhtslib.coordinates;
import dhtslib.bgzf;
import dhtslib.tabix;

/// Color triple used for BED column 9 (itemRgb).
/// `RGB.init` (0,0,0) is what the `itemRGB` getter returns for the literal "0".
struct RGB
{
    ubyte red;   /// red channel
    ubyte green; /// green channel
    ubyte blue;  /// blue channel
}

/// Represents a record in a bed file.
/// Based on UCSC format and methods are derived from
/// UCSC's specifications.
///
/// A record is kept in one of two states:
///  - packed: only the raw tab-delimited `line` is stored and getters
///    split it lazily on every call;
///  - unpacked: the line has been split into `fields`, which is what
///    the setters modify and what `toString` re-joins.
/// A default-initialized record starts out unpacked with no fields.
///
/// Getters do not verify that the requested column exists; reading a
/// column the record does not have is an error on the caller's side.
struct BedRecord
{
    private string line;

    private bool unpacked = true;
    private string[] fields;

    /// string ctor: stores the raw line, splitting is deferred until a setter runs
    this(string line)
    {
        this.line = line;
        this.unpacked = false;
    }

    /// unpack fields of bed line for mutability (no-op when already unpacked)
    private void unpack()
    {
        if(unpacked) return;
        this.fields = this.line.split("\t");
        this.unpacked = true;
    }

    /// Fetch column `idx` (zero-based) as a string, from `fields` when
    /// unpacked and otherwise by lazily splitting the raw line.
    private string getField(size_t idx) const
    {
        if(unpacked) return this.fields[idx];
        return this.line.splitter('\t').drop(idx).front;
    }

    /// Store `val` in column `idx` (zero-based), unpacking first and
    /// growing `fields` so that index `idx` is valid.
    private void setField(size_t idx, string val)
    {
        unpack;
        // length must be idx + 1 (not idx) for fields[idx] to exist
        if(this.fields.length <= idx) this.fields.length = idx + 1;
        this.fields[idx] = val;
    }

    /// Parse a comma-separated integer list into an array of `count` ints.
    /// Empty tokens are skipped so that the trailing comma commonly found
    /// in BED12 block lists (e.g. "567,488,") is accepted.
    /// Slots beyond the number of parsed items stay 0.
    private static int[] parseBlockList(string str, int count)
    {
        int[] arr = new int[count];
        size_t filled;
        foreach (tok; str.splitter(','))
        {
            if(tok.length == 0) continue;
            arr[filled++] = tok.to!int;
        }
        return arr;
    }

    /// column 1: The name of the chromosome or scaffold.
    /// getter
    @property contig() const
    {
        return getField(0);
    }

    /// column 1: The name of the chromosome or scaffold.
    /// setter
    @property contig(string val)
    {
        unpack;
        // an empty record is sized to the three mandatory BED columns
        if(this.fields.length == 0) this.fields.length = 3;
        this.fields[0] = val;
    }

    /// Columns 2 & 3 as coordinate pair, Zero-based half-open.
    /// column 2: The starting position of the feature in the chromosome or scaffold.
    /// column 3: The ending position of the feature in the chromosome or scaffold.
    /// getter
    @property coordinates() const
    {
        if(unpacked)
            return ZBHO(this.fields[1].to!long, this.fields[2].to!long);
        // walk the splitter once instead of re-splitting per column
        auto cols = this.line.splitter('\t').drop(1);
        auto start = cols.front.to!long;
        cols.popFront;
        auto end = cols.front.to!long;
        return ZBHO(start, end);
    }

    /// Columns 2 & 3 as coordinate pair, Zero-based half-open.
    /// column 2: The starting position of the feature in the chromosome or scaffold.
    /// column 3: The ending position of the feature in the chromosome or scaffold.
    /// setter: accepts an interval in any coordinate system and stores it
    /// converted to zero-based half-open.
    @property coordinates(CoordSystem cs)(Interval!cs coords)
    {
        auto newCoords = coords.to!(CoordSystem.zbho);
        setField(1, newCoords.start.pos.to!string);
        setField(2, newCoords.end.pos.to!string);
    }

    /// column 2: The starting position of the feature in the chromosome or scaffold.
    @property start() const { return this.coordinates.start; }

    /// column 3: The ending position of the feature in the chromosome or scaffold.
    @property end() const { return this.coordinates.end; }

    /// column 4: Defines the name of the BED line.
    /// getter
    @property name() const
    {
        return getField(3);
    }

    /// column 4: Defines the name of the BED line.
    /// setter
    @property name(string val)
    {
        setField(3, val);
    }

    /// column 5: A score between 0 and 1000.
    /// getter
    @property score() const
    {
        return getField(4).to!int;
    }

    /// column 5: A score between 0 and 1000.
    /// setter (the range is not validated here)
    @property score(int val)
    {
        setField(4, val.to!string);
    }

    /// column 6: Defines the strand. Either "." (=no strand) or "+" or "-".
    /// getter: returns the first character of the column
    @property char strand() const
    {
        return getField(5)[0];
    }

    /// column 6: Defines the strand. Either "." (=no strand) or "+" or "-".
    /// setter: any other character trips the assert
    @property strand(char val)
    {
        assert(val == '.' || val == '+'|| val == '-');
        setField(5, [val].idup);
    }

    /// column 7: The starting position at which the feature is drawn thickly;
    /// getter
    @property thickStart() const
    {
        return getField(6).to!int;
    }

    /// column 7: The starting position at which the feature is drawn thickly;
    /// setter
    @property thickStart(int val)
    {
        setField(6, val.to!string);
    }

    /// column 8: The ending position at which the feature is drawn thickly
    /// getter
    @property thickEnd() const
    {
        return getField(7).to!int;
    }

    /// column 8: The ending position at which the feature is drawn thickly
    /// setter
    @property thickEnd(int val)
    {
        setField(7, val.to!string);
    }

    /// column 9: An RGB value of the form R,G,B (e.g. 255,0,0).
    /// getter: the literal "0" yields RGB.init
    @property itemRGB() const
    {
        string str = getField(8);
        if(str == "0") return RGB.init;
        // single pass over the three comma-separated channels
        auto channels = str.splitter(',');
        ubyte r = channels.front.to!ubyte;
        channels.popFront;
        ubyte g = channels.front.to!ubyte;
        channels.popFront;
        ubyte b = channels.front.to!ubyte;
        return RGB(r, g, b);
    }

    /// column 9: An RGB value of the form R,G,B (e.g. 255,0,0).
    /// Setter
    @property itemRGB(RGB rgb)
    {
        setField(8, [rgb.red, rgb.green, rgb.blue].map!(x => x.to!string).join(","));
    }

    /// column 10: The number of blocks (exons) in the BED line.
    /// getter
    @property blockCount() const
    {
        return getField(9).to!int;
    }

    /// column 10: The number of blocks (exons) in the BED line.
    /// setter
    @property blockCount(int count)
    {
        setField(9, count.to!string);
    }

    /// column 11: A comma-separated list of the block sizes.
    /// The number of items in this list should correspond to blockCount.
    /// getter: returns an array of blockCount ints; a trailing comma in
    /// the column is tolerated
    @property blockSizes() const
    {
        return parseBlockList(getField(10), this.blockCount);
    }

    /// column 11: A comma-separated list of the block sizes.
    /// The number of items in this list should correspond to blockCount.
    /// setter (does not update blockCount)
    @property blockSizes(int[] vals)
    {
        setField(10, vals.map!(x => x.to!string).join(","));
    }

    /// column 12: A comma-separated list of block starts.
    /// All of the blockStart positions should be calculated relative to chromStart.
    /// The number of items in this list should correspond to blockCount.
    /// getter: returns an array of blockCount ints; a trailing comma in
    /// the column is tolerated
    @property blockStarts() const
    {
        return parseBlockList(getField(11), this.blockCount);
    }

    /// column 12: A comma-separated list of block starts.
    /// All of the blockStart positions should be calculated relative to chromStart.
    /// The number of items in this list should correspond to blockCount.
    /// setter (does not update blockCount)
    @property blockStarts(int[] vals)
    {
        setField(11, vals.map!(x => x.to!string).join(","));
    }

    /// get column idx as a string
    /// helps if your bed file isn't a ucsc bed file
    auto opIndex(ulong idx) const
    {
        return getField(idx);
    }

    /// set column idx as a string
    /// helps if your bed file isn't a ucsc bed file
    /// The record is grown as needed so that column idx exists;
    /// returns the assigned value.
    auto opIndexAssign(string val, ulong idx)
    {
        setField(idx, val);
        return val;
    }

    /// return bed line: the re-joined fields once unpacked, otherwise the
    /// original line untouched
    string toString() const
    {
        if(unpacked) return this.fields.join("\t");
        return this.line;
    }
}