1 module dhtslib.bed.record;
2 
3 import std.range : inputRangeObject, InputRangeObject;
4 import std.algorithm: splitter, map;
5 import std.range: drop, enumerate;
6 import std.conv: to;
7 import std.array: join, split;
8 
9 import dhtslib.coordinates;
10 import dhtslib.bgzf;
11 import dhtslib.tabix;
12 
/// Plain 8-bit-per-channel colour triple, as used by BED column 9 (itemRgb).
/// All channels default to 0.
struct RGB
{
    /// Colour channels, declared in red, green, blue order.
    ubyte red, green, blue;
}
19 
/// Represents a record in a bed file.
/// Based on UCSC format and methods are derived from
/// UCSC's specifications.
///
/// The record is stored either as the raw tab-delimited line (as handed to
/// the constructor) or, after the first call to any setter, as an array of
/// column strings. Getters read from whichever representation is current.
/// Getters do not check that the requested column exists.
struct BedRecord
{
    /// raw line as passed to the string ctor; only consulted while `unpacked` is false
    private string line;

    /// true when `fields` is the authoritative representation.
    /// A default-constructed record starts out "unpacked" with no fields.
    private bool unpacked = true;
    /// individual columns; valid only when `unpacked` is true
    private string[] fields;

    /// string ctor: stores the line as-is, splitting is deferred until a setter is used
    this(string line)
    {
        this.line = line;
        this.unpacked = false;
    }

    /// unpack fields of bed line for mutability
    /// (no-op if the record has already been unpacked)
    private void unpack()
    {
        if(unpacked) return;
        this.fields = this.line.split("\t");
        this.unpacked = true;
    }

    /// Parse a comma-separated list of integers.
    /// Empty items are skipped so that lists written with a trailing comma
    /// (e.g. "567,488,") parse the same as "567,488".
    /// Throws whatever `to!int` throws for a non-numeric item.
    private static int[] parseIntList(string list)
    {
        int[] values;
        foreach (item; list.splitter(','))
        {
            if(item.length == 0) continue;
            values ~= item.to!int;
        }
        return values;
    }

    /// column 1: The name of the chromosome or scaffold.
    /// getter
    @property contig() const
    {
        if(unpacked) return this.fields[0];
        return this.line.splitter('\t').front;
    }

    /// column 1: The name of the chromosome or scaffold.
    /// setter
    @property contig(string val)
    {
        unpack;
        // an empty record is grown straight to the three mandatory columns
        if(this.fields.length == 0) this.fields.length = 3;  
        this.fields[0] = val;
    }

    /// Columns 2 & 3 as coordinate pair, Zero-based half-open.
    /// column 2: The starting position of the feature in the chromosome or scaffold.
    /// column 3: The ending position of the feature in the chromosome or scaffold. 
    /// getter
    @property coordinates() const
    {
        if(unpacked)
            return ZBHO(this.fields[1].to!long, this.fields[2].to!long);
        auto start = (cast(string)this.line.splitter('\t').drop(1).front).to!long;
        auto end = (cast(string)this.line.splitter('\t').drop(2).front).to!long;
        return ZBHO(start, end);
    }

    /// Columns 2 & 3 as coordinate pair, Zero-based half-open.
    /// column 2: The starting position of the feature in the chromosome or scaffold.
    /// column 3: The ending position of the feature in the chromosome or scaffold. 
    /// setter: accepts an interval in any coordinate system and converts it
    /// to zero-based half-open before storing
    @property coordinates(CoordSystem cs)(Interval!cs coords)
    {
        unpack;
        if(this.fields.length < 3) this.fields.length = 3;
        auto newCoords = coords.to!(CoordSystem.zbho);
        this.fields[1] = newCoords.start.pos.to!string;
        this.fields[2] = newCoords.end.pos.to!string; 
    }
    
    /// column 2: The starting position of the feature in the chromosome or scaffold.
    @property start() const { return this.coordinates.start; }
    
    /// column 3: The ending position of the feature in the chromosome or scaffold. 
    @property end() const { return this.coordinates.end; }

    /// column 4: Defines the name of the BED line.
    /// getter
    @property name() const
    {
        if(unpacked) return this.fields[3];
        return this.line.splitter('\t').drop(3).front; 
    }

    /// column 4: Defines the name of the BED line.
    /// setter (pads the record with empty columns up to column 4 if needed)
    @property name(string val)
    {
        unpack;
        if(this.fields.length < 4) this.fields.length = 4;
        this.fields[3] = val;
    }

    /// column 5: A score between 0 and 1000.
    /// getter (the range is not checked)
    @property score() const
    {
        if(unpacked) return this.fields[4].to!int;
        return this.line.splitter('\t').drop(4).front.to!int;
    }

    /// column 5: A score between 0 and 1000.
    /// setter (the range is not checked; pads up to column 5 if needed)
    @property score(int val)
    {
        unpack;
        if(this.fields.length < 5) this.fields.length = 5;
        this.fields[4] = val.to!string;
    }    
    
    /// column 6: Defines the strand. Either "." (=no strand) or "+" or "-".
    /// getter: returns the first character of the column
    @property strand() const 
    {
        if(unpacked) return this.fields[5][0];
        return cast(char)this.line.splitter('\t').drop(5).front[0];
    }

    /// column 6: Defines the strand. Either "." (=no strand) or "+" or "-".
    /// setter: the allowed values are enforced with an assert only
    @property strand(char val)
    {
        assert(val == '.' || val == '+'|| val == '-');
        unpack;
        if(this.fields.length < 6) this.fields.length = 6;
        this.fields[5] = [val].idup;
    }

    /// column 7: The starting position at which the feature is drawn thickly;
    /// getter
    @property thickStart() const
    {
        if(unpacked) return this.fields[6].to!int;
        return this.line.splitter('\t').drop(6).front.to!int;
    }

    /// column 7: The starting position at which the feature is drawn thickly;
    /// setter (pads up to column 7 if needed)
    @property thickStart(int val)
    {
        unpack;
        if(this.fields.length < 7) this.fields.length = 7;
        this.fields[6] = val.to!string;
    }

    /// column 8: The ending position at which the feature is drawn thickly
    /// getter
    @property thickEnd() const
    {
        if(unpacked) return this.fields[7].to!int;
        return this.line.splitter('\t').drop(7).front.to!int;
    }

    /// column 8: The ending position at which the feature is drawn thickly
    /// setter (pads up to column 8 if needed)
    @property thickEnd(int val)
    {
        unpack;
        if(this.fields.length < 8) this.fields.length = 8;
        this.fields[7] = val.to!string;
    }
    
    /// column 9: An RGB value of the form R,G,B (e.g. 255,0,0).
    /// getter: a column value of exactly "0" yields RGB.init
    @property itemRGB() const 
    {
        string str;
        if(unpacked)
            str = this.fields[8];
        else
            str = this.line.splitter('\t').drop(8).front;
        if(str == "0") return RGB.init;
        return RGB(
            str.splitter(',').front.to!ubyte,
            str.splitter(',').drop(1).front.to!ubyte,
            str.splitter(',').drop(2).front.to!ubyte
            );
    }

    /// column 9: An RGB value of the form R,G,B (e.g. 255,0,0).
    /// Setter: always writes the three-component form (pads up to column 9 if needed)
    @property itemRGB(RGB rgb)
    {
        unpack;
        if(this.fields.length < 9) this.fields.length = 9;
        this.fields[8] = [rgb.red, rgb.green, rgb.blue].map!(x => x.to!string).join(",");
    }

    /// column 10: The number of blocks (exons) in the BED line.
    /// getter
    @property blockCount() const
    {
        if(unpacked) return this.fields[9].to!int;
        return this.line.splitter('\t').drop(9).front.to!int;
    }

    /// column 10: The number of blocks (exons) in the BED line.
    /// setter (pads up to column 10 if needed)
    @property blockCount(int count)
    {
        unpack;
        if(this.fields.length < 10) this.fields.length = 10;
        this.fields[9] = count.to!string;
    }
    
    /// column 11: A comma-separated list of the block sizes. 
    /// The number of items in this list should correspond to blockCount.
    /// getter: returns the values present in the column; a trailing comma
    /// is tolerated and the count is not validated against blockCount
    @property blockSizes() const 
    {
        string str;
        if(unpacked)
            str = this.fields[10];
        else
            str = this.line.splitter('\t').drop(10).front;
        return parseIntList(str);
    }
    
    /// column 11: A comma-separated list of the block sizes. 
    /// The number of items in this list should correspond to blockCount.
    /// setter (does not update blockCount; pads up to column 11 if needed)
    @property blockSizes(int[] vals)
    {
        unpack;
        if(this.fields.length < 11) this.fields.length = 11;
        this.fields[10] = vals.map!(x => x.to!string).join(",");
    }


    /// column 12: A comma-separated list of block starts. 
    /// All of the blockStart positions should be calculated relative to chromStart. 
    /// The number of items in this list should correspond to blockCount.
    /// getter: returns the values present in the column; a trailing comma
    /// is tolerated and the count is not validated against blockCount
    @property blockStarts() const 
    {
        string str;
        if(unpacked)
            str = this.fields[11];
        else
            str = this.line.splitter('\t').drop(11).front;
        return parseIntList(str);
    }

    /// column 12: A comma-separated list of block starts. 
    /// All of the blockStart positions should be calculated relative to chromStart. 
    /// The number of items in this list should correspond to blockCount.
    /// setter (does not update blockCount; pads up to column 12 if needed)
    @property blockStarts(int[] vals)
    {
        unpack;
        if(this.fields.length < 12) this.fields.length = 12;
        this.fields[11] = vals.map!(x => x.to!string).join(",");
    }

    /// get column idx (zero-based) as a string
    /// helps if your bed file isn't a ucsc bed file
    auto opIndex(ulong idx)
    {
        if(unpacked) return this.fields[idx];
        return this.line.splitter('\t').drop(idx).front;
    }

    /// set column idx (zero-based) as a string
    /// helps if your bed file isn't a ucsc bed file
    /// The record is padded with empty columns when idx is past the last column.
    auto opIndexAssign(string val, ulong idx)
    {
        unpack;
        // index idx requires at least idx + 1 columns
        if(this.fields.length <= idx) this.fields.length = idx + 1;
        return this.fields[idx] = val;
    }

    /// return bed line: the tab-joined columns once unpacked,
    /// otherwise the original line unchanged
    string toString() const
    {
        if(unpacked) return this.fields.join("\t");
        return this.line;
    }
}
306