1 /**
2 
3 GFF file reading and writing
4 
This module provides readable and writable abstractions of GFF3 and GTF records and files.
6 
7 Authors: James S Blachly, MD <james.blachly@gmail.com>; Thomas Gregory <charles.gregory@osumc.edu>
8 License: MIT
9 Date: 2019-01-28
10 Standards: http://gmod.org/wiki/GFF3
11     https://useast.ensembl.org/info/website/upload/gff3.html
12     https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
13     http://www.sequenceontology.org/gff3.shtml
14 */
15 module dhtslib.gff;
16 
17 public import dhtslib.gff.record;
18 public import dhtslib.gff.reader;
19 public import dhtslib.gff.writer;
20 
/// Round-trip test for GTFRecord: parse a record, read every column and
/// attribute, overwrite them all through the setters, and read them back.
debug(dhtslib_unittest) unittest
{
    import dhtslib.coordinates;

    // The same GTF gene line is parsed twice; the two inputs differ only in the strand column.
    auto fwd = GTFRecord("chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\tID \"ENSG00000223972.5\" ; gene_id ENSG00000223972.5 ; gene_id ENSG00000223972.5 ; gene_type transcribed_unprocessed_pseudogene ; gene_name DDX11L1 ; level 2 ; havana_gene OTTHUMG00000000961.2"); // @suppress(dscanner.style.long_line)
    auto rev = GTFRecord("chr1\tHAVANA\tgene\t11869\t14409\t.\t-\t.\tID \"ENSG00000223972.5\" ; gene_id ENSG00000223972.5 ; gene_id ENSG00000223972.5 ; gene_type transcribed_unprocessed_pseudogene ; gene_name DDX11L1 ; level 2 ; havana_gene OTTHUMG00000000961.2"); // @suppress(dscanner.style.long_line)

    // --- Fixed columns as parsed -------------------------------------------
    assert(fwd.seqid == "chr1");
    assert(fwd.source == "HAVANA");
    assert(fwd.type == "gene");
    assert(fwd.start == 11_869);
    assert(fwd.end == 14_409);
    assert(fwd.score == -1.0);      // input score column is "."
    assert(fwd.strand() == '+');
    assert(fwd.phase == -1);        // input phase column is "."

    // --- Attribute column as parsed ----------------------------------------
    // The quoted ID value in the input is expected back without its quotes.
    assert(fwd["ID"] == "ENSG00000223972.5");
    assert(fwd["gene_id"] == "ENSG00000223972.5");
    assert(fwd["gene_type"] == "transcribed_unprocessed_pseudogene");
    assert(fwd["gene_name"] == "DDX11L1");
    assert(fwd["level"] == "2");
    assert(fwd["havana_gene"] == "OTTHUMG00000000961.2");

    // --- Derived extent ----------------------------------------------------
    assert(fwd.length == 2_541);
    assert(fwd.relativeStart == 1);
    assert(fwd.relativeEnd == 2_540);

    // --- Strand-aware coordinates ------------------------------------------
    // On '+' the offset is counted upward from start; on '-' downward from end.
    assert(fwd.coordinateAtOffset(2) == 11_871);
    assert(rev.coordinateAtOffset(2) == 14_407);

    assert(fwd.coordinateAtBegin == 11_869);
    assert(fwd.coordinateAtEnd   == 14_409);

    assert(rev.coordinateAtBegin == 14_409);
    assert(rev.coordinateAtEnd   == 11_869);

    // --- Overwrite every column and attribute through the setters ----------
    fwd.seqid = "chr2";
    fwd.source = "HAVANA1";
    fwd.type = "gene1";
    fwd.coordinates = OBC(11_870, 14_410);
    fwd.score = 1.0;
    fwd.strand = '-';
    fwd.phase = 1;
    fwd["ID"] = "ENSG00000223972.51";
    fwd["gene_id"] = "ENSG00000223972.51";
    fwd["gene_type"] = "transcribed_unprocessed_pseudogene1";
    fwd["gene_name"] = "DDX11L11";
    fwd["level"] = "21";
    fwd["havana_gene"] = "OTTHUMG00000000961.21";

    // --- Fixed columns after mutation --------------------------------------
    assert(fwd.seqid == "chr2");
    assert(fwd.source == "HAVANA1");
    assert(fwd.type == "gene1");
    assert(fwd.start == 11_870);
    assert(fwd.end == 14_410);
    assert(fwd.score == 1.0);
    assert(fwd.strand() == '-');
    assert(fwd.phase == 1);

    // --- Attribute column after mutation -----------------------------------
    // NOTE(review): values assigned above without quotes are expected back
    // WITH surrounding double quotes, whereas the parsed values earlier are
    // expected unquoted -- confirm this asymmetry is the intended GTF behavior.
    assert(fwd["ID"] == "\"ENSG00000223972.51\"");
    assert(fwd["gene_id"] == "\"ENSG00000223972.51\"");
    assert(fwd["gene_type"] == "\"transcribed_unprocessed_pseudogene1\"");
    assert(fwd["gene_name"] == "\"DDX11L11\"");
    assert(fwd["level"] == "\"21\"");
    assert(fwd["havana_gene"] == "\"OTTHUMG00000000961.21\"");

    // --- Derived extent is unchanged: the interval moved by one at both ends
    assert(fwd.length == 2_541);
    assert(fwd.relativeStart == 1);
    assert(fwd.relativeEnd == 2_540);

    // --- Strand-aware coordinates now follow the '-' strand ----------------
    assert(fwd.coordinateAtOffset(2) == 14_408);

    assert(fwd.coordinateAtBegin == 14_410);
    assert(fwd.coordinateAtEnd   == 11_870);

    // TODO validator
    assert(fwd.isValid);
}
98 
/// Round-trip test for GFF3Record: parse a record, read every column and
/// attribute, overwrite them all through the setters, and read them back.
debug(dhtslib_unittest) unittest
{
    import dhtslib.coordinates;

    // The same GFF3 gene line is parsed twice; the two inputs differ only in the strand column.
    auto fwd = GFF3Record("chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\tID=ENSG00000223972.5;gene_id=ENSG00000223972.5;gene_id=ENSG00000223972.5;gene_type=transcribed_unprocessed_pseudogene;gene_name=DDX11L1;level=2;havana_gene=OTTHUMG00000000961.2"); // @suppress(dscanner.style.long_line)
    auto rev = GFF3Record("chr1\tHAVANA\tgene\t11869\t14409\t.\t-\t.\tID=ENSG00000223972.5;gene_id=ENSG00000223972.5;gene_id=ENSG00000223972.5;gene_type=transcribed_unprocessed_pseudogene;gene_name=DDX11L1;level=2;havana_gene=OTTHUMG00000000961.2"); // @suppress(dscanner.style.long_line)

    // --- Fixed columns as parsed -------------------------------------------
    assert(fwd.seqid == "chr1");
    assert(fwd.source == "HAVANA");
    assert(fwd.type == "gene");
    assert(fwd.start == 11_869);
    assert(fwd.end == 14_409);
    assert(fwd.score == -1.0);      // input score column is "."
    assert(fwd.strand() == '+');
    assert(fwd.phase == -1);        // input phase column is "."

    // --- Attribute column as parsed (key=value pairs) ----------------------
    assert(fwd["ID"] == "ENSG00000223972.5");
    assert(fwd["gene_id"] == "ENSG00000223972.5");
    assert(fwd["gene_type"] == "transcribed_unprocessed_pseudogene");
    assert(fwd["gene_name"] == "DDX11L1");
    assert(fwd["level"] == "2");
    assert(fwd["havana_gene"] == "OTTHUMG00000000961.2");

    // --- Derived extent ----------------------------------------------------
    assert(fwd.length == 2_541);
    assert(fwd.relativeStart == 1);
    assert(fwd.relativeEnd == 2_540);

    // --- Strand-aware coordinates ------------------------------------------
    // On '+' the offset is counted upward from start; on '-' downward from end.
    assert(fwd.coordinateAtOffset(2) == 11_871);
    assert(rev.coordinateAtOffset(2) == 14_407);

    assert(fwd.coordinateAtBegin == 11_869);
    assert(fwd.coordinateAtEnd   == 14_409);

    assert(rev.coordinateAtBegin == 14_409);
    assert(rev.coordinateAtEnd   == 11_869);

    // --- Overwrite every column and attribute through the setters ----------
    fwd.seqid = "chr2";
    fwd.source = "HAVANA1";
    fwd.type = "gene1";
    fwd.coordinates = OBC(11_870, 14_410);
    fwd.score = 1.0;
    fwd.strand = '-';
    fwd.phase = 1;
    fwd["ID"] = "ENSG00000223972.51";
    fwd["gene_id"] = "ENSG00000223972.51";
    fwd["gene_type"] = "transcribed_unprocessed_pseudogene1";
    fwd["gene_name"] = "DDX11L11";
    fwd["level"] = "21";
    fwd["havana_gene"] = "OTTHUMG00000000961.21";

    // --- Fixed columns after mutation --------------------------------------
    assert(fwd.seqid == "chr2");
    assert(fwd.source == "HAVANA1");
    assert(fwd.type == "gene1");
    assert(fwd.start == 11_870);
    assert(fwd.end == 14_410);
    assert(fwd.score == 1.0);
    assert(fwd.strand() == '-');
    assert(fwd.phase == 1);

    // --- Attribute column after mutation: values read back exactly as assigned
    assert(fwd["ID"] == "ENSG00000223972.51");
    assert(fwd["gene_id"] == "ENSG00000223972.51");
    assert(fwd["gene_type"] == "transcribed_unprocessed_pseudogene1");
    assert(fwd["gene_name"] == "DDX11L11");
    assert(fwd["level"] == "21");
    assert(fwd["havana_gene"] == "OTTHUMG00000000961.21");

    // --- Derived extent is unchanged: the interval moved by one at both ends
    assert(fwd.length == 2_541);
    assert(fwd.relativeStart == 1);
    assert(fwd.relativeEnd == 2_540);

    // --- Strand-aware coordinates now follow the '-' strand ----------------
    assert(fwd.coordinateAtOffset(2) == 14_408);

    assert(fwd.coordinateAtBegin == 14_410);
    assert(fwd.coordinateAtEnd   == 11_870);

    // TODO validator
    assert(fwd.isValid);
}
176