/**

GFF file reading and writing

This module provides a readable, writeable abstraction of GFF records and files.

Authors: James S Blachly, MD <james.blachly@gmail.com>; Thomas Gregory <charles.gregory@osumc.edu>
License: MIT
Date: 2019-01-28
Standards: http://gmod.org/wiki/GFF3
    https://useast.ensembl.org/info/website/upload/gff3.html
    https://github.com/The-Sequence-Ontology/Specifications/blob/master/gff3.md
    http://www.sequenceontology.org/gff3.shtml
*/
module dhtslib.gff;

public import dhtslib.gff.record;
public import dhtslib.gff.reader;
public import dhtslib.gff.writer;

/// GTFRecord: parse one line, read every field back, overwrite every field,
/// then read the new values back.
debug(dhtslib_unittest) unittest
{
    import dhtslib.coordinates;

    // Same feature twice; the two lines differ only in the strand column ('+' vs '-').
    auto record = GTFRecord("chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\tID \"ENSG00000223972.5\" ; gene_id ENSG00000223972.5 ; gene_id ENSG00000223972.5 ; gene_type transcribed_unprocessed_pseudogene ; gene_name DDX11L1 ; level 2 ; havana_gene OTTHUMG00000000961.2"); // @suppress(dscanner.style.long_line)
    auto minusRecord = GTFRecord("chr1\tHAVANA\tgene\t11869\t14409\t.\t-\t.\tID \"ENSG00000223972.5\" ; gene_id ENSG00000223972.5 ; gene_id ENSG00000223972.5 ; gene_type transcribed_unprocessed_pseudogene ; gene_name DDX11L1 ; level 2 ; havana_gene OTTHUMG00000000961.2"); // @suppress(dscanner.style.long_line)

    // --- fixed columns as parsed ('.' score and phase are expected to read back as -1)
    assert(record.seqid == "chr1");
    assert(record.source == "HAVANA");
    assert(record.type == "gene");
    assert(record.start == 11_869);
    assert(record.end == 14_409);
    assert(record.score == -1.0);
    assert(record.strand() == '+');
    assert(record.phase == -1);

    // --- attribute column as parsed (the quoted ID is expected back without quotes)
    assert(record["ID"] == "ENSG00000223972.5");
    assert(record["gene_id"] == "ENSG00000223972.5");
    assert(record["gene_type"] == "transcribed_unprocessed_pseudogene");
    assert(record["gene_name"] == "DDX11L1");
    assert(record["level"] == "2");
    assert(record["havana_gene"] == "OTTHUMG00000000961.2");

    // --- derived geometry
    // NOTE(review): relativeEnd is expected to be length - 1 here although
    // relativeStart is 1 -- confirm against the record implementation.
    assert(record.length == 2541);
    assert(record.relativeStart == 1);
    assert(record.relativeEnd == 2540);

    // --- strand-aware offsets: '+' counts up from start, '-' counts down from end
    assert(record.coordinateAtOffset(2) == 11_871);
    assert(minusRecord.coordinateAtOffset(2) == 14_407);

    assert(record.coordinateAtBegin == 11_869);
    assert(record.coordinateAtEnd == 14_409);

    assert(minusRecord.coordinateAtBegin == 14_409);
    assert(minusRecord.coordinateAtEnd == 11_869);

    // --- overwrite every fixed column ...
    record.seqid = "chr2";
    record.source = "HAVANA1";
    record.type = "gene1";
    record.coordinates = OBC(11_870, 14_410);
    record.score = 1.0;
    record.strand = '-';
    record.phase = 1;

    // ... and every attribute
    record["ID"] = "ENSG00000223972.51";
    record["gene_id"] = "ENSG00000223972.51";
    record["gene_type"] = "transcribed_unprocessed_pseudogene1";
    record["gene_name"] = "DDX11L11";
    record["level"] = "21";
    record["havana_gene"] = "OTTHUMG00000000961.21";

    // --- fixed columns after the update
    assert(record.seqid == "chr2");
    assert(record.source == "HAVANA1");
    assert(record.type == "gene1");
    assert(record.start == 11_870);
    assert(record.end == 14_410);
    assert(record.score == 1.0);
    assert(record.strand() == '-');
    assert(record.phase == 1);

    // --- attributes after the update: assigned GTF values are expected back wrapped in double quotes
    assert(record["ID"] == "\"ENSG00000223972.51\"");
    assert(record["gene_id"] == "\"ENSG00000223972.51\"");
    assert(record["gene_type"] == "\"transcribed_unprocessed_pseudogene1\"");
    assert(record["gene_name"] == "\"DDX11L11\"");
    assert(record["level"] == "\"21\"");
    assert(record["havana_gene"] == "\"OTTHUMG00000000961.21\"");

    // --- geometry after shifting both ends by one: same length
    assert(record.length == 2541);
    assert(record.relativeStart == 1);
    assert(record.relativeEnd == 2540);

    // --- the record is now on the '-' strand, so offsets count down from end
    assert(record.coordinateAtOffset(2) == 14_408);

    assert(record.coordinateAtBegin == 14_410);
    assert(record.coordinateAtEnd == 11_870);

    // TODO validator
    assert(record.isValid);
}

/// GFF3Record: same scenario as the GTF test above, using key=value attributes.
debug(dhtslib_unittest) unittest
{
    import dhtslib.coordinates;

    // Same feature twice; the two lines differ only in the strand column ('+' vs '-').
    auto record = GFF3Record("chr1\tHAVANA\tgene\t11869\t14409\t.\t+\t.\tID=ENSG00000223972.5;gene_id=ENSG00000223972.5;gene_id=ENSG00000223972.5;gene_type=transcribed_unprocessed_pseudogene;gene_name=DDX11L1;level=2;havana_gene=OTTHUMG00000000961.2"); // @suppress(dscanner.style.long_line)
    auto minusRecord = GFF3Record("chr1\tHAVANA\tgene\t11869\t14409\t.\t-\t.\tID=ENSG00000223972.5;gene_id=ENSG00000223972.5;gene_id=ENSG00000223972.5;gene_type=transcribed_unprocessed_pseudogene;gene_name=DDX11L1;level=2;havana_gene=OTTHUMG00000000961.2"); // @suppress(dscanner.style.long_line)

    // --- fixed columns as parsed ('.' score and phase are expected to read back as -1)
    assert(record.seqid == "chr1");
    assert(record.source == "HAVANA");
    assert(record.type == "gene");
    assert(record.start == 11_869);
    assert(record.end == 14_409);
    assert(record.score == -1.0);
    assert(record.strand() == '+');
    assert(record.phase == -1);

    // --- attribute column as parsed
    assert(record["ID"] == "ENSG00000223972.5");
    assert(record["gene_id"] == "ENSG00000223972.5");
    assert(record["gene_type"] == "transcribed_unprocessed_pseudogene");
    assert(record["gene_name"] == "DDX11L1");
    assert(record["level"] == "2");
    assert(record["havana_gene"] == "OTTHUMG00000000961.2");

    // --- derived geometry
    // NOTE(review): relativeEnd is expected to be length - 1 here although
    // relativeStart is 1 -- confirm against the record implementation.
    assert(record.length == 2541);
    assert(record.relativeStart == 1);
    assert(record.relativeEnd == 2540);

    // --- strand-aware offsets: '+' counts up from start, '-' counts down from end
    assert(record.coordinateAtOffset(2) == 11_871);
    assert(minusRecord.coordinateAtOffset(2) == 14_407);

    assert(record.coordinateAtBegin == 11_869);
    assert(record.coordinateAtEnd == 14_409);

    assert(minusRecord.coordinateAtBegin == 14_409);
    assert(minusRecord.coordinateAtEnd == 11_869);

    // --- overwrite every fixed column ...
    record.seqid = "chr2";
    record.source = "HAVANA1";
    record.type = "gene1";
    record.coordinates = OBC(11_870, 14_410);
    record.score = 1.0;
    record.strand = '-';
    record.phase = 1;

    // ... and every attribute
    record["ID"] = "ENSG00000223972.51";
    record["gene_id"] = "ENSG00000223972.51";
    record["gene_type"] = "transcribed_unprocessed_pseudogene1";
    record["gene_name"] = "DDX11L11";
    record["level"] = "21";
    record["havana_gene"] = "OTTHUMG00000000961.21";

    // --- fixed columns after the update
    assert(record.seqid == "chr2");
    assert(record.source == "HAVANA1");
    assert(record.type == "gene1");
    assert(record.start == 11_870);
    assert(record.end == 14_410);
    assert(record.score == 1.0);
    assert(record.strand() == '-');
    assert(record.phase == 1);

    // --- attributes after the update: GFF3 values are expected back exactly as assigned
    assert(record["ID"] == "ENSG00000223972.51");
    assert(record["gene_id"] == "ENSG00000223972.51");
    assert(record["gene_type"] == "transcribed_unprocessed_pseudogene1");
    assert(record["gene_name"] == "DDX11L11");
    assert(record["level"] == "21");
    assert(record["havana_gene"] == "OTTHUMG00000000961.21");

    // --- geometry after shifting both ends by one: same length
    assert(record.length == 2541);
    assert(record.relativeStart == 1);
    assert(record.relativeEnd == 2540);

    // --- the record is now on the '-' strand, so offsets count down from end
    assert(record.coordinateAtOffset(2) == 14_408);

    assert(record.coordinateAtBegin == 14_410);
    assert(record.coordinateAtEnd == 11_870);

    // TODO validator
    assert(record.isValid);
}