1 module dhtslib.gff.reader;
2 
3 import std.range : inputRangeObject, InputRangeObject;
4 
5 import dhtslib.bgzf;
6 import dhtslib.tabix;
7 import dhtslib.gff;
8 import dhtslib.coordinates;
9 
/// Builds a `RecordReader` range that yields `GFF3Record`s.
///
/// Params:
///     fn = handed unchanged to `RecordReader`; presumably the path of the
///          GFF3 file to read (confirm against `RecordReader`)
///
/// Returns: the `RecordReader!GFF3Record` constructed from `fn`
auto GFF3Reader(string fn)
{
    auto reader = RecordReader!GFF3Record(fn);
    return reader;
}
15 
/// Builds a `RecordReaderRegion` range that yields `GFF3Record`s for one region.
///
/// Params:
///     cs     = coordinate system of `coords`; can be deduced from the argument
///     fn     = forwarded to `RecordReaderRegion`; presumably the path of the
///              GFF3 file (confirm against `RecordReaderRegion`)
///     chrom  = forwarded to `RecordReaderRegion`; presumably the contig to query
///     coords = interval, expressed in coordinate system `cs`
///     fnIdx  = forwarded to `RecordReaderRegion`, empty string by default;
///              presumably an explicit index file name (confirm)
///
/// Returns: the `RecordReaderRegion!(GFF3Record, cs)` constructed from the arguments
auto GFF3Reader(CoordSystem cs)(string fn, string chrom, Interval!cs coords, string fnIdx = "")
{
    auto regionReader = RecordReaderRegion!(GFF3Record, cs)(fn, chrom, coords, fnIdx);
    return regionReader;
}
21 
/// Builds a `RecordReader` range that yields `GTFRecord`s.
///
/// Params:
///     fn = handed unchanged to `RecordReader`; presumably the path of the
///          GTF file to read (confirm against `RecordReader`)
///
/// Returns: the `RecordReader!GTFRecord` constructed from `fn`
auto GTFReader(string fn)
{
    auto reader = RecordReader!GTFRecord(fn);
    return reader;
}
27 
/// Builds a `RecordReaderRegion` range that yields `GTFRecord`s for one region.
///
/// Params:
///     cs     = coordinate system of `coords`; can be deduced from the argument
///     fn     = forwarded to `RecordReaderRegion`; presumably the path of the
///              GTF file (confirm against `RecordReaderRegion`)
///     chrom  = forwarded to `RecordReaderRegion`; presumably the contig to query
///     coords = interval, expressed in coordinate system `cs`
///     fnIdx  = forwarded to `RecordReaderRegion`, empty string by default;
///              presumably an explicit index file name (confirm)
///
/// Returns: the `RecordReaderRegion!(GTFRecord, cs)` constructed from the arguments
auto GTFReader(CoordSystem cs)(string fn, string chrom, Interval!cs coords, string fnIdx = "")
{
    auto regionReader = RecordReaderRegion!(GTFRecord, cs)(fn, chrom, coords, fnIdx);
    return regionReader;
}
33 
/// Builds a `RecordReader` range for a GFF2 file.
///
/// The records produced are `GTFRecord`s.
/// NOTE(review): GFF2 input is read through the GTF record type here; confirm
/// that this is intended rather than a dedicated GFF2 record type.
///
/// Params:
///     fn = handed unchanged to `RecordReader`; presumably the path of the
///          GFF2 file to read (confirm against `RecordReader`)
///
/// Returns: the `RecordReader!GTFRecord` constructed from `fn`
auto GFF2Reader(string fn)
{
    auto reader = RecordReader!GTFRecord(fn);
    return reader;
}
39 
/// Builds a `RecordReaderRegion` range for one region of a GFF2 file.
///
/// The records produced are `GTFRecord`s.
/// NOTE(review): GFF2 input is read through the GTF record type here; confirm
/// that this is intended rather than a dedicated GFF2 record type.
///
/// Params:
///     cs     = coordinate system of `coords`; can be deduced from the argument
///     fn     = forwarded to `RecordReaderRegion`; presumably the path of the
///              GFF2 file (confirm against `RecordReaderRegion`)
///     chrom  = forwarded to `RecordReaderRegion`; presumably the contig to query
///     coords = interval, expressed in coordinate system `cs`
///     fnIdx  = forwarded to `RecordReaderRegion`, empty string by default;
///              presumably an explicit index file name (confirm)
///
/// Returns: the `RecordReaderRegion!(GTFRecord, cs)` constructed from the arguments
auto GFF2Reader(CoordSystem cs)(string fn, string chrom, Interval!cs coords, string fnIdx = "")
{
    auto regionReader = RecordReaderRegion!(GTFRecord, cs)(fn, chrom, coords, fnIdx);
    return regionReader;
}
45 
// Reads the uncompressed GFF3 fixture with the plain reader and checks every
// field of the first two records.
debug(dhtslib_unittest) unittest
{
    import htslib.hts_log;
    import std.path : buildPath, dirName;

    hts_set_log_level(htsLogLevel.HTS_LOG_INFO);
    hts_log_info(__FUNCTION__, "Testing GFF3Reader");
    hts_log_info(__FUNCTION__, "Loading test file");

    // The fixture is addressed relative to the directory four levels above this source file.
    auto baseDir = dirName(dirName(dirName(dirName(__FILE__))));
    auto gffPath = buildPath(baseDir, "htslib", "test", "tabix", "gff_file.gff");

    auto gff = GFF3Reader(gffPath);

    // Fail with a readable message instead of reading `front` of an empty range.
    assert(!gff.empty, "GFF3 fixture produced no records: " ~ gffPath);

    // First record: an exon.
    auto rec = gff.front;
    assert(rec.contig == "X");
    assert(rec.source == "Vega");
    assert(rec.type == "exon");
    assert(rec.coordinates == OBC(2934816, 2935190));
    assert(rec.score == -1);
    assert(rec.strand == '-');
    assert(rec.phase == -1);
    assert(rec["Name"] == "OTTHUME00001604789");
    assert(rec["Parent"] == "OTTHUMT00000055643");
    gff.popFront;

    assert(!gff.empty, "GFF3 fixture has fewer than two records: " ~ gffPath);

    // Second record: a gene.
    rec = gff.front;
    assert(rec.contig == "X");
    assert(rec.source == "Vega");
    assert(rec.type == "gene");
    assert(rec.coordinates == OBC(2934816, 2964270));
    assert(rec.score == -1);
    assert(rec.strand == '-');
    assert(rec.phase == -1);
    assert(rec["Name"] == "OTTHUMG00000137358");
}
81 
// Queries a region of the bgzipped GFF3 fixture through the region reader and
// checks how many records come back.
debug(dhtslib_unittest) unittest
{
    import dhtslib.util;
    import htslib.hts_log;
    import std.array : array;
    import std.path : buildPath, dirName;

    hts_set_log_level(htsLogLevel.HTS_LOG_INFO);
    hts_log_info(__FUNCTION__, "Testing GFF3Reader");

    // No index is built by this test: the build call below is disabled and kept
    // only as a reference. Re-enabling it needs `std.stdio`, `std.utf : toUTFz`
    // and `htslib.tbx : tbx_index_build2, tbx_conf_gff`.
    // NOTE(review): the disabled path climbs three directory levels while the
    // live path below climbs four; reconcile before re-enabling.
    // auto err = tbx_index_build2(
    //     toUTFz!(char *)(buildPath(dirName(dirName(dirName(__FILE__))),"htslib","test","tabix","gff_file.gff.gz")),
    //     toUTFz!(char *)("test.tbi"),
    //     0,
    //     &tbx_conf_gff
    // );
    // writeln(err);
    hts_log_info(__FUNCTION__, "Loading test file");

    // The fixture is addressed relative to the directory four levels above this source file.
    auto baseDir = dirName(dirName(dirName(dirName(__FILE__))));
    auto gzPath = buildPath(baseDir, "htslib", "test", "tabix", "gff_file.gff.gz");

    auto reg = getIntervalFromString("X:2934832-2935190");

    // `fnIdx` is left at its default, so index lookup is up to the region reader.
    auto gff = GFF3Reader(gzPath, reg.contig, reg.interval);

    assert(gff.array.length == 4);
}
115 
// Reads the whole uncompressed GFF3 fixture and checks the total record count.
debug(dhtslib_unittest) unittest
{
    import htslib.hts_log;
    import std.array : array;
    import std.path : buildPath, dirName;

    hts_set_log_level(htsLogLevel.HTS_LOG_INFO);
    // NOTE(review): the label mentions "save", but this test never calls `save`;
    // it only counts records. Message text left unchanged.
    hts_log_info(__FUNCTION__, "Testing GFF3Reader save");
    hts_log_info(__FUNCTION__, "Loading test file");

    // The fixture is addressed relative to the directory four levels above this source file.
    auto baseDir = dirName(dirName(dirName(dirName(__FILE__))));
    auto gffPath = buildPath(baseDir, "htslib", "test", "tabix", "gff_file.gff");

    auto gff = GFF3Reader(gffPath);
    assert(gff.array.length == 62);
}
131 
// Checks that `save` on the region reader captures the current position:
// each snapshot taken after one more `popFront` must yield one record fewer.
debug(dhtslib_unittest) unittest
{
    import dhtslib.util;
    import htslib.hts_log;
    import std.array : array;
    import std.path : buildPath, dirName;

    hts_set_log_level(htsLogLevel.HTS_LOG_INFO);
    hts_log_info(__FUNCTION__, "Testing GFF3Reader save (tabix)");
    hts_log_info(__FUNCTION__, "Loading test file");

    // The fixture is addressed relative to the directory four levels above this source file.
    auto baseDir = dirName(dirName(dirName(dirName(__FILE__))));
    auto gzPath = buildPath(baseDir, "htslib", "test", "tabix", "gff_file.gff.gz");

    auto reg = getIntervalFromString("X:2934832-2935190");
    auto gff = GFF3Reader(gzPath, reg.contig, reg.interval);

    // Take a snapshot, advance, and repeat; all snapshots are taken before any
    // of them is consumed.
    auto range1 = gff.save;
    gff.popFront;

    auto range2 = gff.save;
    gff.popFront;

    auto range3 = gff.save;
    gff.popFront;

    auto range4 = gff.save;
    gff.popFront;

    auto range5 = gff.save;

    // The region holds four records, so the snapshots count down from 4 to 0.
    assert(range1.array.length == 4);
    assert(range2.array.length == 3);
    assert(range3.array.length == 2);
    assert(range4.array.length == 1);
    assert(range5.array.length == 0);
}