1 module dhtslib.sam.writer;
2 
3 import std.stdio : stderr, writeln, File;
4 import std.string : fromStringz, toStringz;
5 import std.parallelism : totalCPUs;
6 import std.format;
7 
8 import htslib.hts : htsFile, hts_open, hts_close, hts_hopen, hts_set_threads;
9 import htslib.hts : hts_idx_t, hts_itr_t, hts_itr_multi_t, hts_reglist_t, hts_pair32_t;
10 import htslib.hfile : hdopen, hclose, hFILE;
11 
12 import htslib.kstring;
13 import htslib.sam;
14 import htslib.hts_log;
15 import dhtslib.sam.record;
16 import dhtslib.sam.header;
17 import dhtslib.sam : parseSam;
18 import dhtslib.memory;
19 import dhtslib.util;
20 
21 
22 /// SAM/BAM/CRAM on-disk format.
23 /// `DEDUCE` will attempt to auto-detect from filename or other means
24 enum SAMWriterTypes
25 {
26     BAM,
27     UBAM,
28     SAM,
29     CRAM,
30     DEDUCE
31 }
32 
33 /// Encapsulates a SAM/BAM/CRAM, but as write-only
34 struct SAMWriter
35 {
36     /// filename; as usable from D
37     string filename;
38 
39     /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope
40     private const(char)* fn;
41 
42     /// htsFile
43     private HtsFile fp;
44 
45     /// hFILE if required
46     private hFILE* f;
47 
48     /// header struct
49     SAMHeader header;
50 
51     private kstring_t line;
52 
53     /// disallow copying
54     @disable this(this);
55 
56     /** Create a representation of SAM/BAM/CRAM file from given filename or File
57 
58     Params:
59         fn =            string filename (complete path passed to htslib; may support S3:// https:// etc.)
60         extra_threads = extra threads for compression/decompression
61                         -1 use all available cores (default)
62                         0  use no extra threads
63                         >1 add indicated number of threads (to a default of 1)
64     */
65     this(T)(T f,SAMHeader header, SAMWriterTypes t=SAMWriterTypes.DEDUCE,int extra_threads = -1)
66     if (is(T == string) || is(T == File))
67     {
68         import std.parallelism : totalCPUs;
69         char[] mode;
70         if(t == SAMWriterTypes.BAM) mode=['w','b','\0'];
71         else if(t == SAMWriterTypes.UBAM) mode=['w','b','0','\0'];
72         else if(t == SAMWriterTypes.SAM) mode=['w','\0'];
73         else if(t == SAMWriterTypes.CRAM) mode=['w','c','\0'];
74         // open file
75         static if (is(T == string))
76         {
77             if(t == SAMWriterTypes.DEDUCE){
78                 import std.path:extension;
79                 auto ext=extension(f);
80                 if(ext==".bam") mode=['w','b','\0'];
81                 else if(ext==".sam") mode=['w','\0'];
82                 else if(ext==".cram") mode=['w','c','\0'];
83                 else {
84                     hts_log_error(__FUNCTION__,"extension "~ext~" not valid");
85                     throw new Exception("DEDUCE SAMWriterType used with non-valid extension");
86                 }
87             }
88             this.filename = f;
89             this.fn = toStringz(f);
90             this.fp = hts_open(this.fn, mode.ptr);
91         }
92         else static if (is(T == File))
93         {
94             assert(t!=SAMWriterTypes.DEDUCE);
95             this.filename = f.name();
96             this.fn = toStringz(f.name);
97             this.f = hdopen(f.fileno, cast(immutable(char)*) "w");
98             this.fp = hts_hopen(this.f, this.fn, mode.ptr);
99         }
100         else assert(0);
101 
102         if (extra_threads == -1)
103         {
104             if ( totalCPUs > 1)
105             {
106                 hts_log_info(__FUNCTION__,
107                         format("%d CPU cores detected; enabling multithreading", totalCPUs));
108                 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable,
109                 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac
110                 hts_set_threads(this.fp, totalCPUs);
111             }
112         }
113         else if (extra_threads > 0)
114         {
115             if ((extra_threads + 1) > totalCPUs)
116                 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected");
117             hts_set_threads(this.fp, extra_threads);
118         }
119         else if (extra_threads == 0)
120         {
121             hts_log_debug(__FUNCTION__, "Zero extra threads requested");
122         }
123         else
124         {
125             hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested");
126         }
127 
128         // read header
129         this.header = header.dup;
130         sam_hdr_write(this.fp,this.header.h);
131     }
132 
133 
134     /// Write a SAMRecord to disk
135     void write(SAMRecord rec){
136         const auto ret = sam_write1(this.fp, this.header.h, rec.b);
137         assert(ret>=0);
138     }
139 }
140 ///
141 debug(dhtslib_unittest) unittest
142 {
143     import dhtslib.sam;
144     import htslib.hts_log : hts_log_info;
145     import std.path : buildPath,dirName;
146     import std.string : fromStringz;
147     import std.array : array;
148 
149     hts_set_log_level(htsLogLevel.HTS_LOG_TRACE);
150     hts_log_info(__FUNCTION__, "Testing SAMWriter");
151     hts_log_info(__FUNCTION__, "Loading test file");
152     auto sam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","auxf#values.sam"), 0);
153     auto sam2 = SAMWriter("/tmp/test.bam",sam.header);
154     auto sam3 = SAMWriter("/tmp/test2.bam",sam.header, SAMWriterTypes.DEDUCE, 4);
155     auto sam4 = SAMWriter("/tmp/test3.bam",sam.header, SAMWriterTypes.DEDUCE, 0);
156 
157     hts_log_info(__FUNCTION__, "Getting read 1");
158     auto readrange = sam.allRecords;
159     assert(readrange.empty == false);
160     auto read = readrange.front();
161 
162     sam2.write(read);
163     sam3.write(read);
164     sam4.write(read);
165     destroy(sam2);
166     destroy(sam3);
167     destroy(sam4);
168     auto sam5 = SAMFile("/tmp/test.bam");
169     auto sam6 = SAMFile("/tmp/test2.bam");
170     auto sam7 = SAMFile("/tmp/test3.bam");
171 
172     assert(sam5.allRecords.array.length == 1);
173     assert(sam5.allRecords.array.length == sam6.allRecords.array.length);
174     assert(sam5.allRecords.array.length == sam7.allRecords.array.length);
175     assert(sam7.allRecords.array.length == sam6.allRecords.array.length);
176 
177 }
178 
179 
180