1 module dhtslib.sam.writer; 2 3 import std.stdio : stderr, writeln, File; 4 import std.string : fromStringz, toStringz; 5 import std.parallelism : totalCPUs; 6 import std.format; 7 8 import htslib.hts : htsFile, hts_open, hts_close, hts_hopen, hts_set_threads; 9 import htslib.hts : hts_idx_t, hts_itr_t, hts_itr_multi_t, hts_reglist_t, hts_pair32_t; 10 import htslib.hfile : hdopen, hclose, hFILE; 11 12 import htslib.kstring; 13 import htslib.sam; 14 import htslib.hts_log; 15 import dhtslib.sam.record; 16 import dhtslib.sam.header; 17 import dhtslib.sam : parseSam; 18 import dhtslib.memory; 19 import dhtslib.util; 20 21 22 /// SAM/BAM/CRAM on-disk format. 23 /// `DEDUCE` will attempt to auto-detect from filename or other means 24 enum SAMWriterTypes 25 { 26 BAM, 27 UBAM, 28 SAM, 29 CRAM, 30 DEDUCE 31 } 32 33 /// Encapsulates a SAM/BAM/CRAM, but as write-only 34 struct SAMWriter 35 { 36 /// filename; as usable from D 37 string filename; 38 39 /// filename \0-terminated C string; reference needed to avoid GC reaping result of toStringz when ctor goes out of scope 40 private const(char)* fn; 41 42 /// htsFile 43 private HtsFile fp; 44 45 /// hFILE if required 46 private hFILE* f; 47 48 /// header struct 49 SAMHeader header; 50 51 private kstring_t line; 52 53 /// disallow copying 54 @disable this(this); 55 56 /** Create a representation of SAM/BAM/CRAM file from given filename or File 57 58 Params: 59 fn = string filename (complete path passed to htslib; may support S3:// https:// etc.) 60 extra_threads = extra threads for compression/decompression 61 -1 use all available cores (default) 62 0 use no extra threads 63 >1 add indicated number of threads (to a default of 1) 64 */ 65 this(T)(T f,SAMHeader header, SAMWriterTypes t=SAMWriterTypes.DEDUCE,int extra_threads = -1) 66 if (is(T == string) || is(T == File)) 67 { 68 import std.parallelism : totalCPUs; 69 char[] mode; 70 if(t == SAMWriterTypes.BAM) mode=['w','b','\0']; 71 else if(t == SAMWriterTypes.UBAM) mode=['w','b','0','\0']; 72 else if(t == SAMWriterTypes.SAM) mode=['w','\0']; 73 else if(t == SAMWriterTypes.CRAM) mode=['w','c','\0']; 74 // open file 75 static if (is(T == string)) 76 { 77 if(t == SAMWriterTypes.DEDUCE){ 78 import std.path:extension; 79 auto ext=extension(f); 80 if(ext==".bam") mode=['w','b','\0']; 81 else if(ext==".sam") mode=['w','\0']; 82 else if(ext==".cram") mode=['w','c','\0']; 83 else { 84 hts_log_error(__FUNCTION__,"extension "~ext~" not valid"); 85 throw new Exception("DEDUCE SAMWriterType used with non-valid extension"); 86 } 87 } 88 this.filename = f; 89 this.fn = toStringz(f); 90 this.fp = hts_open(this.fn, mode.ptr); 91 } 92 else static if (is(T == File)) 93 { 94 assert(t!=SAMWriterTypes.DEDUCE); 95 this.filename = f.name(); 96 this.fn = toStringz(f.name); 97 this.f = hdopen(f.fileno, cast(immutable(char)*) "w"); 98 this.fp = hts_hopen(this.f, this.fn, mode.ptr); 99 } 100 else assert(0); 101 102 if (extra_threads == -1) 103 { 104 if ( totalCPUs > 1) 105 { 106 hts_log_info(__FUNCTION__, 107 format("%d CPU cores detected; enabling multithreading", totalCPUs)); 108 // hts_set_threads adds N _EXTRA_ threads, so totalCPUs - 1 seemed reasonable, 109 // but overcomitting by 1 thread (i.e., passing totalCPUs) buys an extra 3% on my 2-core 2013 Mac 110 hts_set_threads(this.fp, totalCPUs); 111 } 112 } 113 else if (extra_threads > 0) 114 { 115 if ((extra_threads + 1) > totalCPUs) 116 hts_log_warning(__FUNCTION__, "More threads requested than CPU cores detected"); 117 hts_set_threads(this.fp, extra_threads); 118 } 119 else if (extra_threads == 0) 120 { 121 hts_log_debug(__FUNCTION__, "Zero extra threads requested"); 122 } 123 else 124 { 125 hts_log_warning(__FUNCTION__, "Invalid negative number of extra threads requested"); 126 } 127 128 // read header 129 this.header = header.dup; 130 sam_hdr_write(this.fp,this.header.h); 131 } 132 133 134 /// Write a SAMRecord to disk 135 void write(SAMRecord rec){ 136 const auto ret = sam_write1(this.fp, this.header.h, rec.b); 137 assert(ret>=0); 138 } 139 } 140 /// 141 debug(dhtslib_unittest) unittest 142 { 143 import dhtslib.sam; 144 import htslib.hts_log : hts_log_info; 145 import std.path : buildPath,dirName; 146 import std.string : fromStringz; 147 import std.array : array; 148 149 hts_set_log_level(htsLogLevel.HTS_LOG_TRACE); 150 hts_log_info(__FUNCTION__, "Testing SAMWriter"); 151 hts_log_info(__FUNCTION__, "Loading test file"); 152 auto sam = SAMFile(buildPath(dirName(dirName(dirName(dirName(__FILE__)))),"htslib","test","auxf#values.sam"), 0); 153 auto sam2 = SAMWriter("/tmp/test.bam",sam.header); 154 auto sam3 = SAMWriter("/tmp/test2.bam",sam.header, SAMWriterTypes.DEDUCE, 4); 155 auto sam4 = SAMWriter("/tmp/test3.bam",sam.header, SAMWriterTypes.DEDUCE, 0); 156 157 hts_log_info(__FUNCTION__, "Getting read 1"); 158 auto readrange = sam.allRecords; 159 assert(readrange.empty == false); 160 auto read = readrange.front(); 161 162 sam2.write(read); 163 sam3.write(read); 164 sam4.write(read); 165 destroy(sam2); 166 destroy(sam3); 167 destroy(sam4); 168 auto sam5 = SAMFile("/tmp/test.bam"); 169 auto sam6 = SAMFile("/tmp/test2.bam"); 170 auto sam7 = SAMFile("/tmp/test3.bam"); 171 172 assert(sam5.allRecords.array.length == 1); 173 assert(sam5.allRecords.array.length == sam6.allRecords.array.length); 174 assert(sam5.allRecords.array.length == sam7.allRecords.array.length); 175 assert(sam7.allRecords.array.length == sam6.allRecords.array.length); 176 177 } 178 179 180