001package com.hfg.chem.format; 002 003 004import java.io.BufferedReader; 005import java.io.ByteArrayInputStream; 006import java.io.IOException; 007import java.io.InputStream; 008import java.io.InputStreamReader; 009import java.util.ArrayList; 010import java.util.List; 011 012import com.hfg.chem.Molecule; 013import com.hfg.util.StringBuilderPlus; 014import com.hfg.util.collection.CollectionUtil; 015import com.hfg.util.io.GZIP; 016 017//------------------------------------------------------------------------------ 018/** 019 Buffered molecule reader. 020 <div> 021 @author J. Alex Taylor, hairyfatguy.com 022 </div> 023 */ 024//------------------------------------------------------------------------------ 025// com.hfg Library 026// 027// This library is free software; you can redistribute it and/or 028// modify it under the terms of the GNU Lesser General Public 029// License as published by the Free Software Foundation; either 030// version 2.1 of the License, or (at your option) any later version. 031// 032// This library is distributed in the hope that it will be useful, 033// but WITHOUT ANY WARRANTY; without even the implied warranty of 034// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 035// Lesser General Public License for more details. 036// 037// You should have received a copy of the GNU Lesser General Public 038// License along with this library; if not, write to the Free Software 039// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 040// 041// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 042// jataylor@hairyfatguy.com 043//------------------------------------------------------------------------------ 044 045public class BufferedMoleculeReader<T extends Molecule> 046{ 047 private ReadableChemFormat<T> mFormatObj; 048 private BufferedReader mBufferedReader; 049 private boolean mEndOfContentReached; 050 private String mRecordStartLine; 051 private int mNumRecordsParsed; 052 053 private StringBuilderPlus mUncompressedRecord = new StringBuilderPlus().setDelimiter("\n"); 054 private List<byte[]> mCompressedRecordChunks; 055 private int mCurrentRecordLength = 0; 056 057 // How long the record should be before compression is used. 058 private static int sCompressionThreshold = 8 * 1024; 059 060 //########################################################################### 061 // CONSTRUCTORS 062 //########################################################################### 063 064 //--------------------------------------------------------------------------- 065 public BufferedMoleculeReader(BufferedReader inReader, ReadableChemFormat<T> inFormatObj) 066 { 067 mBufferedReader = inReader; 068 mFormatObj = inFormatObj; 069 } 070 071 //########################################################################### 072 // PUBLIC METHODS 073 //########################################################################### 074 075 //--------------------------------------------------------------------------- 076 public void close() 077 throws IOException 078 { 079 mBufferedReader.close(); 080 } 081 082 //--------------------------------------------------------------------------- 083 public ReadableChemFormat<T> getFormatObj() 084 { 085 return mFormatObj; 086 } 087 088 //--------------------------------------------------------------------------- 089 public synchronized boolean hasNext() 090 { 091 boolean result = false; 092 if (! endOfContentReached()) 093 { 094 if (0 == mCurrentRecordLength) 095 { 096 readNextRecord(); 097 } 098 099 result = mCurrentRecordLength > 0; 100 } 101 102 return result; 103 } 104 105 //--------------------------------------------------------------------------- 106 public synchronized T next() 107 { 108 T nextSeq = null; 109 if (0 == mCurrentRecordLength) 110 { 111 readNextRecord(); 112 } 113 114 if (mCurrentRecordLength > 0) 115 { 116 nextSeq = mFormatObj.readRecord(getBufferedRecordReader()); 117// mNextRecord.setLength(0); // Clear the raw record 118 mCurrentRecordLength = 0; 119 } 120 121 return nextSeq; 122 } 123 124 //--------------------------------------------------------------------------- 125 public List<T> readAll() 126 { 127 List<T> seqs = new ArrayList<T>(); 128 while (hasNext()) 129 { 130 seqs.add(next()); 131 } 132 133 return seqs; 134 } 135 136 //--------------------------------------------------------------------------- 137 protected boolean endOfContentReached() 138 { 139 return mEndOfContentReached; 140 } 141 142 //--------------------------------------------------------------------------- 143 private synchronized void readNextRecord() 144 { 145 if (! endOfContentReached()) 146 { 147 // Start w/ a fresh record 148 mUncompressedRecord = new StringBuilderPlus().setDelimiter("\n"); 149 mCompressedRecordChunks = null; 150 mCurrentRecordLength = 0; 151 152 if (mRecordStartLine != null) 153 { 154 mUncompressedRecord.appendln(mRecordStartLine); 155 } 156 157 try 158 { 159 String line; 160 while ((line = mBufferedReader.readLine()) != null) 161 { 162 if (mFormatObj.isEndOfRecord(line)) 163 { 164 if (mFormatObj.hasJanusDelimiter()) 165 { 166 if (0 == mNumRecordsParsed 167 && 0 == mCurrentRecordLength) 168 { 169 appendLineToCurrentRecord(line); 170 } 171 else 172 { 173 mRecordStartLine = line; 174 break; 175 } 176 } 177 else 178 { 179 appendLineToCurrentRecord(line); 180 break; 181 } 182 } 183 else 184 { 185 appendLineToCurrentRecord(line); 186 } 187 } 188 189 if (null == line) 190 { 191 mEndOfContentReached = true; 192 } 193 } 194 catch (IOException e) 195 { 196 throw new ChemIOException(e); 197 } 198 } 199 200 if (mCurrentRecordLength > 0) 201 { 202 mNumRecordsParsed++; 203 } 204 } 205 206 //-------------------------------------------------------------------------- 207 // Note: inLine will not have a return at the end 208 private void appendLineToCurrentRecord(String inLine) 209 throws ChemIOException 210 { 211 mCurrentRecordLength += inLine.length() + 1; 212 213 mUncompressedRecord.appendln(inLine); 214 if (mUncompressedRecord.length() > sCompressionThreshold) 215 { 216 if (null == mCompressedRecordChunks) 217 { 218 mCompressedRecordChunks = new ArrayList<>(); 219 } 220 221 mCompressedRecordChunks.add(GZIP.compress(mUncompressedRecord.toString())); 222 mUncompressedRecord.setLength(0); 223 } 224 } 225 226 //-------------------------------------------------------------------------- 227 private BufferedReader getBufferedRecordReader() 228 { 229 InputStream seqStream = null; 230 231 if (CollectionUtil.hasValues(mCompressedRecordChunks)) 232 { 233 if (mUncompressedRecord.length() > 0) 234 { 235 mCompressedRecordChunks.add(GZIP.compress(mUncompressedRecord.toString())); 236 } 237 seqStream = new RecordStreamer(); 238 } 239 else if (mUncompressedRecord.length() > 0) 240 { 241 seqStream = new ByteArrayInputStream(mUncompressedRecord.toString().getBytes()); 242 } 243 244 return new BufferedReader(new InputStreamReader(seqStream)); 245 } 246 247 //########################################################################## 248 // INNER CLASSES 249 //########################################################################## 250 251 252 private class RecordStreamer extends InputStream 253 { 254 private String mCurrentChunk; 255 private int mCurrentChunkIndex; 256 private int mCharIndex; 257 private boolean mDone = false; 258 259 //----------------------------------------------------------------------- 260 public RecordStreamer() 261 { 262 mCurrentChunkIndex = 0; 263 } 264 265 //----------------------------------------------------------------------- 266 public int read() 267 { 268 return (mDone ? -1 : getNextChar()); 269 } 270 271 //----------------------------------------------------------------------- 272 private char getNextChar() 273 { 274 if (null == mCurrentChunk) 275 { 276 mCurrentChunk = GZIP.uncompressToString(mCompressedRecordChunks.get(mCurrentChunkIndex)); 277 278 mCharIndex = 0; 279 } 280 281 char nextChar = mCurrentChunk.charAt(mCharIndex++); 282 283 if (mCharIndex >= mCurrentChunk.length()) 284 { 285 // This is the last char in this chunk. 286 mCurrentChunk = null; 287 mCurrentChunkIndex++; 288 if (mCurrentChunkIndex < 0 || mCurrentChunkIndex == mCompressedRecordChunks.size()) 289 { 290 // This was the last chunk. 291 mDone = true; 292 } 293 } 294 295 return nextChar; 296 } 297 } 298}