001package com.hfg.chem.format;
002
003
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.List;

import com.hfg.chem.Molecule;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.io.GZIP;
016
017//------------------------------------------------------------------------------
018/**
019 Buffered molecule reader.
020 <div>
021 @author J. Alex Taylor, hairyfatguy.com
022 </div>
023 */
024//------------------------------------------------------------------------------
025// com.hfg Library
026//
027// This library is free software; you can redistribute it and/or
028// modify it under the terms of the GNU Lesser General Public
029// License as published by the Free Software Foundation; either
030// version 2.1 of the License, or (at your option) any later version.
031//
032// This library is distributed in the hope that it will be useful,
033// but WITHOUT ANY WARRANTY; without even the implied warranty of
034// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
035// Lesser General Public License for more details.
036//
037// You should have received a copy of the GNU Lesser General Public
038// License along with this library; if not, write to the Free Software
039// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
040//
041// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
042// jataylor@hairyfatguy.com
043//------------------------------------------------------------------------------
044
045public class BufferedMoleculeReader<T extends Molecule>
046{
047   private ReadableChemFormat<T> mFormatObj;
048   private BufferedReader       mBufferedReader;
049   private boolean              mEndOfContentReached;
050   private String               mRecordStartLine;
051   private int                  mNumRecordsParsed;
052
053   private StringBuilderPlus    mUncompressedRecord = new StringBuilderPlus().setDelimiter("\n");
054   private List<byte[]>         mCompressedRecordChunks;
055   private int                  mCurrentRecordLength = 0;
056
057   // How long the record should be before compression is used.
058   private static int   sCompressionThreshold = 8 * 1024;
059
060   //###########################################################################
061   // CONSTRUCTORS
062   //###########################################################################
063
064   //---------------------------------------------------------------------------
065   public BufferedMoleculeReader(BufferedReader inReader, ReadableChemFormat<T> inFormatObj)
066   {
067      mBufferedReader = inReader;
068      mFormatObj      = inFormatObj;
069   }
070
071   //###########################################################################
072   // PUBLIC METHODS
073   //###########################################################################
074
075   //---------------------------------------------------------------------------
076   public void close()
077         throws IOException
078   {
079      mBufferedReader.close();
080   }
081
082   //---------------------------------------------------------------------------
083   public ReadableChemFormat<T> getFormatObj()
084   {
085      return mFormatObj;
086   }
087
088   //---------------------------------------------------------------------------
089   public synchronized boolean hasNext()
090   {
091      boolean result = false;
092      if (! endOfContentReached())
093      {
094         if (0 == mCurrentRecordLength)
095         {
096            readNextRecord();
097         }
098
099         result = mCurrentRecordLength > 0;
100      }
101
102      return result;
103   }
104
105   //---------------------------------------------------------------------------
106   public synchronized T next()
107   {
108      T nextSeq = null;
109      if (0 == mCurrentRecordLength)
110      {
111         readNextRecord();
112      }
113
114      if (mCurrentRecordLength > 0)
115      {
116         nextSeq = mFormatObj.readRecord(getBufferedRecordReader());
117//         mNextRecord.setLength(0); // Clear the raw record
118         mCurrentRecordLength = 0;
119      }
120
121      return nextSeq;
122   }
123
124   //---------------------------------------------------------------------------
125   public List<T> readAll()
126   {
127      List<T> seqs = new ArrayList<T>();
128      while (hasNext())
129      {
130         seqs.add(next());
131      }
132
133      return seqs;
134   }
135
136   //---------------------------------------------------------------------------
137   protected boolean endOfContentReached()
138   {
139      return mEndOfContentReached;
140   }
141
142   //---------------------------------------------------------------------------
143   private synchronized void readNextRecord()
144   {
145      if (! endOfContentReached())
146      {
147         // Start w/ a fresh record
148         mUncompressedRecord = new StringBuilderPlus().setDelimiter("\n");
149         mCompressedRecordChunks = null;
150         mCurrentRecordLength = 0;
151
152         if (mRecordStartLine != null)
153         {
154            mUncompressedRecord.appendln(mRecordStartLine);
155         }
156
157         try
158         {
159            String line;
160            while ((line = mBufferedReader.readLine()) != null)
161            {
162               if (mFormatObj.isEndOfRecord(line))
163               {
164                  if (mFormatObj.hasJanusDelimiter())
165                  {
166                     if (0 == mNumRecordsParsed
167                           && 0 == mCurrentRecordLength)
168                     {
169                        appendLineToCurrentRecord(line);
170                     }
171                     else
172                     {
173                        mRecordStartLine = line;
174                        break;
175                     }
176                  }
177                  else
178                  {
179                     appendLineToCurrentRecord(line);
180                     break;
181                  }
182               }
183               else
184               {
185                  appendLineToCurrentRecord(line);
186               }
187            }
188
189            if (null == line)
190            {
191               mEndOfContentReached = true;
192            }
193         }
194         catch (IOException e)
195         {
196            throw new ChemIOException(e);
197         }
198      }
199
200      if (mCurrentRecordLength > 0)
201      {
202         mNumRecordsParsed++;
203      }
204   }
205
206   //--------------------------------------------------------------------------
207   // Note: inLine will not have a return at the end
208   private void appendLineToCurrentRecord(String inLine)
209         throws ChemIOException
210   {
211      mCurrentRecordLength += inLine.length() + 1;
212
213      mUncompressedRecord.appendln(inLine);
214      if (mUncompressedRecord.length() > sCompressionThreshold)
215      {
216         if (null == mCompressedRecordChunks)
217         {
218            mCompressedRecordChunks = new ArrayList<>();
219         }
220
221         mCompressedRecordChunks.add(GZIP.compress(mUncompressedRecord.toString()));
222         mUncompressedRecord.setLength(0);
223      }
224   }
225
226   //--------------------------------------------------------------------------
227   private BufferedReader getBufferedRecordReader()
228   {
229      InputStream seqStream = null;
230
231      if (CollectionUtil.hasValues(mCompressedRecordChunks))
232      {
233         if (mUncompressedRecord.length() > 0)
234         {
235            mCompressedRecordChunks.add(GZIP.compress(mUncompressedRecord.toString()));
236         }
237         seqStream = new RecordStreamer();
238      }
239      else if (mUncompressedRecord.length() > 0)
240      {
241         seqStream = new ByteArrayInputStream(mUncompressedRecord.toString().getBytes());
242      }
243
244      return new BufferedReader(new InputStreamReader(seqStream));
245   }
246
247   //##########################################################################
248   // INNER CLASSES
249   //##########################################################################
250
251
252   private class RecordStreamer extends InputStream
253   {
254      private String    mCurrentChunk;
255      private int       mCurrentChunkIndex;
256      private int       mCharIndex;
257      private boolean   mDone = false;
258
259      //-----------------------------------------------------------------------
260      public RecordStreamer()
261      {
262         mCurrentChunkIndex = 0;
263      }
264
265      //-----------------------------------------------------------------------
266      public int read()
267      {
268         return (mDone ? -1 : getNextChar());
269      }
270
271      //-----------------------------------------------------------------------
272      private char getNextChar()
273      {
274         if (null == mCurrentChunk)
275         {
276            mCurrentChunk = GZIP.uncompressToString(mCompressedRecordChunks.get(mCurrentChunkIndex));
277
278            mCharIndex = 0;
279         }
280
281         char nextChar = mCurrentChunk.charAt(mCharIndex++);
282
283         if (mCharIndex >= mCurrentChunk.length())
284         {
285            // This is the last char in this chunk.
286            mCurrentChunk = null;
287            mCurrentChunkIndex++;
288            if (mCurrentChunkIndex < 0 || mCurrentChunkIndex == mCompressedRecordChunks.size())
289            {
290               // This was the last chunk.
291               mDone = true;
292            }
293         }
294
295         return nextChar;
296      }
297   }
298}