001package com.hfg.bio.seq.translation;
002
003import com.hfg.bio.seq.NucleicAcid;
004import com.hfg.bio.seq.Protein;
005import com.hfg.bio.Strand;
006import com.hfg.bio.seq.ProteinFactory;
007import com.hfg.util.StringUtil;
008
009import java.io.FilterReader;
010import java.io.IOException;
011import java.io.Reader;
012
013//------------------------------------------------------------------------------
014/**
015 Translates nucleic acid sequences into protein.
016 <div>
017  @author J. Alex Taylor, hairyfatguy.com
018 </div>
019 */
020//------------------------------------------------------------------------------
021// com.hfg Library
022//
023// This library is free software; you can redistribute it and/or
024// modify it under the terms of the GNU Lesser General Public
025// License as published by the Free Software Foundation; either
026// version 2.1 of the License, or (at your option) any later version.
027//
028// This library is distributed in the hope that it will be useful,
029// but WITHOUT ANY WARRANTY; without even the implied warranty of
030// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
031// Lesser General Public License for more details.
032//
033// You should have received a copy of the GNU Lesser General Public
034// License along with this library; if not, write to the Free Software
035// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
036//
037// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
038// jataylor@hairyfatguy.com
039//------------------------------------------------------------------------------
040
041public class NucleicAcidTranslator
042{
043   private TranslationTable mTranslationTable = StandardTranslationTable.getInstance();
044   private ProteinFactory   mProteinFactory   = new ProteinFactory();
045
046   public static final String FRAME_ATT = "translation frame";
047
048   //###########################################################################
049   // CONSTRUCTORS
050   //###########################################################################
051
052   //---------------------------------------------------------------------------
053   public NucleicAcidTranslator()
054   {
055
056   }
057
058   //---------------------------------------------------------------------------
059   public NucleicAcidTranslator(TranslationTable inTranslationTable)
060   {
061      mTranslationTable = inTranslationTable;
062   }
063
064
065   //###########################################################################
066   // PUBLIC METHODS
067   //###########################################################################
068
069   //---------------------------------------------------------------------------
070   public NucleicAcidTranslator setProteinFactory(ProteinFactory inValue)
071   {
072      mProteinFactory = inValue;
073      return this;
074   }
075
076   //---------------------------------------------------------------------------
077   public Protein translate(NucleicAcid inSeq, TranslationFrame inFrame)
078      throws TranslationException
079   {
080      Protein protein = mProteinFactory.createSeqObj();
081      protein.setID(inSeq.getID());
082      protein.setDescription(inSeq.getDescription());
083      protein.setAttribute(FRAME_ATT, inFrame);
084
085      try
086      {
087         if (0 == inSeq.length())
088         {
089            throw new RuntimeException("The nucleic acid " + StringUtil.singleQuote(inSeq.getID()) + " doesn't have a sequence!");
090         }
091
092         Reader naReader = (inFrame.getStrand().equals(Strand.PLUS) ? inSeq.getSequenceReader() : inSeq.getReverseComplementSequenceReader());
093         naReader.skip(inFrame.getOffset());
094
095         TranslationReader translationReader = new TranslationReader(naReader);
096
097         protein.setSequence(translationReader);
098      }
099      catch (Exception e)
100      {
101         throw new TranslationException("Problem during translation of " + inSeq.getID() + " in frame " + inFrame + "!", e);
102      }
103
104      return protein;
105   }
106
107   //###########################################################################
108   // INNER CLASS
109   //###########################################################################
110
111   // In order to be be able to efficiently deal with large nucleotide sequences,
112   // the translation operation is performed in a Reader.
113
114   private class TranslationReader extends FilterReader
115   {
116      private char[] mAABuffer    = new char[1024];
117      private char[] mCodonBuffer = new char[3];
118      private int mBufferIndex = 0;
119      private int mBufferLimit = 0;
120      private boolean mEndOfStream;
121
122      //------------------------------------------------------------------------
123      protected TranslationReader(Reader reader)
124      {
125         super(reader);
126      }
127
128      //------------------------------------------------------------------------
129      @Override
130      public int read()
131         throws IOException
132      {
133         if (mBufferIndex >= mBufferLimit
134             && ! mEndOfStream)
135         {
136            fillBuffer();
137         }
138
139         return (mBufferIndex < mBufferLimit ? mAABuffer[mBufferIndex++] : -1);
140      }
141
142      //------------------------------------------------------------------------
143      @Override
144      public int read(char[] inBuffer, int inOffset, int inLength)
145            throws IOException
146      {
147         int numChars = 0;
148         int bufferIndex = inOffset;
149         int nextChar;
150         do
151         {
152            nextChar = read();
153            if (nextChar > 0)
154            {
155               inBuffer[bufferIndex++] = (char) nextChar;
156               numChars++;
157            }
158
159         } while (nextChar >= 0
160                  && numChars < inLength);
161
162         return (-1 == nextChar && 0 == numChars ? -1 : numChars);
163      }
164
165      //------------------------------------------------------------------------
166      private void fillBuffer()
167         throws IOException
168      {
169         int bufferIndex = 0;
170         while (bufferIndex < mAABuffer.length)
171         {
172            int nucleotidesRead = in.read(mCodonBuffer, 0, 3);
173            if (-1 == nucleotidesRead)
174            {
175               mEndOfStream = true;
176               break;
177            }
178            else if (nucleotidesRead < 3)
179            {
180               // We didn't get a full codon.
181               int newNucleotidesRead = in.read(mCodonBuffer, nucleotidesRead, 3 - nucleotidesRead);
182               if (-1 == newNucleotidesRead)
183               {
184                  mEndOfStream = true;
185                  break;
186               }
187
188               nucleotidesRead += newNucleotidesRead;
189               if (nucleotidesRead < 3)
190               {
191                  throw new TranslationException("Problem reading a full codon!");
192               }
193            }
194
195            String codonString = new String(mCodonBuffer);
196
197            char aa;
198            if (codonString.contains("-"))
199            {
200               aa = '-';
201            }
202            else
203            {
204               aa = mTranslationTable.translateCodon(new String(mCodonBuffer));
205            }
206
207            mAABuffer[bufferIndex++] = aa;
208         }
209
210         mBufferIndex = 0;
211         mBufferLimit = bufferIndex;
212      }
213   }
214
215}