package com.hfg.bio.seq.translation;

import com.hfg.bio.seq.NucleicAcid;
import com.hfg.bio.seq.Protein;
import com.hfg.bio.Strand;
import com.hfg.bio.seq.ProteinFactory;
import com.hfg.util.StringUtil;

import java.io.FilterReader;
import java.io.IOException;
import java.io.Reader;

//------------------------------------------------------------------------------
/**
 Translates nucleic acid sequences into protein.
 <p>
 Codons are translated with the configured {@link TranslationTable} (the
 {@link StandardTranslationTable} unless another table is passed to the
 constructor) and the resulting {@link Protein} objects are created by the
 configured {@link ProteinFactory}.
 </p>
 <div>
 @author J. Alex Taylor, hairyfatguy.com
 </div>
 */
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// jataylor@hairyfatguy.com
//------------------------------------------------------------------------------

public class NucleicAcidTranslator
{
   private TranslationTable mTranslationTable = StandardTranslationTable.getInstance();
   private ProteinFactory   mProteinFactory   = new ProteinFactory();

   /** Name of the protein attribute under which the translation frame is stored. */
   public static final String FRAME_ATT = "translation frame";

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a translator that uses the standard translation table.
    */
   public NucleicAcidTranslator()
   {

   }

   //---------------------------------------------------------------------------
   /**
    Creates a translator that uses the specified translation table.
    @param inTranslationTable the table used to convert codons into amino acids
    */
   public NucleicAcidTranslator(TranslationTable inTranslationTable)
   {
      mTranslationTable = inTranslationTable;
   }


   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Specifies the factory used to create the Protein objects returned by translate().
    @param inValue the factory to use
    @return this translator, to allow method chaining
    */
   public NucleicAcidTranslator setProteinFactory(ProteinFactory inValue)
   {
      mProteinFactory = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    Translates the specified nucleic acid in the specified frame.
    <p>
    The returned protein carries the ID and description of the nucleic acid and
    has the frame stored under the {@link #FRAME_ATT} attribute. For a frame on
    the plus strand the sequence reader of the nucleic acid is used; for any
    other strand its reverse-complement sequence reader is used. The frame
    offset is skipped before translation starts. Trailing nucleotides that do
    not form a complete codon are dropped, and a codon containing a gap
    character ('-') is translated as '-'.
    </p>
    @param inSeq   the nucleic acid to translate
    @param inFrame the strand and offset at which to translate
    @return the newly created protein
    @throws TranslationException if the nucleic acid has no sequence or if any
            other problem occurs while setting up or performing the translation
            (the underlying exception is attached as the cause)
    */
   public Protein translate(NucleicAcid inSeq, TranslationFrame inFrame)
         throws TranslationException
   {
      Protein protein = mProteinFactory.createSeqObj();
      protein.setID(inSeq.getID());
      protein.setDescription(inSeq.getDescription());
      protein.setAttribute(FRAME_ATT, inFrame);

      try
      {
         if (0 == inSeq.length())
         {
            throw new RuntimeException("The nucleic acid " + StringUtil.singleQuote(inSeq.getID()) + " doesn't have a sequence!");
         }

         Reader naReader = (inFrame.getStrand().equals(Strand.PLUS) ? inSeq.getSequenceReader() : inSeq.getReverseComplementSequenceReader());

         skipFully(naReader, inFrame.getOffset());

         TranslationReader translationReader = new TranslationReader(naReader);

         protein.setSequence(translationReader);
      }
      catch (Exception e)
      {
         throw new TranslationException("Problem during translation of " + inSeq.getID() + " in frame " + inFrame + "!", e);
      }

      return protein;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   // Skips inNumChars characters of the reader. Reader.skip() is allowed to skip
   // fewer characters than requested, so it is called repeatedly until the whole
   // offset has been consumed or the reader reports no further progress (end of
   // input). Stopping short would silently shift the reading frame.
   private static void skipFully(Reader inReader, long inNumChars)
         throws IOException
   {
      if (inNumChars < 0)
      {
         // Same exception type Reader.skip() uses for a negative count.
         throw new IllegalArgumentException("The frame offset cannot be negative: " + inNumChars);
      }

      long remaining = inNumChars;
      while (remaining > 0)
      {
         long skipped = inReader.skip(remaining);
         if (skipped <= 0)
         {
            break;
         }

         remaining -= skipped;
      }
   }

   //###########################################################################
   // INNER CLASS
   //###########################################################################

   // In order to be able to efficiently deal with large nucleotide sequences,
   // the translation operation is performed in a Reader.
   //
   // The wrapped reader supplies nucleotides; this reader supplies one amino acid
   // character per codon. Amino acids are produced in batches of up to
   // mAABuffer.length and handed out from that buffer.

   private class TranslationReader extends FilterReader
   {
      private final char[] mAABuffer    = new char[1024];
      private final char[] mCodonBuffer = new char[3];
      private int     mBufferIndex = 0;   // Index of the next amino acid to hand out
      private int     mBufferLimit = 0;   // Number of valid amino acids in mAABuffer
      private boolean mEndOfStream;       // True once the nucleotide reader is exhausted

      //------------------------------------------------------------------------
      protected TranslationReader(Reader reader)
      {
         super(reader);
      }

      //------------------------------------------------------------------------
      /**
       Returns the next amino acid character.
       @return the next amino acid or -1 if the end of the sequence has been reached
       */
      @Override
      public int read()
            throws IOException
      {
         if (mBufferIndex >= mBufferLimit
             && ! mEndOfStream)
         {
            fillBuffer();
         }

         return (mBufferIndex < mBufferLimit ? mAABuffer[mBufferIndex++] : -1);
      }

      //------------------------------------------------------------------------
      /**
       Reads up to inLength amino acid characters into the specified buffer.
       @param inBuffer the destination buffer
       @param inOffset the index in inBuffer at which to start storing characters
       @param inLength the maximum number of characters to read
       @return the number of characters stored; 0 if inLength is 0;
               -1 if the end of the sequence was reached before any character was read
       @throws IndexOutOfBoundsException if inOffset / inLength do not describe
               a valid region of inBuffer
       */
      @Override
      public int read(char[] inBuffer, int inOffset, int inLength)
            throws IOException
      {
         if (inOffset < 0
             || inLength < 0
             || inLength > inBuffer.length - inOffset)
         {
            throw new IndexOutOfBoundsException("offset: " + inOffset + ", length: " + inLength + ", buffer size: " + inBuffer.length);
         }

         if (0 == inLength)
         {
            // Per the Reader contract nothing is consumed for a zero-length request.
            return 0;
         }

         int numChars = 0;
         while (numChars < inLength)
         {
            if (mBufferIndex >= mBufferLimit)
            {
               if (mEndOfStream)
               {
                  break;
               }

               // After this call either amino acids are available or mEndOfStream is set.
               fillBuffer();
               continue;
            }

            int numToCopy = Math.min(inLength - numChars, mBufferLimit - mBufferIndex);
            System.arraycopy(mAABuffer, mBufferIndex, inBuffer, inOffset + numChars, numToCopy);
            mBufferIndex += numToCopy;
            numChars     += numToCopy;
         }

         return (0 == numChars ? -1 : numChars);
      }

      //------------------------------------------------------------------------
      /**
       Skips amino acid characters. FilterReader's implementation would skip
       nucleotides in the wrapped reader and ignore the buffered amino acids.
       @param inNumChars the number of amino acid characters to skip
       @return the number of characters actually skipped
       @throws IllegalArgumentException if inNumChars is negative
       */
      @Override
      public long skip(long inNumChars)
            throws IOException
      {
         if (inNumChars < 0)
         {
            throw new IllegalArgumentException("skip value is negative");
         }

         long numSkipped = 0;
         while (numSkipped < inNumChars
                && read() >= 0)
         {
            numSkipped++;
         }

         return numSkipped;
      }

      //------------------------------------------------------------------------
      // Marking is not supported. FilterReader would forward mark()/reset() to the
      // nucleotide reader, which would leave the amino acid buffer out of sync.
      @Override
      public boolean markSupported()
      {
         return false;
      }

      //------------------------------------------------------------------------
      @Override
      public void mark(int inReadAheadLimit)
            throws IOException
      {
         throw new IOException("mark() not supported");
      }

      //------------------------------------------------------------------------
      @Override
      public void reset()
            throws IOException
      {
         throw new IOException("reset() not supported");
      }

      //------------------------------------------------------------------------
      // Refills the amino acid buffer by translating codons from the wrapped reader
      // until the buffer is full or the nucleotides run out. On return either at
      // least one amino acid is available or mEndOfStream has been set.
      private void fillBuffer()
            throws IOException
      {
         int bufferIndex = 0;
         while (bufferIndex < mAABuffer.length)
         {
            if (! readCodon())
            {
               mEndOfStream = true;
               break;
            }

            String codonString = new String(mCodonBuffer);

            char aa;
            if (codonString.contains("-"))
            {
               // A codon containing a gap is reported as a gap.
               aa = '-';
            }
            else
            {
               aa = mTranslationTable.translateCodon(codonString);
            }

            mAABuffer[bufferIndex++] = aa;
         }

         mBufferIndex = 0;
         mBufferLimit = bufferIndex;
      }

      //------------------------------------------------------------------------
      // Reads the next three nucleotides into mCodonBuffer. Returns false if the
      // end of the wrapped reader is reached before a complete codon was read (any
      // partial codon is dropped). A Reader may return fewer characters than
      // requested, so reading continues until the codon is complete.
      private boolean readCodon()
            throws IOException
      {
         int nucleotidesRead = 0;
         while (nucleotidesRead < mCodonBuffer.length)
         {
            int count = in.read(mCodonBuffer, nucleotidesRead, mCodonBuffer.length - nucleotidesRead);
            if (-1 == count)
            {
               return false;
            }

            if (0 == count)
            {
               // A reader that makes no progress would otherwise cause an endless loop.
               throw new TranslationException("Problem reading a full codon!");
            }

            nucleotidesRead += count;
         }

         return true;
      }
   }

}