001package com.hfg.bio.seq.alignment.matrix;
002
003import java.io.*;
004import java.util.ArrayList;
005import java.util.Collection;
006import java.util.HashMap;
007import java.util.List;
008import java.util.Map;
009import java.util.regex.Pattern;
010
011import com.hfg.exception.ProgrammingException;
012import com.hfg.util.StringBuilderPlus;
013import com.hfg.util.StringUtil;
014
015
016//------------------------------------------------------------------------------
017/**
018 Substitution matrix for use with pairwise sequence alignments.
019 <div>
020  @author J. Alex Taylor, hairyfatguy.com
021 </div>
022 */
023//------------------------------------------------------------------------------
024// com.hfg XML/HTML Coding Library
025//
026// This library is free software; you can redistribute it and/or
027// modify it under the terms of the GNU Lesser General Public
028// License as published by the Free Software Foundation; either
029// version 2.1 of the License, or (at your option) any later version.
030//
031// This library is distributed in the hope that it will be useful,
032// but WITHOUT ANY WARRANTY; without even the implied warranty of
033// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
034// Lesser General Public License for more details.
035//
036// You should have received a copy of the GNU Lesser General Public
037// License along with this library; if not, write to the Free Software
038// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
039//
040// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
041// jataylor@hairyfatguy.com
042//------------------------------------------------------------------------------
043
044public class SubstitutionMatrix
045{
046   private String mName;
047   private float[][]  mMatrix;
048   private List<Character> mOrderedResidues;
049   private int[] mResidueIndexLookup;
050   private String mToStringCache;
051
052   private static Pattern sNumPattern = Pattern.compile("\\d+");
053
054   private static Map<String, SubstitutionMatrix> sInstanceMap = new HashMap<>();
055
056
057   public static final SubstitutionMatrix IDENTITY = new SubstitutionMatrix("IDENTITY");
058   public static final SubstitutionMatrix IDENTITY_GAP = new SubstitutionMatrix("IDENTITY_GAP");
059   public static final SubstitutionMatrix NUCLEOTIDE = new SubstitutionMatrix("NUCLEOTIDE");
060
061   public static final SubstitutionMatrix PAM10  = new SubstitutionMatrix("PAM10");
062   public static final SubstitutionMatrix PAM100 = new SubstitutionMatrix("PAM100");
063   public static final SubstitutionMatrix PAM120 = new SubstitutionMatrix("PAM120");
064   public static final SubstitutionMatrix PAM250 = new SubstitutionMatrix("PAM250");
065
066   public static final SubstitutionMatrix BLOSUM62 = new SubstitutionMatrix("BLOSUM62");
067   public static final SubstitutionMatrix BLOSUM90 = new SubstitutionMatrix("BLOSUM90");
068   public static final SubstitutionMatrix BLOSUM90_GAP = new SubstitutionMatrix("BLOSUM90_GAP");
069
070   //###########################################################################
071   // CONSTRUCTORS
072   //###########################################################################
073
074   //---------------------------------------------------------------------------
075   /**
076    Standard matrices will be retrievable from resource files.
077    */
078   private SubstitutionMatrix(String inName)
079   {
080      mName = inName;
081      sInstanceMap.put(mName, this);
082   }
083
084   //---------------------------------------------------------------------------
085   /**
086    Constructor using a BufferedReader.
087    @param inName the name to use for the substitution matrix
088    @param inReader reader containing the substitution matrix data
089    @throws IOException if a problem is encountered reading from the specified Reader
090    */
091   public SubstitutionMatrix(String inName, BufferedReader inReader)
092      throws IOException
093   {
094      this(inName);
095      mMatrix = readMatrixData(inReader);
096   }
097
098
099
100   //###########################################################################
101   // PUBLIC METHODS
102   //###########################################################################
103
104   //---------------------------------------------------------------------------
105   public String name()
106   {
107      return mName;
108   }
109
110   //---------------------------------------------------------------------------
111   @Override
112   public String toString()
113   {
114      if (null == mToStringCache)
115      {
116         StringBuilderPlus buffer = new StringBuilderPlus();
117         buffer.appendln("# " + name());
118
119         for (char residue : mOrderedResidues)
120         {
121            buffer.append(String.format("%5s ", residue + ""));
122         }
123         buffer.appendln();
124
125         for (char residue1 : mOrderedResidues)
126         {
127            buffer.append(String.format("%5s ", residue1 + ""));
128
129            for (char residue2 : mOrderedResidues)
130            {
131               buffer.append(String.format("%5.1f ", score(residue1, residue2)));
132            }
133            buffer.appendln();
134         }
135         mToStringCache = buffer.toString();
136      }
137
138      return mToStringCache;
139   }
140
141   //---------------------------------------------------------------------------
142   /**
143    Returns whether or not this substitution matrix supports the specified character.
144    * @param inResidue the character to be checked for inclusion in this matrix
145    * @return whether or not this substitution matrix supports the specified character
146    */
147   public boolean contains(char inResidue)
148   {
149     return mOrderedResidues.contains(inResidue);
150   }
151
152   //---------------------------------------------------------------------------
153   public int getResidueIndex(char inResidue)
154   {
155      int[] residueIndexLookup = getResidueIndexLookup();
156      if (((int) inResidue) >= residueIndexLookup.length)
157      {
158         throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(inResidue) + "!");
159      }
160
161      int residueIndex = residueIndexLookup[(int) inResidue];
162      if (-1 == residueIndex)
163      {
164         throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(inResidue) + "!");
165      }
166
167      return residueIndex;
168   }
169
170   //---------------------------------------------------------------------------
171   // For performance reasons some method contents are included here directly.
172   public int[] getResidueIndicesForSequence(String inSequence)
173   {
174      if (null == mResidueIndexLookup)
175      {
176         init();
177      }
178
179      int length = inSequence.length();
180      int indices[] = new int[length];
181
182      for (int i = 0; i < length; i++)
183      {
184         char theChar = inSequence.charAt(i);
185
186         if (((int) theChar) >= mResidueIndexLookup.length)
187         {
188            throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(theChar) + "!");
189         }
190
191         int residueIndex = mResidueIndexLookup[(int) theChar];
192         if (-1 == residueIndex)
193         {
194            throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(theChar) + "!");
195         }
196
197         indices[i] = residueIndex;
198      }
199
200      return indices;
201   }
202
203   //---------------------------------------------------------------------------
204   public float score(char inResidue1, char inResidue2)
205   {
206      int index1 = getResidueIndex(inResidue1);
207      int index2 = getResidueIndex(inResidue2);
208
209      return getRawMatrix()[index1][index2];
210   }
211
212   //---------------------------------------------------------------------------
213   public float scoreCaseInsensitive(char inResidue1, char inResidue2)
214   {
215      return score(Character.toUpperCase(inResidue1), Character.toUpperCase(inResidue2));
216   }
217
218   //---------------------------------------------------------------------------
219   public float[][] getRawMatrix()
220   {
221      if (null == mMatrix)
222      {
223         init();
224      }
225
226      return mMatrix;
227   }
228
229   //---------------------------------------------------------------------------
230   public static Collection<SubstitutionMatrix> values()
231   {
232      return sInstanceMap.values();
233   }
234
235   //###########################################################################
236   // PRIVATE METHODS
237   //###########################################################################
238
239   //---------------------------------------------------------------------------
240   private synchronized void init()
241   {
242      if (null == mMatrix)
243      {
244         if (StringUtil.isSet(name()))
245         {
246            mMatrix = readMatrixDataFromRsrc();
247         }
248         else
249         {
250            throw new ProgrammingException("The substitution matrix has not been properly initialized!");
251         }
252      }
253   }
254
255   //---------------------------------------------------------------------------
256   private float[][] readMatrixDataFromRsrc()
257   {
258      float[][] matrix;
259
260      InputStream rsrcStream = this.getClass().getResourceAsStream(name() + ".mat");
261      if (null == rsrcStream)
262      {
263         // Try looking in the pam directory
264         rsrcStream = this.getClass().getResourceAsStream("pam/" + name() + ".mat");
265         if (null == rsrcStream)
266         {
267            // Try looking in the blosum directory
268            rsrcStream = this.getClass().getResourceAsStream("blosum/" + name() + ".mat");
269            if (null == rsrcStream)
270            {
271               throw new ProgrammingException("The rsrc for substitution matrix " + StringUtil.singleQuote(name()) + " couldn't be located!?");
272            }
273         }
274      }
275
276      try
277      {
278         BufferedReader reader = new BufferedReader(new InputStreamReader(rsrcStream));
279         matrix = readMatrixData(reader);
280         reader.close();
281      }
282      catch (IOException e)
283      {
284         throw new RuntimeException("Problem reading data for substitution matrix " + StringUtil.singleQuote(name()) + "!", e);
285      }
286
287      return matrix;
288   }
289
290   //---------------------------------------------------------------------------
291   private void readMatrixDataFromFile(File inFile)
292      throws IOException
293   {
294      if (null == inFile)
295      {
296         throw new IOException("No substitution matrix file was specified!");
297      }
298      else if (! inFile.exists())
299      {
300         throw new IOException("The substitution matrix file " + StringUtil.singleQuote(inFile.getPath()) + " does not exist!");
301      }
302      else if (! inFile.canRead())
303      {
304         throw new IOException("No read permissions for the substitution matrix file " + StringUtil.singleQuote(inFile.getPath()) + "!");
305      }
306
307      BufferedReader reader = null;
308      try
309      {
310         reader = new BufferedReader(new FileReader(inFile));
311         readMatrixData(reader);
312      }
313      finally
314      {
315         if (reader != null)
316         {
317            reader.close();
318         }
319      }
320   }
321
322   //---------------------------------------------------------------------------
323   private float[][] readMatrixData(BufferedReader inReader)
324         throws IOException
325   {
326      float[][]  matrix = null;
327
328      // For performance, we will use an int[] instead of a Map to lookup the proper matrix position for a given residue
329      int[] residueIndexLookup = null;
330      String line;
331      while ((line = inReader.readLine()) != null)
332      {
333         line = line.trim();
334
335         if (0 == line.length()       // Skip blank lines
336             || line.startsWith("#")) // Skip comment lines
337         {
338            continue;
339         }
340
341         // Read the header line of residues
342         if (null == residueIndexLookup
343             && ! sNumPattern.matcher(line).find())
344         {
345//            mResidueIndex = new OrderedMap<Character, Integer>();
346            int inex = 0;
347            int maxIndex = -1;
348            String pieces[] = line.split("\\s+");
349            for (String piece : pieces)
350            {
351               if (piece.length() > 1)
352               {
353                  throw new RuntimeException("The matrix header line " + StringUtil.singleQuote(line) + " is not in the proper format!");
354               }
355
356               if (((int)piece.charAt(0)) > maxIndex)
357               {
358                  maxIndex = (int) piece.charAt(0);
359               }
360//                  mResidueIndex.put(piece.charAt(0), index++);
361            }
362
363            residueIndexLookup = new int[maxIndex + 1];
364            mOrderedResidues = new ArrayList<>(pieces.length);
365            for (int i = 0; i <= maxIndex; i++)
366            {
367               residueIndexLookup[i] = -1;
368            }
369
370            int matrixColIndex = 0;
371            for (String piece : pieces)
372            {
373               residueIndexLookup[(int) piece.charAt(0)] = matrixColIndex++;
374               mOrderedResidues.add(piece.charAt(0));
375            }
376
377//            matrix = new float[mResidueIndex.size()][mResidueIndex.size()];
378            matrix = new float[matrixColIndex][matrixColIndex];
379         }
380         else if (matrix != null)
381         {
382            // Read matrix data
383            String pieces[] = line.split("\\s+");
384            char residue = pieces[0].charAt(0);
385//            int residueIndex = mResidueIndex.get(residue);
386            int residueIndex = residueIndexLookup[(int)residue];
387
388            for (int i = 1; i < pieces.length; i++)
389            {
390               matrix[residueIndex][i - 1] = Float.parseFloat(pieces[i]);
391            }
392         }
393      }
394
395      mResidueIndexLookup = residueIndexLookup;
396      return matrix;
397   }
398
399   //---------------------------------------------------------------------------
400   private int[] getResidueIndexLookup()
401   {
402      if (null == mResidueIndexLookup)
403      {
404         init();
405      }
406
407      return mResidueIndexLookup;
408   }
409}