001package com.hfg.bio.seq.alignment.matrix; 002 003import java.io.*; 004import java.util.ArrayList; 005import java.util.Collection; 006import java.util.HashMap; 007import java.util.List; 008import java.util.Map; 009import java.util.regex.Pattern; 010 011import com.hfg.exception.ProgrammingException; 012import com.hfg.util.StringBuilderPlus; 013import com.hfg.util.StringUtil; 014 015 016//------------------------------------------------------------------------------ 017/** 018 Substitution matrix for use with pairwise sequence alignments. 019 <div> 020 @author J. Alex Taylor, hairyfatguy.com 021 </div> 022 */ 023//------------------------------------------------------------------------------ 024// com.hfg XML/HTML Coding Library 025// 026// This library is free software; you can redistribute it and/or 027// modify it under the terms of the GNU Lesser General Public 028// License as published by the Free Software Foundation; either 029// version 2.1 of the License, or (at your option) any later version. 030// 031// This library is distributed in the hope that it will be useful, 032// but WITHOUT ANY WARRANTY; without even the implied warranty of 033// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 034// Lesser General Public License for more details. 035// 036// You should have received a copy of the GNU Lesser General Public 037// License along with this library; if not, write to the Free Software 038// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 039// 040// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 041// jataylor@hairyfatguy.com 042//------------------------------------------------------------------------------ 043 044public class SubstitutionMatrix 045{ 046 private String mName; 047 private float[][] mMatrix; 048 private List<Character> mOrderedResidues; 049 private int[] mResidueIndexLookup; 050 private String mToStringCache; 051 052 private static Pattern sNumPattern = Pattern.compile("\\d+"); 053 054 private static Map<String, SubstitutionMatrix> sInstanceMap = new HashMap<>(); 055 056 057 public static final SubstitutionMatrix IDENTITY = new SubstitutionMatrix("IDENTITY"); 058 public static final SubstitutionMatrix IDENTITY_GAP = new SubstitutionMatrix("IDENTITY_GAP"); 059 public static final SubstitutionMatrix NUCLEOTIDE = new SubstitutionMatrix("NUCLEOTIDE"); 060 061 public static final SubstitutionMatrix PAM10 = new SubstitutionMatrix("PAM10"); 062 public static final SubstitutionMatrix PAM100 = new SubstitutionMatrix("PAM100"); 063 public static final SubstitutionMatrix PAM120 = new SubstitutionMatrix("PAM120"); 064 public static final SubstitutionMatrix PAM250 = new SubstitutionMatrix("PAM250"); 065 066 public static final SubstitutionMatrix BLOSUM62 = new SubstitutionMatrix("BLOSUM62"); 067 public static final SubstitutionMatrix BLOSUM90 = new SubstitutionMatrix("BLOSUM90"); 068 public static final SubstitutionMatrix BLOSUM90_GAP = new SubstitutionMatrix("BLOSUM90_GAP"); 069 070 //########################################################################### 071 // CONSTRUCTORS 072 //########################################################################### 073 074 //--------------------------------------------------------------------------- 075 /** 076 Standard matrices will be retrievable from resource files. 077 */ 078 private SubstitutionMatrix(String inName) 079 { 080 mName = inName; 081 sInstanceMap.put(mName, this); 082 } 083 084 //--------------------------------------------------------------------------- 085 /** 086 Constructor using a BufferedReader. 087 @param inName the name to use for the substitution matrix 088 @param inReader reader containing the substitution matrix data 089 @throws IOException if a problem is encountered reading from the specified Reader 090 */ 091 public SubstitutionMatrix(String inName, BufferedReader inReader) 092 throws IOException 093 { 094 this(inName); 095 mMatrix = readMatrixData(inReader); 096 } 097 098 099 100 //########################################################################### 101 // PUBLIC METHODS 102 //########################################################################### 103 104 //--------------------------------------------------------------------------- 105 public String name() 106 { 107 return mName; 108 } 109 110 //--------------------------------------------------------------------------- 111 @Override 112 public String toString() 113 { 114 if (null == mToStringCache) 115 { 116 StringBuilderPlus buffer = new StringBuilderPlus(); 117 buffer.appendln("# " + name()); 118 119 for (char residue : mOrderedResidues) 120 { 121 buffer.append(String.format("%5s ", residue + "")); 122 } 123 buffer.appendln(); 124 125 for (char residue1 : mOrderedResidues) 126 { 127 buffer.append(String.format("%5s ", residue1 + "")); 128 129 for (char residue2 : mOrderedResidues) 130 { 131 buffer.append(String.format("%5.1f ", score(residue1, residue2))); 132 } 133 buffer.appendln(); 134 } 135 mToStringCache = buffer.toString(); 136 } 137 138 return mToStringCache; 139 } 140 141 //--------------------------------------------------------------------------- 142 /** 143 Returns whether or not this substitution matrix supports the specified character. 144 * @param inResidue the character to be checked for inclusion in this matrix 145 * @return whether or not this substitution matrix supports the specified character 146 */ 147 public boolean contains(char inResidue) 148 { 149 return mOrderedResidues.contains(inResidue); 150 } 151 152 //--------------------------------------------------------------------------- 153 public int getResidueIndex(char inResidue) 154 { 155 int[] residueIndexLookup = getResidueIndexLookup(); 156 if (((int) inResidue) >= residueIndexLookup.length) 157 { 158 throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(inResidue) + "!"); 159 } 160 161 int residueIndex = residueIndexLookup[(int) inResidue]; 162 if (-1 == residueIndex) 163 { 164 throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(inResidue) + "!"); 165 } 166 167 return residueIndex; 168 } 169 170 //--------------------------------------------------------------------------- 171 // For performance reasons some method contents are included here directly. 172 public int[] getResidueIndicesForSequence(String inSequence) 173 { 174 if (null == mResidueIndexLookup) 175 { 176 init(); 177 } 178 179 int length = inSequence.length(); 180 int indices[] = new int[length]; 181 182 for (int i = 0; i < length; i++) 183 { 184 char theChar = inSequence.charAt(i); 185 186 if (((int) theChar) >= mResidueIndexLookup.length) 187 { 188 throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(theChar) + "!"); 189 } 190 191 int residueIndex = mResidueIndexLookup[(int) theChar]; 192 if (-1 == residueIndex) 193 { 194 throw new RuntimeException("The substitution matrix " + name() + " doesn't support the character " + StringUtil.singleQuote(theChar) + "!"); 195 } 196 197 indices[i] = residueIndex; 198 } 199 200 return indices; 201 } 202 203 //--------------------------------------------------------------------------- 204 public float score(char inResidue1, char inResidue2) 205 { 206 int index1 = getResidueIndex(inResidue1); 207 int index2 = getResidueIndex(inResidue2); 208 209 return getRawMatrix()[index1][index2]; 210 } 211 212 //--------------------------------------------------------------------------- 213 public float scoreCaseInsensitive(char inResidue1, char inResidue2) 214 { 215 return score(Character.toUpperCase(inResidue1), Character.toUpperCase(inResidue2)); 216 } 217 218 //--------------------------------------------------------------------------- 219 public float[][] getRawMatrix() 220 { 221 if (null == mMatrix) 222 { 223 init(); 224 } 225 226 return mMatrix; 227 } 228 229 //--------------------------------------------------------------------------- 230 public static Collection<SubstitutionMatrix> values() 231 { 232 return sInstanceMap.values(); 233 } 234 235 //########################################################################### 236 // PRIVATE METHODS 237 //########################################################################### 238 239 //--------------------------------------------------------------------------- 240 private synchronized void init() 241 { 242 if (null == mMatrix) 243 { 244 if (StringUtil.isSet(name())) 245 { 246 mMatrix = readMatrixDataFromRsrc(); 247 } 248 else 249 { 250 throw new ProgrammingException("The substitution matrix has not been properly initialized!"); 251 } 252 } 253 } 254 255 //--------------------------------------------------------------------------- 256 private float[][] readMatrixDataFromRsrc() 257 { 258 float[][] matrix; 259 260 InputStream rsrcStream = this.getClass().getResourceAsStream(name() + ".mat"); 261 if (null == rsrcStream) 262 { 263 // Try looking in the pam directory 264 rsrcStream = this.getClass().getResourceAsStream("pam/" + name() + ".mat"); 265 if (null == rsrcStream) 266 { 267 // Try looking in the blosum directory 268 rsrcStream = this.getClass().getResourceAsStream("blosum/" + name() + ".mat"); 269 if (null == rsrcStream) 270 { 271 throw new ProgrammingException("The rsrc for substitution matrix " + StringUtil.singleQuote(name()) + " couldn't be located!?"); 272 } 273 } 274 } 275 276 try 277 { 278 BufferedReader reader = new BufferedReader(new InputStreamReader(rsrcStream)); 279 matrix = readMatrixData(reader); 280 reader.close(); 281 } 282 catch (IOException e) 283 { 284 throw new RuntimeException("Problem reading data for substitution matrix " + StringUtil.singleQuote(name()) + "!", e); 285 } 286 287 return matrix; 288 } 289 290 //--------------------------------------------------------------------------- 291 private void readMatrixDataFromFile(File inFile) 292 throws IOException 293 { 294 if (null == inFile) 295 { 296 throw new IOException("No substitution matrix file was specified!"); 297 } 298 else if (! inFile.exists()) 299 { 300 throw new IOException("The substitution matrix file " + StringUtil.singleQuote(inFile.getPath()) + " does not exist!"); 301 } 302 else if (! inFile.canRead()) 303 { 304 throw new IOException("No read permissions for the substitution matrix file " + StringUtil.singleQuote(inFile.getPath()) + "!"); 305 } 306 307 BufferedReader reader = null; 308 try 309 { 310 reader = new BufferedReader(new FileReader(inFile)); 311 readMatrixData(reader); 312 } 313 finally 314 { 315 if (reader != null) 316 { 317 reader.close(); 318 } 319 } 320 } 321 322 //--------------------------------------------------------------------------- 323 private float[][] readMatrixData(BufferedReader inReader) 324 throws IOException 325 { 326 float[][] matrix = null; 327 328 // For performance, we will use an int[] instead of a Map to lookup the proper matrix position for a given residue 329 int[] residueIndexLookup = null; 330 String line; 331 while ((line = inReader.readLine()) != null) 332 { 333 line = line.trim(); 334 335 if (0 == line.length() // Skip blank lines 336 || line.startsWith("#")) // Skip comment lines 337 { 338 continue; 339 } 340 341 // Read the header line of residues 342 if (null == residueIndexLookup 343 && ! sNumPattern.matcher(line).find()) 344 { 345// mResidueIndex = new OrderedMap<Character, Integer>(); 346 int inex = 0; 347 int maxIndex = -1; 348 String pieces[] = line.split("\\s+"); 349 for (String piece : pieces) 350 { 351 if (piece.length() > 1) 352 { 353 throw new RuntimeException("The matrix header line " + StringUtil.singleQuote(line) + " is not in the proper format!"); 354 } 355 356 if (((int)piece.charAt(0)) > maxIndex) 357 { 358 maxIndex = (int) piece.charAt(0); 359 } 360// mResidueIndex.put(piece.charAt(0), index++); 361 } 362 363 residueIndexLookup = new int[maxIndex + 1]; 364 mOrderedResidues = new ArrayList<>(pieces.length); 365 for (int i = 0; i <= maxIndex; i++) 366 { 367 residueIndexLookup[i] = -1; 368 } 369 370 int matrixColIndex = 0; 371 for (String piece : pieces) 372 { 373 residueIndexLookup[(int) piece.charAt(0)] = matrixColIndex++; 374 mOrderedResidues.add(piece.charAt(0)); 375 } 376 377// matrix = new float[mResidueIndex.size()][mResidueIndex.size()]; 378 matrix = new float[matrixColIndex][matrixColIndex]; 379 } 380 else if (matrix != null) 381 { 382 // Read matrix data 383 String pieces[] = line.split("\\s+"); 384 char residue = pieces[0].charAt(0); 385// int residueIndex = mResidueIndex.get(residue); 386 int residueIndex = residueIndexLookup[(int)residue]; 387 388 for (int i = 1; i < pieces.length; i++) 389 { 390 matrix[residueIndex][i - 1] = Float.parseFloat(pieces[i]); 391 } 392 } 393 } 394 395 mResidueIndexLookup = residueIndexLookup; 396 return matrix; 397 } 398 399 //--------------------------------------------------------------------------- 400 private int[] getResidueIndexLookup() 401 { 402 if (null == mResidueIndexLookup) 403 { 404 init(); 405 } 406 407 return mResidueIndexLookup; 408 } 409}