001package com.hfg.bio.seq.alignment; 002 003import java.util.List; 004 005import com.hfg.bio.HfgBioXML; 006import com.hfg.exception.ProgrammingException; 007import com.hfg.math.Range; 008import com.hfg.util.CompareUtil; 009import com.hfg.util.StringUtil; 010import com.hfg.xml.XMLNode; 011import com.hfg.xml.XMLTag; 012 013//------------------------------------------------------------------------------ 014/** 015 Pairwise sequence alignment scoring data. 016 <div> 017 @author J. Alex Taylor, hairyfatguy.com 018 </div> 019 */ 020//------------------------------------------------------------------------------ 021// com.hfg XML/HTML Coding Library 022// 023// This library is free software; you can redistribute it and/or 024// modify it under the terms of the GNU Lesser General Public 025// License as published by the Free Software Foundation; either 026// version 2.1 of the License, or (at your option) any later version. 027// 028// This library is distributed in the hope that it will be useful, 029// but WITHOUT ANY WARRANTY; without even the implied warranty of 030// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 031// Lesser General Public License for more details. 032// 033// You should have received a copy of the GNU Lesser General Public 034// License along with this library; if not, write to the Free Software 035// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 036// 037// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 038// jataylor@hairyfatguy.com 039//------------------------------------------------------------------------------ 040 041public class AlignmentScoring implements Cloneable, Comparable<AlignmentScoring> 042{ 043 private List<Range<Integer>> mScoringBoundaries; // 1-based 044 private Float mRawScore; 045 private Integer mNumIdentities; 046 private Integer mComparisonLength; 047 private Integer mAdjustedComparisonLength; 048 private Float mPctIdentity; 049 private Float mAdjustedPctIdentity; 050 private Float mEValue; 051 private Float mPValue; 052 private Float mZScore; 053 private Integer mQueryInternalGapLength; 054 055 //########################################################################### 056 // CONSTRUCTORS 057 //########################################################################### 058 059 //--------------------------------------------------------------------------- 060 public AlignmentScoring() 061 { 062 } 063 064 //--------------------------------------------------------------------------- 065 public AlignmentScoring(XMLNode inXMLNode) 066 { 067 if (! inXMLNode.getTagName().equals(HfgBioXML.ALIGNMENT_SCORING)) 068 { 069 throw new RuntimeException("Cannot construct a " + this.getClass().getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!"); 070 } 071 072 073 String rawScoreString = inXMLNode.getAttributeValue(HfgBioXML.SCORE_ATT); 074 if (StringUtil.isSet(rawScoreString)) 075 { 076 setScore(Float.parseFloat(rawScoreString)); 077 } 078 079 String numIdentititesString = inXMLNode.getAttributeValue(HfgBioXML.NUM_IDENTITIES_ATT); 080 if (StringUtil.isSet(numIdentititesString)) 081 { 082 setNumIdentities(Integer.parseInt(numIdentititesString)); 083 } 084 085 String comparisionLenString = inXMLNode.getAttributeValue(HfgBioXML.COMPARISON_LENGTH_ATT); 086 if (StringUtil.isSet(comparisionLenString)) 087 { 088 setComparisonLength(Integer.parseInt(comparisionLenString)); 089 } 090 } 091 092 //########################################################################### 093 // PUBLIC METHODS 094 //########################################################################### 095 096 //--------------------------------------------------------------------------- 097 public AlignmentScoring combine(AlignmentScoring inValue) 098 { 099 AlignmentScoring result = clone(); 100 101 if (inValue != null 102 && inValue.getComparisonLength() > 0) 103 { 104 result.incrementComparisonLength(inValue.getComparisonLength()); 105 result.incrementNumIdentities(inValue.getNumIdentities()); 106 107 if (result.mAdjustedComparisonLength != null 108 && inValue.mAdjustedComparisonLength != null) 109 { 110 result.mAdjustedComparisonLength += inValue.mAdjustedComparisonLength; 111 } 112 113 if (inValue.mQueryInternalGapLength != null) 114 { 115 if (null == result.mQueryInternalGapLength) 116 { 117 result.mQueryInternalGapLength = inValue.mQueryInternalGapLength; 118 } 119 else 120 { 121 result.mQueryInternalGapLength += inValue.mQueryInternalGapLength; 122 } 123 } 124 125 if (result.getScore() != null 126 && inValue.getScore() != null) 127 { 128 result.setScore(result.getScore() + inValue.getScore()); 129 } 130 131 if (result.getPValue() != null 132 && inValue.getPValue() != null) 133 { 134 result.setPValue(result.getPValue() * inValue.getPValue()); 135 } 136 137 // TODO: E-value & Z-score 138 // TODO: Scoring boundaries? 139 140 result.mPctIdentity = null; 141 result.mAdjustedPctIdentity = null; 142 } 143 144 return result; 145 } 146 147 //--------------------------------------------------------------------------- 148 @Override 149 public AlignmentScoring clone() 150 { 151 AlignmentScoring cloneObj; 152 try 153 { 154 cloneObj = (AlignmentScoring) super.clone(); 155 } 156 catch (Exception e) 157 { 158 throw new ProgrammingException(e); 159 } 160 161 return cloneObj; 162 } 163 164 //-------------------------------------------------------------------------- 165 @Override 166 public boolean equals(Object inObj2) 167 { 168 return (inObj2 != null 169 && inObj2 instanceof AlignmentScoring 170 && 0 == compareTo((AlignmentScoring) inObj2)); 171 } 172 173 //-------------------------------------------------------------------------- 174 @Override 175 public int hashCode() 176 { 177 int hashCode = 0; 178 if (getScore() != null) 179 { 180 hashCode += getScore().hashCode(); 181 } 182 183 hashCode += 31 * getNumIdentities(); 184 185 return hashCode; 186 } 187 188 //-------------------------------------------------------------------------- 189 @Override 190 public int compareTo(AlignmentScoring inObj2) 191 { 192 int score = - CompareUtil.compare(getScore(), inObj2.getScore()); 193 if (0 == score) 194 { 195 score = - CompareUtil.compare(getNumIdentities(), inObj2.getNumIdentities()); 196 } 197 198 return score; 199 } 200 201 //--------------------------------------------------------------------------- 202 @Override 203 public String toString() 204 { 205 return String.format("%.2f: [%d / %d]", getScore(), getNumIdentities(), getComparisonLength()); 206 } 207 208 //-------------------------------------------------------------------------- 209 public XMLNode toXMLNode() 210 { 211 XMLTag tag = new XMLTag(HfgBioXML.ALIGNMENT_SCORING); 212 213 if (getScore() != null) 214 { 215 tag.setAttribute(HfgBioXML.SCORE_ATT, getScore()); 216 } 217 218 if (getComparisonLength() != 0) 219 { 220 tag.setAttribute(HfgBioXML.NUM_IDENTITIES_ATT, getNumIdentities()); 221 } 222 223 if (getComparisonLength() != 0) 224 { 225 tag.setAttribute(HfgBioXML.COMPARISON_LENGTH_ATT, getComparisonLength()); 226 } 227 228 229 return tag; 230 } 231 232 //--------------------------------------------------------------------------- 233 public void clear() 234 { 235 mNumIdentities = null; 236 mComparisonLength = null; 237 mAdjustedComparisonLength = null; 238 mPctIdentity = null; 239 mAdjustedPctIdentity = null; 240 mEValue = null; 241 mPValue = null; 242 mZScore = null; 243 } 244 245 //--------------------------------------------------------------------------- 246 /** 247 Specifies an optional 1-based list of alignment position ranges which should 248 be considered for scoring. 249 @param inValue List of 1-based Ranges to score 250 @return this AlignmentScoring object to enable method chaining 251 */ 252 public AlignmentScoring setScoringBoundaries(List<Range<Integer>> inValue) 253 { 254 mScoringBoundaries = inValue; 255 return this; 256 } 257 258 //--------------------------------------------------------------------------- 259 public List<Range<Integer>> getScoringBoundaries() 260 { 261 return mScoringBoundaries; 262 } 263 264 //--------------------------------------------------------------------------- 265 public AlignmentScoring setScore(Float inValue) 266 { 267 mRawScore = inValue; 268 return this; 269 } 270 271 //--------------------------------------------------------------------------- 272 public Float getScore() 273 { 274 return mRawScore; 275 } 276 277 //--------------------------------------------------------------------------- 278 public AlignmentScoring setNumIdentities(Integer inValue) 279 { 280 mNumIdentities = inValue; 281 return this; 282 } 283 284 //--------------------------------------------------------------------------- 285 public AlignmentScoring incrementNumIdentities() 286 { 287 return incrementNumIdentities(1); 288 } 289 290 //--------------------------------------------------------------------------- 291 public AlignmentScoring incrementNumIdentities(int inValue) 292 { 293 mNumIdentities = (mNumIdentities != null ? mNumIdentities : 0) + inValue; 294 return this; 295 } 296 297 //--------------------------------------------------------------------------- 298 public int getNumIdentities() 299 { 300 return mNumIdentities != null ? mNumIdentities : 0; 301 } 302 303 304 //--------------------------------------------------------------------------- 305 public AlignmentScoring setComparisonLength(Integer inValue) 306 { 307 mComparisonLength = inValue; 308 return this; 309 } 310 311 //--------------------------------------------------------------------------- 312 public AlignmentScoring incrementComparisonLength() 313 { 314 return incrementComparisonLength(1); 315 } 316 317 //--------------------------------------------------------------------------- 318 public AlignmentScoring incrementComparisonLength(int inValue) 319 { 320 mComparisonLength = (mComparisonLength != null ? mComparisonLength : 0) + inValue; 321 return this; 322 } 323 324 //--------------------------------------------------------------------------- 325 public int getComparisonLength() 326 { 327 return mComparisonLength != null ? mComparisonLength : 0; 328 } 329 330 //--------------------------------------------------------------------------- 331 /** 332 Doesn't penalize sequence's terminal gaps. 333 @return the adjusted comparison length 334 */ 335 public int getAdjustedComparisonLength() 336 { 337 return mAdjustedComparisonLength != null ? mAdjustedComparisonLength : mComparisonLength != null ? mComparisonLength : 0; 338 } 339 340 //--------------------------------------------------------------------------- 341 public Float getPctIdentity() 342 { 343 if (null == mPctIdentity 344 && mComparisonLength != null) 345 { 346 mPctIdentity = mComparisonLength > 0 && mNumIdentities != null ? 100f * mNumIdentities / mComparisonLength.floatValue() : 0; 347 } 348 349 return mPctIdentity; 350 } 351 352 //--------------------------------------------------------------------------- 353 /** 354 Sets the percent identity that doesn't penalize sequence's terminal gaps. 355 @return this AlignmentScoring object to enable method chaining 356 */ 357 public AlignmentScoring setAdjustedPctIdentity(Float inValue) 358 { 359 mAdjustedPctIdentity = inValue; 360 return this; 361 } 362 363 //--------------------------------------------------------------------------- 364 /** 365 Doesn't penalize sequence's terminal gaps. 366 @return the adjusted percent identity 367 */ 368 public Float getAdjustedPctIdentity() 369 { 370 if (null == mAdjustedPctIdentity 371 && getAdjustedComparisonLength() > 0) 372 { 373 mAdjustedPctIdentity = mNumIdentities != null ? 100f * mNumIdentities / (float) getAdjustedComparisonLength() : 0; 374 } 375 376 return mAdjustedPctIdentity != null ? mAdjustedPctIdentity : getPctIdentity(); 377 } 378 379 380 //--------------------------------------------------------------------------- 381 public AlignmentScoring setEValue(Float inValue) 382 { 383 mEValue = inValue; 384 return this; 385 } 386 387 //--------------------------------------------------------------------------- 388 public Float getEValue() 389 { 390 return mEValue; 391 } 392 393 //--------------------------------------------------------------------------- 394 public AlignmentScoring setPValue(Float inValue) 395 { 396 mPValue = inValue; 397 return this; 398 } 399 400 //--------------------------------------------------------------------------- 401 public Float getPValue() 402 { 403 return mPValue; 404 } 405 406 //--------------------------------------------------------------------------- 407 public AlignmentScoring setZScore(Float inValue) 408 { 409 mZScore = inValue; 410 return this; 411 } 412 413 //--------------------------------------------------------------------------- 414 public Float getZScore() 415 { 416 return mZScore; 417 } 418 419 420 //--------------------------------------------------------------------------- 421 /** 422 Returns the total number of non-terminal query gap positions where the subject is not a gap. 423 * @return the total number of non-terminal query gap positions where the subject is not a gap 424 */ 425 public Integer getQueryInternalGapLength() 426 { 427 return mQueryInternalGapLength; 428 } 429 430 //--------------------------------------------------------------------------- 431 public void calculate(PairwiseSeqAlignment inAlignment) 432 { 433 int numIdentities = 0; 434 int comparisonLength = 0; 435 int adjustedComparisonLength = 0; 436 int queryInternalGapLength = 0; // The total number of non-terminal query gap positions where the subject is not a gap 437 438 AlignedQuery alignedQuery = inAlignment.getAlignedQuery(); 439 AlignedSubject alignedSubject = inAlignment.getAlignedSubject(); 440 441 String queryResidues = alignedQuery.getAlignedSeq().toLowerCase(); // Lower-casing for case-insensitive comparison 442 String subjectResidues = alignedSubject.getAlignedSeq().toLowerCase(); 443 444 445 Integer lastQueryResidueIndex = queryResidues.length() - 1; 446 if (null == alignedQuery.getSeq() 447 || alignedQuery.getSeqLocation().getEnd().equals(alignedQuery.getSeq().length())) 448 { 449 for (int i = queryResidues.length() - 1; i >= 0; i--) 450 { 451 char queryResidue = queryResidues.charAt(i); 452 if (queryResidue != '-') 453 { 454 lastQueryResidueIndex = i; 455 break; 456 } 457 } 458 } 459 460 Integer lastSubjectResidueIndex = subjectResidues.length() - 1; 461 if (null == alignedSubject.getSeq() 462 || alignedSubject.getSeqLocation().getEnd().equals(alignedSubject.getSeq().length())) 463 { 464 for (int i = subjectResidues.length() - 1; i >= 0; i--) 465 { 466 char subjectResidue = subjectResidues.charAt(i); 467 if (subjectResidue != '-') 468 { 469 lastSubjectResidueIndex = i; 470 break; 471 } 472 } 473 } 474 475 int lastIndex = Math.min(lastQueryResidueIndex, lastSubjectResidueIndex); 476 477 // Did the query start before this alignment? 478 boolean queryStarted = alignedQuery.getSeqLocation() != null 479 && alignedQuery.getSeqLocation().getStart() != null 480 && alignedQuery.getSeqLocation().getStart() > 1; 481 482 // Did the subject start before this alignment? 483 boolean subjectStarted = alignedSubject.getSeqLocation() != null 484 && alignedSubject.getSeqLocation().getStart() != null 485 && alignedSubject.getSeqLocation().getStart() > 1; 486 487 boolean firstAlignedQueryResidueEncountered = false; 488 489 for (int i = 0; i < queryResidues.length(); i++) 490 { 491 char queryResidue = queryResidues.charAt(i); 492 char subjectResidue = subjectResidues.charAt(i); 493 494 if (! firstAlignedQueryResidueEncountered 495 && queryResidue != '-') 496 { 497 firstAlignedQueryResidueEncountered = true; 498 499 if (! queryStarted) 500 { 501 queryStarted = true; 502 } 503 } 504 505 506 if (! subjectStarted 507 && subjectResidue != '-') 508 { 509 subjectStarted = true; 510 } 511 512 // Default to true if no boundaries were specified 513 boolean isInScoringRegion = true; 514 if (mScoringBoundaries != null) 515 { 516 isInScoringRegion = false; 517 for (Range<Integer> range : mScoringBoundaries) 518 { 519 if (range.contains(i + 1)) 520 { 521 isInScoringRegion = true; 522 break; 523 } 524 } 525 } 526 527 if ((queryResidue != '-' || subjectResidue != '-') 528 && isInScoringRegion) 529 { 530 if (queryResidue == subjectResidue) 531 { 532 numIdentities++; 533 } 534 535 comparisonLength++; 536 537 if (queryStarted 538 && subjectStarted 539 && i <= lastIndex) 540 { 541 adjustedComparisonLength++; 542 } 543 } 544 545 if (queryResidue == '-' 546 && subjectResidue != '-' 547 && firstAlignedQueryResidueEncountered 548 && isInScoringRegion 549 && i < lastQueryResidueIndex) 550 { 551 queryInternalGapLength++; 552 } 553 } 554 555 mNumIdentities = numIdentities; 556 mComparisonLength = comparisonLength; 557 mAdjustedComparisonLength = adjustedComparisonLength; 558 mQueryInternalGapLength = queryInternalGapLength; 559 } 560}