001package com.hfg.bio.seq.alignment; 002 003import java.util.HashSet; 004import java.util.Set; 005 006import com.hfg.bio.HfgBioXML; 007import com.hfg.bio.seq.BioSequence; 008import com.hfg.bio.seq.alignment.matrix.SubstitutionMatrix; 009import com.hfg.exception.ProgrammingException; 010import com.hfg.util.CompareUtil; 011import com.hfg.util.StringBuilderPlus; 012import com.hfg.util.StringUtil; 013import com.hfg.xml.XMLNode; 014import com.hfg.xml.XMLTag; 015 016//------------------------------------------------------------------------------ 017/** 018 Pairwise sequence alignment container. 019 <div> 020 @author J. Alex Taylor, hairyfatguy.com 021 </div> 022 */ 023//------------------------------------------------------------------------------ 024// com.hfg XML/HTML Coding Library 025// 026// This library is free software; you can redistribute it and/or 027// modify it under the terms of the GNU Lesser General Public 028// License as published by the Free Software Foundation; either 029// version 2.1 of the License, or (at your option) any later version. 030// 031// This library is distributed in the hope that it will be useful, 032// but WITHOUT ANY WARRANTY; without even the implied warranty of 033// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 034// Lesser General Public License for more details. 035// 036// You should have received a copy of the GNU Lesser General Public 037// License along with this library; if not, write to the Free Software 038// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 039// 040// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 041// jataylor@hairyfatguy.com 042//------------------------------------------------------------------------------ 043 044public class PairwiseSeqAlignment implements Cloneable, Comparable<PairwiseSeqAlignment> 045{ 046 private PairwiseSettings mSettings; 047 private AlignedQuery mAlignedQuery; 048 private AlignedSubject mAlignedSubject; 049 private Integer mAlignmentLength; 050 private AlignmentScoring mScoring = new AlignmentScoring(); 051 052 //########################################################################### 053 // CONSTRUCTORS 054 //########################################################################### 055 056 //--------------------------------------------------------------------------- 057 public PairwiseSeqAlignment(AlignedQuery inAlignedQuery, AlignedSubject inAlignedSubject) 058 { 059 mAlignedQuery = inAlignedQuery; 060 mAlignedSubject = inAlignedSubject; 061 } 062 063 //--------------------------------------------------------------------------- 064 public PairwiseSeqAlignment(XMLNode inXMLNode) 065 { 066 if (! inXMLNode.getTagName().equals(HfgBioXML.ALIGNMENT)) 067 { 068 throw new RuntimeException("Cannot construct a " + this.getClass().getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!"); 069 } 070 071 mAlignedQuery = new AlignedQuery(inXMLNode.getRequiredSubtagByName(HfgBioXML.ALIGNED_QUERY)); 072 mAlignedSubject = new AlignedSubject(inXMLNode.getRequiredSubtagByName(HfgBioXML.ALIGNED_SUBJECT)); 073 074 String rawScoreString = inXMLNode.getAttributeValue(HfgBioXML.SCORE_ATT); 075 if (StringUtil.isSet(rawScoreString)) 076 { 077 setScore(Float.parseFloat(rawScoreString)); 078 } 079 } 080 081 //########################################################################### 082 // PUBLIC METHODS 083 //########################################################################### 084 085 //--------------------------------------------------------------------------- 086 @Override 087 public PairwiseSeqAlignment clone() 088 { 089 PairwiseSeqAlignment cloneObj; 090 try 091 { 092 cloneObj = (PairwiseSeqAlignment) super.clone(); 093 } 094 catch (Exception e) 095 { 096 throw new ProgrammingException(e); 097 } 098 099 if (mSettings != null) 100 { 101 cloneObj.mSettings = mSettings.clone(); 102 } 103 104 if (mAlignedQuery != null) 105 { 106 cloneObj.mAlignedQuery = mAlignedQuery.clone(); 107 } 108 109 if (mAlignedSubject != null) 110 { 111 cloneObj.mAlignedSubject = mAlignedSubject.clone(); 112 } 113 114 return cloneObj; 115 } 116 117 //-------------------------------------------------------------------------- 118 @Override 119 public boolean equals(Object inObj2) 120 { 121 return (inObj2 != null 122 && inObj2 instanceof PairwiseSeqAlignment 123 && 0 == compareTo((PairwiseSeqAlignment) inObj2)); 124 } 125 126 //-------------------------------------------------------------------------- 127 @Override 128 public int hashCode() 129 { 130 int hashCode = 0; 131 if (getScore() != null) 132 { 133 hashCode += getScore().hashCode(); 134 } 135 136 if (getNumIdentities() != null) 137 { 138 hashCode += 31 * getNumIdentities().hashCode(); 139 } 140 141 return hashCode; 142 } 143 144 //-------------------------------------------------------------------------- 145 @Override 146 public int compareTo(PairwiseSeqAlignment inObj2) 147 { 148 int score = - CompareUtil.compare(getScore(), inObj2.getScore()); 149 if (0 == score) 150 { 151 score = - CompareUtil.compare(getNumIdentities(), inObj2.getNumIdentities()); 152 } 153 154 return score; 155 } 156 157 //--------------------------------------------------------------------------- 158 public PairwiseSeqAlignment setSettings(PairwiseSettings inValue) 159 { 160 mSettings = inValue; 161 return this; 162 } 163 164 //--------------------------------------------------------------------------- 165 public PairwiseSettings getSettings() 166 { 167 return mSettings; 168 } 169 170 //--------------------------------------------------------------------------- 171 public void clearCachedValues() 172 { 173 mAlignmentLength = null; 174 mScoring.clear(); 175 } 176 177 //--------------------------------------------------------------------------- 178 public PairwiseSeqAlignment setAlignedQuery(AlignedQuery inValue) 179 { 180 mAlignedQuery = inValue; 181 clearCachedValues(); 182 return this; 183 } 184 185 //--------------------------------------------------------------------------- 186 public AlignedQuery getAlignedQuery() 187 { 188 return mAlignedQuery; 189 } 190 191 //--------------------------------------------------------------------------- 192 public PairwiseSeqAlignment setAlignedSubject(AlignedSubject inValue) 193 { 194 mAlignedSubject = inValue; 195 clearCachedValues(); 196 return this; 197 } 198 199 //--------------------------------------------------------------------------- 200 public AlignedSubject getAlignedSubject() 201 { 202 return mAlignedSubject; 203 } 204 205 //--------------------------------------------------------------------------- 206 public Integer length() 207 { 208 if (null == mAlignmentLength) 209 { 210 mAlignmentLength = getAlignedQuery().getAlignedSeq().length(); 211 } 212 213 return mAlignmentLength; 214 } 215 216 //--------------------------------------------------------------------------- 217 /** 218 Returns the alignment scoring object. 219 * @return the alignment scoring object 220 */ 221 public AlignmentScoring getScoring() 222 { 223 ensureScoringIdentityStatsAreCalculated(); 224 return mScoring; 225 } 226 227 //--------------------------------------------------------------------------- 228 public PairwiseSeqAlignment setScore(Float inValue) 229 { 230 mScoring.setScore(inValue); 231 return this; 232 } 233 234 //--------------------------------------------------------------------------- 235 public Float getScore() 236 { 237 return mScoring.getScore(); 238 } 239 240 //--------------------------------------------------------------------------- 241 public Integer getNumIdentities() 242 { 243 ensureScoringIdentityStatsAreCalculated(); 244 return mScoring.getNumIdentities(); 245 } 246 247 //--------------------------------------------------------------------------- 248 public Integer getComparisonLength() 249 { 250 ensureScoringIdentityStatsAreCalculated(); 251 return mScoring.getComparisonLength(); 252 } 253 254 //--------------------------------------------------------------------------- 255 /** 256 Doesn't penalize sequence's terminal gaps. 257 @return the adjusted comparison length 258 */ 259 public Integer getAdjustedComparisonLength() 260 { 261 ensureScoringIdentityStatsAreCalculated(); 262 return mScoring.getAdjustedComparisonLength(); 263 } 264 265 //--------------------------------------------------------------------------- 266 public Float getPctIdentity() 267 { 268 ensureScoringIdentityStatsAreCalculated(); 269 return mScoring.getPctIdentity(); 270 } 271 272 //--------------------------------------------------------------------------- 273 /** 274 Doesn't penalize sequence's terminal gaps. 275 @return the adjusted percent identity 276 */ 277 public Float getAdjustedPctIdentity() 278 { 279 ensureScoringIdentityStatsAreCalculated(); 280 return mScoring.getAdjustedPctIdentity(); 281 } 282 283 284 //--------------------------------------------------------------------------- 285 public PairwiseSeqAlignment setEValue(Float inValue) 286 { 287 mScoring.setEValue(inValue); 288 return this; 289 } 290 291 //--------------------------------------------------------------------------- 292 public Float getEValue() 293 { 294 return mScoring.getEValue(); 295 } 296 297 //--------------------------------------------------------------------------- 298 public PairwiseSeqAlignment setPValue(Float inValue) 299 { 300 mScoring.setPValue(inValue); 301 return this; 302 } 303 304 //--------------------------------------------------------------------------- 305 public Float getPValue() 306 { 307 return mScoring.getPValue(); 308 } 309 310 //--------------------------------------------------------------------------- 311 public PairwiseSeqAlignment setZScore(Float inValue) 312 { 313 mScoring.setZScore(inValue); 314 return this; 315 } 316 317 //--------------------------------------------------------------------------- 318 public Float getZScore() 319 { 320 return mScoring.getZScore(); 321 } 322 323 //--------------------------------------------------------------------------- 324 /** 325 Method helpful for understanding the per-residue scoring breakdown. 326 @return ASCII display of the per-residue scoring 327 */ 328 public String showScoring() 329 { 330 SubstitutionMatrix substitutionMatrix = null; 331 if (null == getAlignedSubject().getPSSM()) 332 { 333 substitutionMatrix = getSettings().getSubstitutionMatrix(getAlignedQuery().getSeq().getType()); 334 } 335 336 StringBuilderPlus buffer = new StringBuilderPlus(); 337 338 boolean queryGapOpen = false; 339 boolean subjectGapOpen = false; 340 341 Set<Integer> queryGapExclusionIndices = new HashSet<>(); 342 if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.QUERY, PairwiseSeqTerminus.LEFT)) 343 { 344 for (int i = 0; i < length(); i++) 345 { 346 if (getAlignedQuery().getAlignedSeq().charAt(i) != '-') 347 { 348 break; 349 } 350 351 queryGapExclusionIndices.add(i); 352 } 353 } 354 355 if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.QUERY, PairwiseSeqTerminus.RIGHT)) 356 { 357 for (int i = length() - 1; i > 0; i--) 358 { 359 if (getAlignedQuery().getAlignedSeq().charAt(i) != '-') 360 { 361 break; 362 } 363 364 queryGapExclusionIndices.add(i); 365 } 366 } 367 368 Set<Integer> subjectGapExclusionIndices = new HashSet<>(); 369 if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.SUBJECT, PairwiseSeqTerminus.LEFT)) 370 { 371 for (int i = 0; i < length(); i++) 372 { 373 if (getAlignedSubject().getAlignedSeq().charAt(i) != '-') 374 { 375 break; 376 } 377 378 subjectGapExclusionIndices.add(i); 379 } 380 } 381 382 if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.SUBJECT, PairwiseSeqTerminus.RIGHT)) 383 { 384 for (int i = length() - 1; i > 0; i--) 385 { 386 if (getAlignedSubject().getAlignedSeq().charAt(i) != '-') 387 { 388 break; 389 } 390 391 subjectGapExclusionIndices.add(i); 392 } 393 } 394 395 Set<Integer> subjectGapScoreIndices = new HashSet<>(); 396 if (getSettings().getSubjectTemplateMode()) 397 { 398 StringBuilderPlus origSubject = new StringBuilderPlus(getAlignedSubject().getSeq().getSequence()); 399 for (int i = 0; i < length(); i++) 400 { 401 if (getAlignedSubject().getAlignedSeq().charAt(i) == '-') 402 { 403 if (origSubject.charAt(i) == '-') 404 { 405 subjectGapScoreIndices.add(i); 406 } 407 else 408 { 409 origSubject.insert(i, '-'); 410 } 411 } 412 } 413 } 414 415 GapPenalties queryGapPenalties = getSettings().getGapPenalties(PairwiseSeqType.QUERY); 416 GapPenalties subjectGapPenalties = getSettings().getGapPenalties(PairwiseSeqType.SUBJECT); 417 418 float totalScore = 0; 419 420 for (int i = 0; i < length(); i++) 421 { 422 buffer.append(String.format("%3d. ", i + 1)); 423 424 char queryResidue = getAlignedQuery().getAlignedSeq().charAt(i); 425 char subjectResidue = getAlignedSubject().getAlignedSeq().charAt(i); 426 427 float score = 0; 428 if (queryResidue != '-') 429 { 430 queryGapOpen = false; 431 432 if (subjectResidue != '-') 433 { 434 subjectGapOpen = false; 435 if (substitutionMatrix != null) 436 { 437 score = (getSettings().getScoreCaseInsensitive() ? 438 substitutionMatrix.scoreCaseInsensitive(queryResidue, subjectResidue) : 439 substitutionMatrix.score(queryResidue, subjectResidue)); 440 } 441 else 442 { 443 score = getAlignedSubject().getPSSM().score(i + 1, queryResidue); 444 } 445 } 446 else 447 { 448 if (subjectGapScoreIndices.contains(i)) 449 { 450 if (substitutionMatrix != null) 451 { 452 score = (getSettings().getScoreCaseInsensitive() ? 453 substitutionMatrix.scoreCaseInsensitive(queryResidue, subjectResidue) : 454 substitutionMatrix.score(queryResidue, subjectResidue)); 455 } 456 else 457 { 458 score = getAlignedSubject().getPSSM().score(i + 1, queryResidue); 459 } 460 } 461 else if (! subjectGapExclusionIndices.contains(i)) 462 { 463 score = (subjectGapOpen ? subjectGapPenalties.getExtensionPenalty() : subjectGapPenalties.getOpenPenalty()); 464 } 465 466 subjectGapOpen = true; 467 } 468 } 469 else 470 { 471 if (subjectGapScoreIndices.contains(i)) 472 { 473 if (substitutionMatrix != null) 474 { 475 score = (getSettings().getScoreCaseInsensitive() ? 476 substitutionMatrix.scoreCaseInsensitive(queryResidue, subjectResidue) : 477 substitutionMatrix.score(queryResidue, subjectResidue)); 478 } 479 else 480 { 481 score = getAlignedSubject().getPSSM().score(i + 1, queryResidue); 482 } 483 } 484 else if (! queryGapExclusionIndices.contains(i)) 485 { 486 if (substitutionMatrix != null) 487 { 488 score = (queryGapOpen ? queryGapPenalties.getExtensionPenalty() : queryGapPenalties.getOpenPenalty()); 489 } 490 else 491 { 492 score = (queryGapOpen ? queryGapPenalties.getExtensionPenalty() * getAlignedSubject().getPSSM().getGapExtScore(i + 1) 493 : queryGapPenalties.getOpenPenalty() * getAlignedSubject().getPSSM().getGapOpenScore(i + 1)); 494 } 495 } 496 497 queryGapOpen = true; 498 } 499 500 buffer.append(String.format("%c %5.1f %c", queryResidue, score, subjectResidue)); 501 502 buffer.appendln(); 503 504 totalScore += score; 505 } 506 507 buffer.appendln(String.format("Total:%6.1f", totalScore)); 508 buffer.appendln(); 509 510 return buffer.toString(); 511 } 512 513 //--------------------------------------------------------------------------- 514 @Override 515 public String toString() 516 { 517 StringBuilderPlus buffer = new StringBuilderPlus().setDelimiter(", "); 518 if (null == getSettings() 519 || getSettings().getQueryAlignmentType().equals(PairwiseAlignmentType.LOCAL)) 520 { 521 buffer.append(String.format("Query Range: %s", getAlignedQuery().getSeqLocation().toString())); 522 } 523 524 if (null == getSettings() 525 || getSettings().getSubjectAlignmentType().equals(PairwiseAlignmentType.LOCAL)) 526 { 527 buffer.delimitedAppend(String.format("Subject Range: %s", getAlignedSubject().getSeqLocation().toString())); 528 } 529 530 buffer.delimitedAppend(String.format("Score: %.1f\n", getScore())); 531 // Show the aligned sequences with the query on top 532 buffer.appendln(getAlignedQuery().getAlignedSeq()); 533 534 // Generate the comparison string - lowercase the sequences for doing a case-insensitive comparison 535 String lcQuerySeq = getAlignedQuery().getAlignedSeq().toLowerCase(); 536 String lcSubjectSeq = getAlignedSubject().getAlignedSeq().toLowerCase(); 537 538 for (int i = 0; i < length(); i++) 539 { 540 char queryChar = lcQuerySeq.charAt(i); 541 char subjectChar = lcSubjectSeq.charAt(i); 542 buffer.append(queryChar != '-' && queryChar == subjectChar ? '|' : ' '); 543 } 544 buffer.appendln(); 545 546 // Now show the aligned subject on the bottom 547 buffer.appendln(getAlignedSubject().getAlignedSeq()); 548 549 return buffer.toString(); 550 } 551 552 //-------------------------------------------------------------------------- 553 public XMLNode toXMLNode() 554 { 555 XMLTag tag = new XMLTag(HfgBioXML.ALIGNMENT); 556 557 if (getScore() != null) 558 { 559 tag.setAttribute(HfgBioXML.SCORE_ATT, getScore()); 560 } 561 562 tag.addSubtag(getAlignedQuery().toXMLNode()); 563 tag.addSubtag(getAlignedSubject().toXMLNode()); 564 565 return tag; 566 } 567 568 //--------------------------------------------------------------------------- 569 public PairwiseSeqAlignment getSubalignment(int inZeroIndexedStart) 570 { 571 return getSubalignment(inZeroIndexedStart, length()); 572 } 573 574 //--------------------------------------------------------------------------- 575 public PairwiseSeqAlignment getSubalignment(int inZeroIndexedStart, int inLimit) 576 { 577 578 AlignedQuery subalignedQuery = new AlignedQuery(mAlignedQuery.getSeq(), 579 mAlignedQuery.getAlignedSeq().substring(inZeroIndexedStart, inLimit), 580 mAlignedQuery.getLinearPosition(inZeroIndexedStart)); 581 582 AlignedSubject subalignedSubject = null; 583 if (mAlignedSubject != null) 584 { 585 subalignedSubject = new AlignedSubject(mAlignedSubject.getSeq(), 586 mAlignedSubject.getAlignedSeq().substring(inZeroIndexedStart, inLimit), 587 mAlignedSubject.getLinearPosition(inZeroIndexedStart)); 588 } 589 590 return new PairwiseSeqAlignment(subalignedQuery, subalignedSubject); 591 } 592 593 //--------------------------------------------------------------------------- 594 /** 595 In the case of a local query vs. a global subject, this method can be used to extend the local query in both directions 596 to match where subject residues are present. 597 */ 598 public void extendAlignedQuery() 599 { 600 BioSequence query = getAlignedQuery().getSeq(); 601 if (query != null) 602 { 603 int queryLength = query.length(); // Pulled out for performance 604 int queryMatchStart = getAlignedQuery().getSeqLocation().getStart(); 605 606 // Don't leave terminal gaps on the query when there are residues to give and the germline has a residue 607 if (getAlignedQuery().getNTerminalGapLength() > 0 608 && queryMatchStart > 1) 609 { 610 StringBuilder queryBuffer = new StringBuilder(getAlignedQuery().getAlignedSeq()); 611 StringBuilder germlineBuffer = new StringBuilder(getAlignedSubject().getAlignedSeq()); 612 613 int queryGapSize = getAlignedQuery().getNTerminalGapLength(); 614 int germlineGapSize = getAlignedSubject().getNTerminalGapLength(); 615 616 int adjustedQueryMatchStart = queryMatchStart; 617 618 if (queryGapSize > germlineGapSize) 619 { 620 for (int index = queryGapSize - 1; index >= 0; index--) 621 { 622 if (germlineBuffer.charAt(index) != '-' 623 && '-' == queryBuffer.charAt(index)) 624 { 625 queryBuffer.setCharAt(index, query.residueAt(adjustedQueryMatchStart - queryMatchStart)); 626 627 adjustedQueryMatchStart--; 628 if (1 == adjustedQueryMatchStart) 629 { 630 break; 631 } 632 } 633 } 634 } 635 636 setAlignedQuery(new AlignedQuery(query, queryBuffer, adjustedQueryMatchStart)); 637 } 638 639 if (getAlignedQuery().getCTerminalGapLength() > 0) 640 { 641 StringBuilder queryBuffer = new StringBuilder(getAlignedQuery().getAlignedSeq()); 642 int queryBufferLength = queryBuffer.length(); // Pulled out for performance 643 StringBuilder germlineBuffer = new StringBuilder(getAlignedSubject().getAlignedSeq()); 644 645 int queryGapSize = getAlignedQuery().getCTerminalGapLength(); 646 int germlineGapSize = getAlignedSubject().getCTerminalGapLength(); 647 648 int matchEnd = getAlignedQuery().getSeqLocation().getEnd(); 649 650 if (queryGapSize > germlineGapSize 651 && matchEnd < queryLength) 652 { 653 for (int index = queryBufferLength - queryGapSize; index < queryBufferLength; index++) 654 { 655 if (germlineBuffer.charAt(index) != '-' 656 && '-' == queryBuffer.charAt(index)) 657 { 658 queryBuffer.setCharAt(index, query.residueAt(matchEnd + 1)); 659 660 matchEnd++; 661 if (matchEnd == queryLength) 662 { 663 break; 664 } 665 } 666 } 667 } 668 669 setAlignedQuery(new AlignedQuery(query, queryBuffer, getAlignedQuery().getSeqLocation().getStart())); 670 } 671 } 672 } 673 674 //--------------------------------------------------------------------------- 675 /** 676 Swaps the query and the subject. 677 */ 678 public void invert() 679 { 680 AlignedQuery newQuery = new AlignedQuery(getAlignedSubject().getSeq(), getAlignedSubject().getAlignedSeq(), getAlignedSubject().getSeqLocation().getStart()); 681 AlignedSubject newSubject = new AlignedSubject(getAlignedQuery().getSeq(), getAlignedQuery().getAlignedSeq(), getAlignedQuery().getSeqLocation().getStart()); 682 683 setAlignedQuery(newQuery); 684 setAlignedSubject(newSubject); 685 } 686 687 //########################################################################### 688 // PRIVATE METHODS 689 //########################################################################### 690 691 //--------------------------------------------------------------------------- 692 private void ensureScoringIdentityStatsAreCalculated() 693 { 694 if (0 == mScoring.getComparisonLength()) 695 { 696 mScoring.calculate(this); 697 } 698 } 699 700}