001package com.hfg.bio.seq.alignment;
002
003import java.util.HashSet;
004import java.util.Set;
005
006import com.hfg.bio.HfgBioXML;
007import com.hfg.bio.seq.BioSequence;
008import com.hfg.bio.seq.alignment.matrix.SubstitutionMatrix;
009import com.hfg.exception.ProgrammingException;
010import com.hfg.util.CompareUtil;
011import com.hfg.util.StringBuilderPlus;
012import com.hfg.util.StringUtil;
013import com.hfg.xml.XMLNode;
014import com.hfg.xml.XMLTag;
015
016//------------------------------------------------------------------------------
017/**
018 Pairwise sequence alignment container.
019 <div>
020 @author J. Alex Taylor, hairyfatguy.com
021 </div>
022 */
023//------------------------------------------------------------------------------
024// com.hfg XML/HTML Coding Library
025//
026// This library is free software; you can redistribute it and/or
027// modify it under the terms of the GNU Lesser General Public
028// License as published by the Free Software Foundation; either
029// version 2.1 of the License, or (at your option) any later version.
030//
031// This library is distributed in the hope that it will be useful,
032// but WITHOUT ANY WARRANTY; without even the implied warranty of
033// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
034// Lesser General Public License for more details.
035//
036// You should have received a copy of the GNU Lesser General Public
037// License along with this library; if not, write to the Free Software
038// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
039//
040// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
041// jataylor@hairyfatguy.com
042//------------------------------------------------------------------------------
043
044public class PairwiseSeqAlignment implements Cloneable, Comparable<PairwiseSeqAlignment>
045{
046   private PairwiseSettings mSettings;
047   private AlignedQuery     mAlignedQuery;
048   private AlignedSubject   mAlignedSubject;
049   private Integer          mAlignmentLength;
050   private AlignmentScoring mScoring = new AlignmentScoring();
051
052   //###########################################################################
053   // CONSTRUCTORS
054   //###########################################################################
055
056   //---------------------------------------------------------------------------
057   public PairwiseSeqAlignment(AlignedQuery inAlignedQuery, AlignedSubject inAlignedSubject)
058   {
059      mAlignedQuery   = inAlignedQuery;
060      mAlignedSubject = inAlignedSubject;
061   }
062
063   //---------------------------------------------------------------------------
064   public PairwiseSeqAlignment(XMLNode inXMLNode)
065   {
066      if (! inXMLNode.getTagName().equals(HfgBioXML.ALIGNMENT))
067      {
068         throw new RuntimeException("Cannot construct a " + this.getClass().getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!");
069      }
070
071      mAlignedQuery = new AlignedQuery(inXMLNode.getRequiredSubtagByName(HfgBioXML.ALIGNED_QUERY));
072      mAlignedSubject = new AlignedSubject(inXMLNode.getRequiredSubtagByName(HfgBioXML.ALIGNED_SUBJECT));
073
074      String rawScoreString = inXMLNode.getAttributeValue(HfgBioXML.SCORE_ATT);
075      if (StringUtil.isSet(rawScoreString))
076      {
077         setScore(Float.parseFloat(rawScoreString));
078      }
079   }
080
081   //###########################################################################
082   // PUBLIC METHODS
083   //###########################################################################
084
085   //---------------------------------------------------------------------------
086   @Override
087   public PairwiseSeqAlignment clone()
088   {
089      PairwiseSeqAlignment cloneObj;
090      try
091      {
092         cloneObj = (PairwiseSeqAlignment) super.clone();
093      }
094      catch (Exception e)
095      {
096         throw new ProgrammingException(e);
097      }
098
099      if (mSettings != null)
100      {
101         cloneObj.mSettings = mSettings.clone();
102      }
103
104      if (mAlignedQuery != null)
105      {
106         cloneObj.mAlignedQuery = mAlignedQuery.clone();
107      }
108
109      if (mAlignedSubject != null)
110      {
111         cloneObj.mAlignedSubject = mAlignedSubject.clone();
112      }
113
114      return cloneObj;
115   }
116
117   //--------------------------------------------------------------------------
118   @Override
119   public boolean equals(Object inObj2)
120   {
121      return (inObj2 != null
122               && inObj2 instanceof PairwiseSeqAlignment
123               && 0 == compareTo((PairwiseSeqAlignment) inObj2));
124   }
125
126   //--------------------------------------------------------------------------
127   @Override
128   public int hashCode()
129   {
130      int hashCode = 0;
131      if (getScore() != null)
132      {
133         hashCode += getScore().hashCode();
134      }
135
136      if (getNumIdentities() != null)
137      {
138         hashCode += 31 * getNumIdentities().hashCode();
139      }
140
141      return hashCode;
142   }
143
144   //--------------------------------------------------------------------------
145   @Override
146   public int compareTo(PairwiseSeqAlignment inObj2)
147   {
148      int score = - CompareUtil.compare(getScore(), inObj2.getScore());
149      if (0 == score)
150      {
151         score = - CompareUtil.compare(getNumIdentities(), inObj2.getNumIdentities());
152      }
153
154      return score;
155   }
156
157   //---------------------------------------------------------------------------
158   public PairwiseSeqAlignment setSettings(PairwiseSettings inValue)
159   {
160      mSettings = inValue;
161      return this;
162   }
163
164   //---------------------------------------------------------------------------
165   public PairwiseSettings getSettings()
166   {
167      return mSettings;
168   }
169
170   //---------------------------------------------------------------------------
171   public void clearCachedValues()
172   {
173      mAlignmentLength = null;
174      mScoring.clear();
175   }
176
177   //---------------------------------------------------------------------------
178   public PairwiseSeqAlignment setAlignedQuery(AlignedQuery inValue)
179   {
180      mAlignedQuery = inValue;
181      clearCachedValues();
182      return this;
183   }
184
185   //---------------------------------------------------------------------------
186   public AlignedQuery getAlignedQuery()
187   {
188      return mAlignedQuery;
189   }
190
191   //---------------------------------------------------------------------------
192   public PairwiseSeqAlignment setAlignedSubject(AlignedSubject inValue)
193   {
194      mAlignedSubject = inValue;
195      clearCachedValues();
196      return this;
197   }
198
199   //---------------------------------------------------------------------------
200   public AlignedSubject getAlignedSubject()
201   {
202      return mAlignedSubject;
203   }
204
205   //---------------------------------------------------------------------------
206   public Integer length()
207   {
208      if (null == mAlignmentLength)
209      {
210         mAlignmentLength = getAlignedQuery().getAlignedSeq().length();
211      }
212
213      return mAlignmentLength;
214   }
215
216   //---------------------------------------------------------------------------
217   /**
218    Returns the alignment scoring object.
219    * @return the alignment scoring object
220    */
221   public AlignmentScoring getScoring()
222   {
223      ensureScoringIdentityStatsAreCalculated();
224      return mScoring;
225   }
226
227   //---------------------------------------------------------------------------
228   public PairwiseSeqAlignment setScore(Float inValue)
229   {
230      mScoring.setScore(inValue);
231      return this;
232   }
233
234   //---------------------------------------------------------------------------
235   public Float getScore()
236   {
237      return mScoring.getScore();
238   }
239
240   //---------------------------------------------------------------------------
241   public Integer getNumIdentities()
242   {
243      ensureScoringIdentityStatsAreCalculated();
244      return mScoring.getNumIdentities();
245   }
246
247   //---------------------------------------------------------------------------
248   public Integer getComparisonLength()
249   {
250      ensureScoringIdentityStatsAreCalculated();
251      return mScoring.getComparisonLength();
252   }
253
254   //---------------------------------------------------------------------------
255   /**
256    Doesn't penalize sequence's terminal gaps.
257    @return the adjusted comparison length
258    */
259   public Integer getAdjustedComparisonLength()
260   {
261      ensureScoringIdentityStatsAreCalculated();
262      return mScoring.getAdjustedComparisonLength();
263   }
264
265   //---------------------------------------------------------------------------
266   public Float getPctIdentity()
267   {
268      ensureScoringIdentityStatsAreCalculated();
269      return mScoring.getPctIdentity();
270   }
271
272   //---------------------------------------------------------------------------
273   /**
274    Doesn't penalize sequence's terminal gaps.
275    @return the adjusted percent identity
276    */
277   public Float getAdjustedPctIdentity()
278   {
279      ensureScoringIdentityStatsAreCalculated();
280      return mScoring.getAdjustedPctIdentity();
281   }
282
283
284   //---------------------------------------------------------------------------
285   public PairwiseSeqAlignment setEValue(Float inValue)
286   {
287      mScoring.setEValue(inValue);
288      return this;
289   }
290
291   //---------------------------------------------------------------------------
292   public Float getEValue()
293   {
294      return mScoring.getEValue();
295   }
296
297   //---------------------------------------------------------------------------
298   public PairwiseSeqAlignment setPValue(Float inValue)
299   {
300      mScoring.setPValue(inValue);
301      return this;
302   }
303
304   //---------------------------------------------------------------------------
305   public Float getPValue()
306   {
307      return mScoring.getPValue();
308   }
309
310   //---------------------------------------------------------------------------
311   public PairwiseSeqAlignment setZScore(Float inValue)
312   {
313      mScoring.setZScore(inValue);
314      return this;
315   }
316
317   //---------------------------------------------------------------------------
318   public Float getZScore()
319   {
320      return mScoring.getZScore();
321   }
322
323   //---------------------------------------------------------------------------
324   /**
325    Method helpful for understanding the per-residue scoring breakdown.
326    @return ASCII display of the per-residue scoring
327    */
328   public String showScoring()
329   {
330      SubstitutionMatrix substitutionMatrix = null;
331      if (null == getAlignedSubject().getPSSM())
332      {
333         substitutionMatrix = getSettings().getSubstitutionMatrix(getAlignedQuery().getSeq().getType());
334      }
335
336      StringBuilderPlus buffer = new StringBuilderPlus();
337
338      boolean queryGapOpen   = false;
339      boolean subjectGapOpen = false;
340
341      Set<Integer> queryGapExclusionIndices = new HashSet<>();
342      if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.QUERY, PairwiseSeqTerminus.LEFT))
343      {
344         for (int i = 0; i < length(); i++)
345         {
346            if (getAlignedQuery().getAlignedSeq().charAt(i) != '-')
347            {
348               break;
349            }
350
351            queryGapExclusionIndices.add(i);
352         }
353      }
354
355      if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.QUERY, PairwiseSeqTerminus.RIGHT))
356      {
357         for (int i = length() - 1; i > 0; i--)
358         {
359            if (getAlignedQuery().getAlignedSeq().charAt(i) != '-')
360            {
361               break;
362            }
363
364            queryGapExclusionIndices.add(i);
365         }
366      }
367
368      Set<Integer> subjectGapExclusionIndices = new HashSet<>();
369      if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.SUBJECT, PairwiseSeqTerminus.LEFT))
370      {
371         for (int i = 0; i < length(); i++)
372         {
373            if (getAlignedSubject().getAlignedSeq().charAt(i) != '-')
374            {
375               break;
376            }
377
378            subjectGapExclusionIndices.add(i);
379         }
380      }
381
382      if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.SUBJECT, PairwiseSeqTerminus.RIGHT))
383      {
384         for (int i = length() - 1; i > 0; i--)
385         {
386            if (getAlignedSubject().getAlignedSeq().charAt(i) != '-')
387            {
388               break;
389            }
390
391            subjectGapExclusionIndices.add(i);
392         }
393      }
394
395      Set<Integer> subjectGapScoreIndices = new HashSet<>();
396      if (getSettings().getSubjectTemplateMode())
397      {
398         StringBuilderPlus origSubject = new StringBuilderPlus(getAlignedSubject().getSeq().getSequence());
399         for (int i = 0; i < length(); i++)
400         {
401            if (getAlignedSubject().getAlignedSeq().charAt(i) == '-')
402            {
403               if (origSubject.charAt(i) == '-')
404               {
405                  subjectGapScoreIndices.add(i);
406               }
407               else
408               {
409                  origSubject.insert(i, '-');
410               }
411            }
412         }
413      }
414
415      GapPenalties queryGapPenalties   = getSettings().getGapPenalties(PairwiseSeqType.QUERY);
416      GapPenalties subjectGapPenalties = getSettings().getGapPenalties(PairwiseSeqType.SUBJECT);
417
418      float totalScore = 0;
419
420      for (int i = 0; i < length(); i++)
421      {
422         buffer.append(String.format("%3d. ", i + 1));
423
424         char queryResidue   = getAlignedQuery().getAlignedSeq().charAt(i);
425         char subjectResidue = getAlignedSubject().getAlignedSeq().charAt(i);
426
427         float score = 0;
428         if (queryResidue != '-')
429         {
430            queryGapOpen = false;
431
432            if (subjectResidue != '-')
433            {
434               subjectGapOpen = false;
435               if (substitutionMatrix != null)
436               {
437                  score = (getSettings().getScoreCaseInsensitive() ?
438                           substitutionMatrix.scoreCaseInsensitive(queryResidue, subjectResidue) :
439                           substitutionMatrix.score(queryResidue, subjectResidue));
440               }
441               else
442               {
443                  score = getAlignedSubject().getPSSM().score(i + 1, queryResidue);
444               }
445            }
446            else
447            {
448               if (subjectGapScoreIndices.contains(i))
449               {
450                  if (substitutionMatrix != null)
451                  {
452                     score = (getSettings().getScoreCaseInsensitive() ?
453                              substitutionMatrix.scoreCaseInsensitive(queryResidue, subjectResidue) :
454                              substitutionMatrix.score(queryResidue, subjectResidue));
455                  }
456                  else
457                  {
458                     score = getAlignedSubject().getPSSM().score(i + 1, queryResidue);
459                  }
460               }
461               else if (! subjectGapExclusionIndices.contains(i))
462               {
463                  score = (subjectGapOpen ? subjectGapPenalties.getExtensionPenalty() : subjectGapPenalties.getOpenPenalty());
464               }
465
466               subjectGapOpen = true;
467            }
468         }
469         else
470         {
471            if (subjectGapScoreIndices.contains(i))
472            {
473               if (substitutionMatrix != null)
474               {
475                  score = (getSettings().getScoreCaseInsensitive() ?
476                           substitutionMatrix.scoreCaseInsensitive(queryResidue, subjectResidue) :
477                           substitutionMatrix.score(queryResidue, subjectResidue));
478               }
479               else
480               {
481                  score = getAlignedSubject().getPSSM().score(i + 1, queryResidue);
482               }
483            }
484            else if (! queryGapExclusionIndices.contains(i))
485            {
486               if (substitutionMatrix != null)
487               {
488                  score = (queryGapOpen ? queryGapPenalties.getExtensionPenalty() : queryGapPenalties.getOpenPenalty());
489               }
490               else
491               {
492                  score = (queryGapOpen ? queryGapPenalties.getExtensionPenalty() * getAlignedSubject().getPSSM().getGapExtScore(i + 1)
493                                        : queryGapPenalties.getOpenPenalty() * getAlignedSubject().getPSSM().getGapOpenScore(i + 1));
494               }
495            }
496
497            queryGapOpen = true;
498         }
499
500         buffer.append(String.format("%c %5.1f  %c", queryResidue, score, subjectResidue));
501
502         buffer.appendln();
503
504         totalScore += score;
505      }
506
507      buffer.appendln(String.format("Total:%6.1f", totalScore));
508      buffer.appendln();
509
510      return buffer.toString();
511   }
512
513   //---------------------------------------------------------------------------
514   @Override
515   public String toString()
516   {
517      StringBuilderPlus buffer = new StringBuilderPlus().setDelimiter(", ");
518      if (null == getSettings()
519          || getSettings().getQueryAlignmentType().equals(PairwiseAlignmentType.LOCAL))
520      {
521         buffer.append(String.format("Query Range: %s", getAlignedQuery().getSeqLocation().toString()));
522      }
523
524      if (null == getSettings()
525          || getSettings().getSubjectAlignmentType().equals(PairwiseAlignmentType.LOCAL))
526      {
527         buffer.delimitedAppend(String.format("Subject Range: %s", getAlignedSubject().getSeqLocation().toString()));
528      }
529
530      buffer.delimitedAppend(String.format("Score: %.1f\n", getScore()));
531      // Show the aligned sequences with the query on top
532      buffer.appendln(getAlignedQuery().getAlignedSeq());
533
534      // Generate the comparison string - lowercase the sequences for doing a case-insensitive comparison
535      String lcQuerySeq = getAlignedQuery().getAlignedSeq().toLowerCase();
536      String lcSubjectSeq = getAlignedSubject().getAlignedSeq().toLowerCase();
537
538      for (int i = 0; i < length(); i++)
539      {
540         char queryChar = lcQuerySeq.charAt(i);
541         char subjectChar = lcSubjectSeq.charAt(i);
542         buffer.append(queryChar != '-' && queryChar == subjectChar  ? '|' : ' ');
543      }
544      buffer.appendln();
545
546      // Now show the aligned subject on the bottom
547      buffer.appendln(getAlignedSubject().getAlignedSeq());
548
549      return buffer.toString();
550   }
551
552   //--------------------------------------------------------------------------
553   public XMLNode toXMLNode()
554   {
555      XMLTag tag = new XMLTag(HfgBioXML.ALIGNMENT);
556
557      if (getScore() != null)
558      {
559         tag.setAttribute(HfgBioXML.SCORE_ATT, getScore());
560      }
561
562      tag.addSubtag(getAlignedQuery().toXMLNode());
563      tag.addSubtag(getAlignedSubject().toXMLNode());
564
565      return tag;
566   }
567
568   //---------------------------------------------------------------------------
569   public PairwiseSeqAlignment getSubalignment(int inZeroIndexedStart)
570   {
571      return getSubalignment(inZeroIndexedStart, length());
572   }
573
574   //---------------------------------------------------------------------------
575   public PairwiseSeqAlignment getSubalignment(int inZeroIndexedStart, int inLimit)
576   {
577
578      AlignedQuery subalignedQuery = new AlignedQuery(mAlignedQuery.getSeq(),
579                                                   mAlignedQuery.getAlignedSeq().substring(inZeroIndexedStart, inLimit),
580                                                   mAlignedQuery.getLinearPosition(inZeroIndexedStart));
581
582      AlignedSubject subalignedSubject = null;
583      if (mAlignedSubject != null)
584      {
585         subalignedSubject = new AlignedSubject(mAlignedSubject.getSeq(),
586                                                mAlignedSubject.getAlignedSeq().substring(inZeroIndexedStart, inLimit),
587                                                mAlignedSubject.getLinearPosition(inZeroIndexedStart));
588      }
589
590      return new PairwiseSeqAlignment(subalignedQuery, subalignedSubject);
591   }
592
593   //---------------------------------------------------------------------------
594   /**
595    In the case of a local query vs. a global subject, this method can be used to extend the local query in both directions
596    to match where subject residues are present.
597    */
598   public void extendAlignedQuery()
599   {
600      BioSequence query = getAlignedQuery().getSeq();
601      if (query != null)
602      {
603         int queryLength = query.length(); // Pulled out for performance
604         int queryMatchStart = getAlignedQuery().getSeqLocation().getStart();
605
606         // Don't leave terminal gaps on the query when there are residues to give and the germline has a residue
607         if (getAlignedQuery().getNTerminalGapLength() > 0
608               && queryMatchStart > 1)
609         {
610            StringBuilder queryBuffer = new StringBuilder(getAlignedQuery().getAlignedSeq());
611            StringBuilder germlineBuffer = new StringBuilder(getAlignedSubject().getAlignedSeq());
612
613            int queryGapSize = getAlignedQuery().getNTerminalGapLength();
614            int germlineGapSize = getAlignedSubject().getNTerminalGapLength();
615
616            int adjustedQueryMatchStart = queryMatchStart;
617
618            if (queryGapSize > germlineGapSize)
619            {
620               for (int index = queryGapSize - 1; index >= 0; index--)
621               {
622                  if (germlineBuffer.charAt(index) != '-'
623                        && '-' == queryBuffer.charAt(index))
624                  {
625                     queryBuffer.setCharAt(index, query.residueAt(adjustedQueryMatchStart - queryMatchStart));
626
627                     adjustedQueryMatchStart--;
628                     if (1 == adjustedQueryMatchStart)
629                     {
630                        break;
631                     }
632                  }
633               }
634            }
635
636            setAlignedQuery(new AlignedQuery(query, queryBuffer, adjustedQueryMatchStart));
637         }
638
639         if (getAlignedQuery().getCTerminalGapLength() > 0)
640         {
641            StringBuilder queryBuffer = new StringBuilder(getAlignedQuery().getAlignedSeq());
642            int queryBufferLength = queryBuffer.length(); // Pulled out for performance
643            StringBuilder germlineBuffer = new StringBuilder(getAlignedSubject().getAlignedSeq());
644
645            int queryGapSize = getAlignedQuery().getCTerminalGapLength();
646            int germlineGapSize = getAlignedSubject().getCTerminalGapLength();
647
648            int matchEnd = getAlignedQuery().getSeqLocation().getEnd();
649
650            if (queryGapSize > germlineGapSize
651                  && matchEnd < queryLength)
652            {
653               for (int index = queryBufferLength - queryGapSize; index < queryBufferLength; index++)
654               {
655                  if (germlineBuffer.charAt(index) != '-'
656                        && '-' == queryBuffer.charAt(index))
657                  {
658                     queryBuffer.setCharAt(index, query.residueAt(matchEnd + 1));
659
660                     matchEnd++;
661                     if (matchEnd == queryLength)
662                     {
663                        break;
664                     }
665                  }
666               }
667            }
668
669            setAlignedQuery(new AlignedQuery(query, queryBuffer, getAlignedQuery().getSeqLocation().getStart()));
670         }
671      }
672   }
673
674   //---------------------------------------------------------------------------
675   /**
676    Swaps the query and the subject.
677    */
678   public void invert()
679   {
680      AlignedQuery newQuery = new AlignedQuery(getAlignedSubject().getSeq(), getAlignedSubject().getAlignedSeq(), getAlignedSubject().getSeqLocation().getStart());
681      AlignedSubject newSubject = new AlignedSubject(getAlignedQuery().getSeq(), getAlignedQuery().getAlignedSeq(), getAlignedQuery().getSeqLocation().getStart());
682
683      setAlignedQuery(newQuery);
684      setAlignedSubject(newSubject);
685   }
686
687   //###########################################################################
688   // PRIVATE METHODS
689   //###########################################################################
690
691   //---------------------------------------------------------------------------
692   private void ensureScoringIdentityStatsAreCalculated()
693   {
694      if (0 == mScoring.getComparisonLength())
695      {
696         mScoring.calculate(this);
697      }
698   }
699
700}