001package com.hfg.bio.seq.alignment;
002
003import java.util.List;
004
005import com.hfg.bio.HfgBioXML;
006import com.hfg.exception.ProgrammingException;
007import com.hfg.math.Range;
008import com.hfg.util.CompareUtil;
009import com.hfg.util.StringUtil;
010import com.hfg.xml.XMLNode;
011import com.hfg.xml.XMLTag;
012
013//------------------------------------------------------------------------------
014/**
015 Pairwise sequence alignment scoring data.
016 <div>
017 @author J. Alex Taylor, hairyfatguy.com
018 </div>
019 */
020//------------------------------------------------------------------------------
021// com.hfg XML/HTML Coding Library
022//
023// This library is free software; you can redistribute it and/or
024// modify it under the terms of the GNU Lesser General Public
025// License as published by the Free Software Foundation; either
026// version 2.1 of the License, or (at your option) any later version.
027//
028// This library is distributed in the hope that it will be useful,
029// but WITHOUT ANY WARRANTY; without even the implied warranty of
030// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
031// Lesser General Public License for more details.
032//
033// You should have received a copy of the GNU Lesser General Public
034// License along with this library; if not, write to the Free Software
035// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
036//
037// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
038// jataylor@hairyfatguy.com
039//------------------------------------------------------------------------------
040
041public class AlignmentScoring implements Cloneable, Comparable<AlignmentScoring>
042{
043   private List<Range<Integer>> mScoringBoundaries; // 1-based
044   private Float            mRawScore;
045   private Integer          mNumIdentities;
046   private Integer          mComparisonLength;
047   private Integer          mAdjustedComparisonLength;
048   private Float            mPctIdentity;
049   private Float            mAdjustedPctIdentity;
050   private Float            mEValue;
051   private Float            mPValue;
052   private Float            mZScore;
053   private Integer          mQueryInternalGapLength;
054
055   //###########################################################################
056   // CONSTRUCTORS
057   //###########################################################################
058
059   //---------------------------------------------------------------------------
060   public AlignmentScoring()
061   {
062   }
063
064   //---------------------------------------------------------------------------
065   public AlignmentScoring(XMLNode inXMLNode)
066   {
067      if (! inXMLNode.getTagName().equals(HfgBioXML.ALIGNMENT_SCORING))
068      {
069         throw new RuntimeException("Cannot construct a " + this.getClass().getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!");
070      }
071
072
073      String rawScoreString = inXMLNode.getAttributeValue(HfgBioXML.SCORE_ATT);
074      if (StringUtil.isSet(rawScoreString))
075      {
076         setScore(Float.parseFloat(rawScoreString));
077      }
078
079      String numIdentititesString = inXMLNode.getAttributeValue(HfgBioXML.NUM_IDENTITIES_ATT);
080      if (StringUtil.isSet(numIdentititesString))
081      {
082         setNumIdentities(Integer.parseInt(numIdentititesString));
083      }
084
085      String comparisionLenString = inXMLNode.getAttributeValue(HfgBioXML.COMPARISON_LENGTH_ATT);
086      if (StringUtil.isSet(comparisionLenString))
087      {
088         setComparisonLength(Integer.parseInt(comparisionLenString));
089      }
090   }
091
092   //###########################################################################
093   // PUBLIC METHODS
094   //###########################################################################
095
096   //---------------------------------------------------------------------------
097   public AlignmentScoring combine(AlignmentScoring inValue)
098   {
099      AlignmentScoring result = clone();
100
101      if (inValue != null
102          && inValue.getComparisonLength() > 0)
103      {
104         result.incrementComparisonLength(inValue.getComparisonLength());
105         result.incrementNumIdentities(inValue.getNumIdentities());
106
107         if (result.mAdjustedComparisonLength != null
108               && inValue.mAdjustedComparisonLength != null)
109         {
110            result.mAdjustedComparisonLength += inValue.mAdjustedComparisonLength;
111         }
112
113         if (inValue.mQueryInternalGapLength != null)
114         {
115            if (null == result.mQueryInternalGapLength)
116            {
117               result.mQueryInternalGapLength = inValue.mQueryInternalGapLength;
118            }
119            else
120            {
121               result.mQueryInternalGapLength += inValue.mQueryInternalGapLength;
122            }
123         }
124
125         if (result.getScore() != null
126               && inValue.getScore() != null)
127         {
128            result.setScore(result.getScore() + inValue.getScore());
129         }
130
131         if (result.getPValue() != null
132               && inValue.getPValue() != null)
133         {
134            result.setPValue(result.getPValue() * inValue.getPValue());
135         }
136
137         // TODO: E-value & Z-score
138         // TODO: Scoring boundaries?
139
140         result.mPctIdentity = null;
141         result.mAdjustedPctIdentity = null;
142      }
143
144      return result;
145   }
146
147   //---------------------------------------------------------------------------
148   @Override
149   public AlignmentScoring clone()
150   {
151      AlignmentScoring cloneObj;
152      try
153      {
154         cloneObj = (AlignmentScoring) super.clone();
155      }
156      catch (Exception e)
157      {
158         throw new ProgrammingException(e);
159      }
160
161      return cloneObj;
162   }
163
164   //--------------------------------------------------------------------------
165   @Override
166   public boolean equals(Object inObj2)
167   {
168      return (inObj2 != null
169            && inObj2 instanceof AlignmentScoring
170            && 0 == compareTo((AlignmentScoring) inObj2));
171   }
172
173   //--------------------------------------------------------------------------
174   @Override
175   public int hashCode()
176   {
177      int hashCode = 0;
178      if (getScore() != null)
179      {
180         hashCode += getScore().hashCode();
181      }
182
183      hashCode += 31 * getNumIdentities();
184
185      return hashCode;
186   }
187
188   //--------------------------------------------------------------------------
189   @Override
190   public int compareTo(AlignmentScoring inObj2)
191   {
192      int score = - CompareUtil.compare(getScore(), inObj2.getScore());
193      if (0 == score)
194      {
195         score = - CompareUtil.compare(getNumIdentities(), inObj2.getNumIdentities());
196      }
197
198      return score;
199   }
200
201   //---------------------------------------------------------------------------
202   @Override
203   public String toString()
204   {
205      return String.format("%.2f: [%d / %d]", getScore(), getNumIdentities(), getComparisonLength());
206   }
207
208   //--------------------------------------------------------------------------
209   public XMLNode toXMLNode()
210   {
211      XMLTag tag = new XMLTag(HfgBioXML.ALIGNMENT_SCORING);
212
213      if (getScore() != null)
214      {
215         tag.setAttribute(HfgBioXML.SCORE_ATT, getScore());
216      }
217
218      if (getComparisonLength() != 0)
219      {
220         tag.setAttribute(HfgBioXML.NUM_IDENTITIES_ATT, getNumIdentities());
221      }
222
223      if (getComparisonLength() != 0)
224      {
225         tag.setAttribute(HfgBioXML.COMPARISON_LENGTH_ATT, getComparisonLength());
226      }
227
228
229      return tag;
230   }
231
232   //---------------------------------------------------------------------------
233   public void clear()
234   {
235      mNumIdentities = null;
236      mComparisonLength = null;
237      mAdjustedComparisonLength = null;
238      mPctIdentity = null;
239      mAdjustedPctIdentity = null;
240      mEValue = null;
241      mPValue = null;
242      mZScore = null;
243   }
244
245   //---------------------------------------------------------------------------
246   /**
247    Specifies an optional 1-based list of alignment position ranges which should
248    be considered for scoring.
249    @param inValue List of 1-based Ranges to score
250    @return this AlignmentScoring object to enable method chaining
251    */
252   public AlignmentScoring setScoringBoundaries(List<Range<Integer>> inValue)
253   {
254      mScoringBoundaries = inValue;
255      return this;
256   }
257
258   //---------------------------------------------------------------------------
259   public List<Range<Integer>> getScoringBoundaries()
260   {
261      return mScoringBoundaries;
262   }
263
264   //---------------------------------------------------------------------------
265   public AlignmentScoring setScore(Float inValue)
266   {
267      mRawScore = inValue;
268      return this;
269   }
270
271   //---------------------------------------------------------------------------
272   public Float getScore()
273   {
274      return mRawScore;
275   }
276
277   //---------------------------------------------------------------------------
278   public AlignmentScoring setNumIdentities(Integer inValue)
279   {
280      mNumIdentities = inValue;
281      return this;
282   }
283
284   //---------------------------------------------------------------------------
285   public AlignmentScoring incrementNumIdentities()
286   {
287      return incrementNumIdentities(1);
288   }
289
290   //---------------------------------------------------------------------------
291   public AlignmentScoring incrementNumIdentities(int inValue)
292   {
293      mNumIdentities = (mNumIdentities != null ? mNumIdentities : 0) + inValue;
294      return this;
295   }
296
297   //---------------------------------------------------------------------------
298   public int getNumIdentities()
299   {
300      return mNumIdentities != null ? mNumIdentities : 0;
301   }
302
303
304   //---------------------------------------------------------------------------
305   public AlignmentScoring setComparisonLength(Integer inValue)
306   {
307      mComparisonLength = inValue;
308      return this;
309   }
310
311   //---------------------------------------------------------------------------
312   public AlignmentScoring incrementComparisonLength()
313   {
314      return incrementComparisonLength(1);
315   }
316
317   //---------------------------------------------------------------------------
318   public AlignmentScoring incrementComparisonLength(int inValue)
319   {
320      mComparisonLength = (mComparisonLength != null ? mComparisonLength : 0) + inValue;
321      return this;
322   }
323
324   //---------------------------------------------------------------------------
325   public int getComparisonLength()
326   {
327      return mComparisonLength != null ? mComparisonLength : 0;
328   }
329
330   //---------------------------------------------------------------------------
331   /**
332    Doesn't penalize sequence's terminal gaps.
333    @return the adjusted comparison length
334    */
335   public int getAdjustedComparisonLength()
336   {
337      return mAdjustedComparisonLength != null ? mAdjustedComparisonLength : mComparisonLength != null ? mComparisonLength : 0;
338   }
339
340   //---------------------------------------------------------------------------
341   public Float getPctIdentity()
342   {
343      if (null == mPctIdentity
344            && mComparisonLength != null)
345      {
346         mPctIdentity = mComparisonLength > 0 && mNumIdentities != null ? 100f * mNumIdentities / mComparisonLength.floatValue() : 0;
347      }
348
349      return mPctIdentity;
350   }
351
352   //---------------------------------------------------------------------------
353   /**
354    Sets the percent identity that doesn't penalize sequence's terminal gaps.
355    @return this AlignmentScoring object to enable method chaining
356    */
357   public AlignmentScoring setAdjustedPctIdentity(Float inValue)
358   {
359      mAdjustedPctIdentity = inValue;
360      return this;
361   }
362
363   //---------------------------------------------------------------------------
364   /**
365    Doesn't penalize sequence's terminal gaps.
366    @return the adjusted percent identity
367    */
368   public Float getAdjustedPctIdentity()
369   {
370      if (null == mAdjustedPctIdentity
371            && getAdjustedComparisonLength() > 0)
372      {
373         mAdjustedPctIdentity = mNumIdentities != null ? 100f * mNumIdentities / (float) getAdjustedComparisonLength() : 0;
374      }
375
376      return mAdjustedPctIdentity != null ? mAdjustedPctIdentity : getPctIdentity();
377   }
378
379
380   //---------------------------------------------------------------------------
381   public AlignmentScoring setEValue(Float inValue)
382   {
383      mEValue = inValue;
384      return this;
385   }
386
387   //---------------------------------------------------------------------------
388   public Float getEValue()
389   {
390      return mEValue;
391   }
392
393   //---------------------------------------------------------------------------
394   public AlignmentScoring setPValue(Float inValue)
395   {
396      mPValue = inValue;
397      return this;
398   }
399
400   //---------------------------------------------------------------------------
401   public Float getPValue()
402   {
403      return mPValue;
404   }
405
406   //---------------------------------------------------------------------------
407   public AlignmentScoring setZScore(Float inValue)
408   {
409      mZScore = inValue;
410      return this;
411   }
412
413   //---------------------------------------------------------------------------
414   public Float getZScore()
415   {
416      return mZScore;
417   }
418
419
420   //---------------------------------------------------------------------------
421   /**
422    Returns the total number of non-terminal query gap positions where the subject is not a gap.
423    * @return the total number of non-terminal query gap positions where the subject is not a gap
424    */
425   public Integer getQueryInternalGapLength()
426   {
427      return mQueryInternalGapLength;
428   }
429
430   //---------------------------------------------------------------------------
431   public void calculate(PairwiseSeqAlignment inAlignment)
432   {
433      int numIdentities = 0;
434      int comparisonLength = 0;
435      int adjustedComparisonLength = 0;
436      int queryInternalGapLength = 0;  // The total number of non-terminal query gap positions where the subject is not a gap
437
438      AlignedQuery alignedQuery = inAlignment.getAlignedQuery();
439      AlignedSubject alignedSubject = inAlignment.getAlignedSubject();
440
441      String queryResidues = alignedQuery.getAlignedSeq().toLowerCase(); // Lower-casing for case-insensitive comparison
442      String subjectResidues = alignedSubject.getAlignedSeq().toLowerCase();
443
444
445      Integer lastQueryResidueIndex = queryResidues.length() - 1;
446      if (null == alignedQuery.getSeq()
447            || alignedQuery.getSeqLocation().getEnd().equals(alignedQuery.getSeq().length()))
448      {
449         for (int i = queryResidues.length() - 1; i >= 0; i--)
450         {
451            char queryResidue = queryResidues.charAt(i);
452            if (queryResidue != '-')
453            {
454               lastQueryResidueIndex = i;
455               break;
456            }
457         }
458      }
459
460      Integer lastSubjectResidueIndex = subjectResidues.length() - 1;
461      if (null == alignedSubject.getSeq()
462            || alignedSubject.getSeqLocation().getEnd().equals(alignedSubject.getSeq().length()))
463      {
464         for (int i = subjectResidues.length() - 1; i >= 0; i--)
465         {
466            char subjectResidue = subjectResidues.charAt(i);
467            if (subjectResidue != '-')
468            {
469               lastSubjectResidueIndex = i;
470               break;
471            }
472         }
473      }
474
475      int lastIndex = Math.min(lastQueryResidueIndex, lastSubjectResidueIndex);
476
477      // Did the query start before this alignment?
478      boolean queryStarted = alignedQuery.getSeqLocation() != null
479            && alignedQuery.getSeqLocation().getStart() != null
480            && alignedQuery.getSeqLocation().getStart() > 1;
481
482      // Did the subject start before this alignment?
483      boolean subjectStarted = alignedSubject.getSeqLocation() != null
484            && alignedSubject.getSeqLocation().getStart() != null
485            && alignedSubject.getSeqLocation().getStart() > 1;
486
487      boolean firstAlignedQueryResidueEncountered = false;
488
489      for (int i = 0; i < queryResidues.length(); i++)
490      {
491         char queryResidue   = queryResidues.charAt(i);
492         char subjectResidue = subjectResidues.charAt(i);
493
494         if (! firstAlignedQueryResidueEncountered
495               && queryResidue != '-')
496         {
497            firstAlignedQueryResidueEncountered = true;
498
499            if (! queryStarted)
500            {
501               queryStarted = true;
502            }
503         }
504
505
506         if (! subjectStarted
507               && subjectResidue != '-')
508         {
509            subjectStarted = true;
510         }
511
512         // Default to true if no boundaries were specified
513         boolean isInScoringRegion = true;
514         if (mScoringBoundaries != null)
515         {
516            isInScoringRegion = false;
517            for (Range<Integer> range : mScoringBoundaries)
518            {
519               if (range.contains(i + 1))
520               {
521                  isInScoringRegion = true;
522                  break;
523               }
524            }
525         }
526
527         if ((queryResidue != '-' || subjectResidue != '-')
528             && isInScoringRegion)
529         {
530            if (queryResidue == subjectResidue)
531            {
532               numIdentities++;
533            }
534
535            comparisonLength++;
536
537            if (queryStarted
538                  && subjectStarted
539                  && i <= lastIndex)
540            {
541               adjustedComparisonLength++;
542            }
543         }
544
545         if (queryResidue == '-'
546               && subjectResidue != '-'
547               && firstAlignedQueryResidueEncountered
548               && isInScoringRegion
549               && i < lastQueryResidueIndex)
550         {
551            queryInternalGapLength++;
552         }
553      }
554
555      mNumIdentities = numIdentities;
556      mComparisonLength = comparisonLength;
557      mAdjustedComparisonLength = adjustedComparisonLength;
558      mQueryInternalGapLength = queryInternalGapLength;
559   }
560}