001package com.hfg.bio.phylogeny;
002
003import com.hfg.bio.seq.BioSequence;
004
005//------------------------------------------------------------------------------
006/**
007 Distance matrix calculation method that is simply the fraction of mismatches.
008 <pre>
009 distance = mismatches / aligned_length
010 </pre>
011 If both sequences contain a gap at a given position, it is not counted towards
012 the alignment length.
013
014 @author J. Alex Taylor, hairyfatguy.com
015 */
016//------------------------------------------------------------------------------
017// com.hfg Library
018//
019// This library is free software; you can redistribute it and/or
020// modify it under the terms of the GNU Lesser General Public
021// License as published by the Free Software Foundation; either
022// version 2.1 of the License, or (at your option) any later version.
023//
024// This library is distributed in the hope that it will be useful,
025// but WITHOUT ANY WARRANTY; without even the implied warranty of
026// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
027// Lesser General Public License for more details.
028//
029// You should have received a copy of the GNU Lesser General Public
030// License along with this library; if not, write to the Free Software
031// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
032//
033// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
034// jataylor@hairyfatguy.com
035//------------------------------------------------------------------------------
036
037public class UncorrectedModel implements DistanceMatrixModel
038{
039   //---------------------------------------------------------------------------
040   public UncorrectedModel()
041   {
042   }
043
044   //---------------------------------------------------------------------------
045   /**
046    Returns the model name as required by the DistanceMatrixModel interface.
047    */
048   public String name()
049   {
050      return "Uncorrected";
051   }
052
053   //---------------------------------------------------------------------------
054   /**
055    Calculates the distance score for a pair of sequences.
056    The sequences must be aligned to be the same length.
057    If both sequences contain a gap at a given position, it is not counted towards
058    the alignment length.
059    */
060   public float calculateDistance(BioSequence inSeq1, BioSequence inSeq2)
061   {
062      int mismatches = 0;
063      int comparisonLength = 0;
064
065      String seq1 = inSeq1.getSequence();
066      String seq2 = inSeq2.getSequence();
067
068      if (seq1.length() != seq2.length())
069      {
070         throw new RuntimeException("The length of seq1 [" + seq1.length() + "] and seq2 [" + seq2.length() + "] don't match!");
071      }
072
073      // This way will avoid a NaN result from comparing two all-gapped sequences.
074
075      float distance = 0.0f;
076      if (! seq1.equals(seq2))
077      {
078         for (int i = 0; i < seq1.length(); i++)
079         {
080            char char1 = seq1.charAt(i);
081            char char2 = seq2.charAt(i);
082            if (char1 != char2) mismatches++;
083            // If both seqs are gapped at this position, don't count it against length.
084            if (!(char1 == '-' && char2 == '-')) comparisonLength++;
085         }
086
087         distance = mismatches / (float) comparisonLength;
088      }
089
090      return distance;
091   }
092}