001package com.hfg.bio.phylogeny;
002
003import com.hfg.bio.seq.BioSequence;
004
005//------------------------------------------------------------------------------
006/**
007 Implementation of the Jukes-Cantor distance matrix calculation model.
008 It assumes that all changes are equally likely.
009 <p style='font-style:italic'>
010 Jukes, T.H., Cantor, C.R. (1969). "Evolution of protein molecules".
 In Munro, H.N. (ed.). Mammalian protein metabolism. New York: Academic Press. pp. 21-132.
012 </p>
013 <p>
014  For a quick overview, see <a href='http://en.wikipedia.org/wiki/Jukes-Cantor_model'>
015 http://en.wikipedia.org/wiki/Jukes-Cantor_model</a>
016 </p>
017
018 @author J. Alex Taylor, hairyfatguy.com
019 */
020//------------------------------------------------------------------------------
021// com.hfg Library
022//
023// This library is free software; you can redistribute it and/or
024// modify it under the terms of the GNU Lesser General Public
025// License as published by the Free Software Foundation; either
026// version 2.1 of the License, or (at your option) any later version.
027//
028// This library is distributed in the hope that it will be useful,
029// but WITHOUT ANY WARRANTY; without even the implied warranty of
030// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
031// Lesser General Public License for more details.
032//
033// You should have received a copy of the GNU Lesser General Public
034// License along with this library; if not, write to the Free Software
035// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
036//
037// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
038// jataylor@hairyfatguy.com
039//------------------------------------------------------------------------------
040
041public class JukesCantorModel implements DistanceMatrixModel
042{
043   //---------------------------------------------------------------------------
044   /**
045    Returns the model name as required by the DistanceMatrixModel interface.
046    */
047   public String name()
048   {
049      return "Jukes-Cantor";
050   }
051
052   //---------------------------------------------------------------------------
053   /**
054    Calculates the Jukes-Cantor distance score for a pair of sequences.
055    The sequences must be aligned to be the same length.
056    */
057   public float calculateDistance(BioSequence inSeq1, BioSequence inSeq2)
058   {
059      if (inSeq1.length() != inSeq2.length())
060      {
061         throw new RuntimeException("The length of seq1 [" + inSeq1.length() + "] and seq2 [" + inSeq2.length() + "] don't match!");
062      }
063
064      int mismatches = 0;
065      int length = 0;
066
067      String seq1 = inSeq1.getSequence().toUpperCase();
068      String seq2 = inSeq2.getSequence().toUpperCase();
069
070      for (int i = 0; i < seq1.length(); i++)
071      {
072         char char1 = seq1.charAt(i);
073         char char2 = seq2.charAt(i);
074         if (char1 != char2) mismatches++;
075         // If both seqs are gapped at this position, don't count it against length.
076         if (! (char1 == '-' && char2 == '-')) length++;
077      }
078
079      return (float) (- (3/4f) * Math.log(1 - (4/3f) *(mismatches / (float) length)));
080   }
081}