001package com.hfg.bio.phylogeny; 002 003import com.hfg.bio.seq.BioSequence; 004 005//------------------------------------------------------------------------------ 006/** 007 Implementation of the Jukes-Cantor distance matrix calculation model. 008 It assumes that all changes are equally likely. 009 <p style='font-style:italic'> 010 Jukes, T.H., Cantor, C.R. (1969). "Evolution of protein molecules". 011 In Munro, H.N.. Mammalian protein metabolism. New York: Academic Press. pp. 21-123. 012 </p> 013 <p> 014 For a quick overview, see <a href='http://en.wikipedia.org/wiki/Jukes-Cantor_model'> 015 http://en.wikipedia.org/wiki/Jukes-Cantor_model</a> 016 </p> 017 018 @author J. Alex Taylor, hairyfatguy.com 019 */ 020//------------------------------------------------------------------------------ 021// com.hfg Library 022// 023// This library is free software; you can redistribute it and/or 024// modify it under the terms of the GNU Lesser General Public 025// License as published by the Free Software Foundation; either 026// version 2.1 of the License, or (at your option) any later version. 027// 028// This library is distributed in the hope that it will be useful, 029// but WITHOUT ANY WARRANTY; without even the implied warranty of 030// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 031// Lesser General Public License for more details. 032// 033// You should have received a copy of the GNU Lesser General Public 034// License along with this library; if not, write to the Free Software 035// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 036// 037// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 038// jataylor@hairyfatguy.com 039//------------------------------------------------------------------------------ 040 041public class JukesCantorModel implements DistanceMatrixModel 042{ 043 //--------------------------------------------------------------------------- 044 /** 045 Returns the model name as required by the DistanceMatrixModel interface. 046 */ 047 public String name() 048 { 049 return "Jukes-Cantor"; 050 } 051 052 //--------------------------------------------------------------------------- 053 /** 054 Calculates the Jukes-Cantor distance score for a pair of sequences. 055 The sequences must be aligned to be the same length. 056 */ 057 public float calculateDistance(BioSequence inSeq1, BioSequence inSeq2) 058 { 059 if (inSeq1.length() != inSeq2.length()) 060 { 061 throw new RuntimeException("The length of seq1 [" + inSeq1.length() + "] and seq2 [" + inSeq2.length() + "] don't match!"); 062 } 063 064 int mismatches = 0; 065 int length = 0; 066 067 String seq1 = inSeq1.getSequence().toUpperCase(); 068 String seq2 = inSeq2.getSequence().toUpperCase(); 069 070 for (int i = 0; i < seq1.length(); i++) 071 { 072 char char1 = seq1.charAt(i); 073 char char2 = seq2.charAt(i); 074 if (char1 != char2) mismatches++; 075 // If both seqs are gapped at this position, don't count it against length. 076 if (! (char1 == '-' && char2 == '-')) length++; 077 } 078 079 return (float) (- (3/4f) * Math.log(1 - (4/3f) *(mismatches / (float) length))); 080 } 081}