001package com.hfg.bio.phylogeny; 002 003import com.hfg.bio.seq.BioSequence; 004 005//------------------------------------------------------------------------------ 006/** 007 Distance matrix calculation method that is simply the fraction of mismatches. 008 <pre> 009 distance = mismatches / aligned_length 010 </pre> 011 If both sequences contain a gap at a given position, it is not counted towards 012 the alignment length. 013 014 @author J. Alex Taylor, hairyfatguy.com 015 */ 016//------------------------------------------------------------------------------ 017// com.hfg Library 018// 019// This library is free software; you can redistribute it and/or 020// modify it under the terms of the GNU Lesser General Public 021// License as published by the Free Software Foundation; either 022// version 2.1 of the License, or (at your option) any later version. 023// 024// This library is distributed in the hope that it will be useful, 025// but WITHOUT ANY WARRANTY; without even the implied warranty of 026// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 027// Lesser General Public License for more details. 028// 029// You should have received a copy of the GNU Lesser General Public 030// License along with this library; if not, write to the Free Software 031// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 032// 033// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 034// jataylor@hairyfatguy.com 035//------------------------------------------------------------------------------ 036 037public class UncorrectedModel implements DistanceMatrixModel 038{ 039 //--------------------------------------------------------------------------- 040 public UncorrectedModel() 041 { 042 } 043 044 //--------------------------------------------------------------------------- 045 /** 046 Returns the model name as required by the DistanceMatrixModel interface. 047 */ 048 public String name() 049 { 050 return "Uncorrected"; 051 } 052 053 //--------------------------------------------------------------------------- 054 /** 055 Calculates the distance score for a pair of sequences. 056 The sequences must be aligned to be the same length. 057 If both sequences contain a gap at a given position, it is not counted towards 058 the alignment length. 059 */ 060 public float calculateDistance(BioSequence inSeq1, BioSequence inSeq2) 061 { 062 int mismatches = 0; 063 int comparisonLength = 0; 064 065 String seq1 = inSeq1.getSequence(); 066 String seq2 = inSeq2.getSequence(); 067 068 if (seq1.length() != seq2.length()) 069 { 070 throw new RuntimeException("The length of seq1 [" + seq1.length() + "] and seq2 [" + seq2.length() + "] don't match!"); 071 } 072 073 // This way will avoid a NaN result from comparing two all-gapped sequences. 074 075 float distance = 0.0f; 076 if (! seq1.equals(seq2)) 077 { 078 for (int i = 0; i < seq1.length(); i++) 079 { 080 char char1 = seq1.charAt(i); 081 char char2 = seq2.charAt(i); 082 if (char1 != char2) mismatches++; 083 // If both seqs are gapped at this position, don't count it against length. 084 if (!(char1 == '-' && char2 == '-')) comparisonLength++; 085 } 086 087 distance = mismatches / (float) comparisonLength; 088 } 089 090 return distance; 091 } 092}