001package com.hfg.bio.phylogeny;
002
003import java.util.HashMap;
004import java.util.Map;
005
006import com.hfg.network.Edge;
007
008//------------------------------------------------------------------------------
009/**
010 * UPGMA (unweighted pair-group method using arithmetic averages) method of
011 * phylogenetic tree construction.
012 * <p>
013 * See <a href='http://en.wikipedia.org/wiki/UPGMA'>wikipedia</a>.
014 * Note that distances in the resulting tree will not exactly match those from
015 * the input distance matrix.
016 * </p>
017 * @author J. Alex Taylor, hairyfatguy.com
018 */
019//------------------------------------------------------------------------------
020// com.hfg Library
021//
022// This library is free software; you can redistribute it and/or
023// modify it under the terms of the GNU Lesser General Public
024// License as published by the Free Software Foundation; either
025// version 2.1 of the License, or (at your option) any later version.
026//
027// This library is distributed in the hope that it will be useful,
028// but WITHOUT ANY WARRANTY; without even the implied warranty of
029// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
030// Lesser General Public License for more details.
031//
032// You should have received a copy of the GNU Lesser General Public
033// License along with this library; if not, write to the Free Software
034// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
035//
036// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
037// jataylor@hairyfatguy.com
038//------------------------------------------------------------------------------
039
040public class UPGMA implements TreeMethod
041{
042
043   //**************************************************************************
044   // CONSTRUCTORS
045   //**************************************************************************
046
047   //---------------------------------------------------------------------------
048   public UPGMA()
049   {
050   }
051
052   //**************************************************************************
053   // PUBLIC METHODS
054   //**************************************************************************
055
056   //--------------------------------------------------------------------------
057   @Override
058   public String toString()
059   {
060      return getClass().getSimpleName();
061   }
062
063   //--------------------------------------------------------------------------
064   @Override
065   public boolean equals(Object inObj2)
066   {
067      return (inObj2 != null
068              && inObj2.getClass().equals(getClass()));
069   }
070
071   //--------------------------------------------------------------------------
072   public PhylogeneticTree constructTree(DistanceMatrix inDistanceMatrix)
073   {
074      int nodeIndex = 1;
075      Map<String, PhyloNode> nodeMap = new HashMap<>();
076
077      DistanceMatrix matrix = inDistanceMatrix;
078      if (! matrix.isConsumable())
079      {
080         matrix = matrix.clone();
081      }
082
083      while (matrix.size() > 1)
084      {
085         Edge<String> shortestEdge = matrix.getShortestEdge();
086         String minKey1 = shortestEdge.getFrom();
087         String minKey2 = shortestEdge.getTo();
088         float  minDistance = shortestEdge.getDistance();
089
090         String newNodeName = "_" + (nodeIndex++);
091         PhyloNode newNode = new PhyloNode();
092         nodeMap.put(newNodeName, newNode);
093
094         if (! nodeMap.containsKey(minKey1))
095         {
096            PhyloNode childNode = new PhyloNode().setLabel(minKey1);
097            newNode.addEdge(childNode, minDistance / 2);
098         }
099         else
100         {
101            float distance = (minDistance / 2) - nodeMap.get(minKey1).getMaxDistanceToLeaf();
102            newNode.addEdge(nodeMap.get(minKey1), distance);
103            nodeMap.remove(minKey1);
104         }
105
106         if (! nodeMap.containsKey(minKey2))
107         {
108            PhyloNode childNode = new PhyloNode().setLabel(minKey2);
109            newNode.addEdge(childNode, minDistance / 2);
110         }
111         else
112         {
113            float distance = (minDistance / 2) - nodeMap.get(minKey2).getMaxDistanceToLeaf();
114            newNode.addEdge(nodeMap.get(minKey2), distance);
115            nodeMap.remove(minKey2);
116         }
117
118         // Reduce the matrix
119         matrix.addKey(newNodeName);
120         for (String key : matrix.keySet())
121         {
122            if (key.equals(minKey1) || key.equals(minKey2) || key.equals(newNodeName)) continue;
123
124            float avgDistance = (matrix.getDistance(minKey1, key) + matrix.getDistance(minKey2, key)) / 2;
125            matrix.setDistance(newNodeName, key, avgDistance);
126         }
127
128         // It is (slightly) faster to remove both keys at once
129 //        Set<String> keys = new HashSet<>(2);
130 //        keys.add(minKey1);
131 //        keys.add(minKey2);
132 //        matrix.removeKeys(keys);
133         matrix.removeKey(minKey1);
134         matrix.removeKey(minKey2);
135      }
136
137      PhylogeneticTree tree = new PhylogeneticTree();
138      tree.setRootNode(nodeMap.get(matrix.keySet().iterator().next()));
139      tree.orderByNodeCount();
140
141      matrix.setIsConsumed();
142
143      return tree;
144   }
145}