001package com.hfg.bio.seq.alignment;
002
003import com.hfg.bio.HfgBioXML;
004import com.hfg.bio.seq.BioSequence;
005import com.hfg.bio.seq.SeqLocation;
006import com.hfg.exception.ProgrammingException;
007import com.hfg.util.StringUtil;
008import com.hfg.util.collection.CollectionUtil;
009import com.hfg.xml.XMLNode;
010import com.hfg.xml.XMLTag;
011
012//------------------------------------------------------------------------------
013/**
014 Base class for the aligned sequences that compose a pairwise sequence alignment.
015 <div>
016 @author J. Alex Taylor, hairyfatguy.com
017 </div>
018 */
019//------------------------------------------------------------------------------
020// com.hfg XML/HTML Coding Library
021//
022// This library is free software; you can redistribute it and/or
023// modify it under the terms of the GNU Lesser General Public
024// License as published by the Free Software Foundation; either
025// version 2.1 of the License, or (at your option) any later version.
026//
027// This library is distributed in the hope that it will be useful,
028// but WITHOUT ANY WARRANTY; without even the implied warranty of
029// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
030// Lesser General Public License for more details.
031//
032// You should have received a copy of the GNU Lesser General Public
033// License along with this library; if not, write to the Free Software
034// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
035//
036// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
037// jataylor@hairyfatguy.com
038//------------------------------------------------------------------------------
039
040public abstract class AlignedSeq implements Cloneable
041{
042   private BioSequence mSeq;
043   private String      mAlignedSeq;
044   private SeqLocation mLocation;
045
046   //###########################################################################
047   // CONSTRUCTORS
048   //###########################################################################
049
050   //---------------------------------------------------------------------------
051   public AlignedSeq(BioSequence inSeq, CharSequence inAlignedSeq, int inStart)
052   {
053      init(inSeq, inAlignedSeq, inStart);
054   }
055
056   //--------------------------------------------------------------------------
057   public AlignedSeq(XMLNode inXMLNode)
058   {
059      int start = Integer.parseInt(inXMLNode.getAttributeValue(HfgBioXML.START_ATT));
060
061      XMLNode seqTag = inXMLNode.getOptionalSubtagByName(HfgBioXML.QUERY);
062      if (null == seqTag)
063      {
064         seqTag = inXMLNode.getOptionalSubtagByName(HfgBioXML.SUBJECT);
065      }
066      
067      BioSequence seq = (seqTag != null && CollectionUtil.hasValues(seqTag.getSubtags()) ? BioSequence.instantiate((XMLNode) seqTag.getSubtags().get(0)) : null);
068
069      init(seq, inXMLNode.getUnescapedContent(), start);
070   }
071
072   //---------------------------------------------------------------------------
073   private void init(BioSequence inSeq, CharSequence inAlignedSeq, int inStart)
074   {
075      mSeq        = inSeq;
076      mAlignedSeq = inAlignedSeq.toString();
077      
078      mLocation   = new SeqLocation(inStart,
079                                    mAlignedSeq.length() - StringUtil.getCharCount(mAlignedSeq, '-') + inStart - 1);
080   }
081
082   //###########################################################################
083   // PUBLIC METHODS
084   //###########################################################################
085
086   //---------------------------------------------------------------------------
087   @Override
088   public String toString()
089   {
090      StringBuilder buffer = new StringBuilder();
091      if (mLocation != null)
092      {
093         buffer.append(mLocation);
094      }
095
096      return buffer.toString();
097   }
098
099   //--------------------------------------------------------------------------
100   public XMLNode toXMLNode()
101   {
102      XMLTag tag = new XMLTag(this instanceof AlignedQuery ? HfgBioXML.ALIGNED_QUERY : HfgBioXML.ALIGNED_SUBJECT);
103
104      tag.setAttribute(HfgBioXML.START_ATT, mLocation.getStart());
105      tag.setContent(mAlignedSeq);
106
107      if (mSeq != null)
108      {
109         XMLTag seqTag = new XMLTag(this instanceof AlignedQuery ? HfgBioXML.QUERY : HfgBioXML.SUBJECT);
110         seqTag.addSubtag(mSeq.toXMLNode());
111
112         tag.addSubtag(seqTag);
113      }
114
115      return tag;
116   }
117
118   //---------------------------------------------------------------------------
119   @Override
120   public AlignedSeq clone()
121   {
122      AlignedSeq cloneObj;
123      try
124      {
125         cloneObj = (AlignedSeq) super.clone();
126      }
127      catch (CloneNotSupportedException e)
128      {
129         throw new ProgrammingException(e);
130      }
131
132      if (mSeq != null)
133      {
134         cloneObj.mSeq = mSeq.clone();
135
136      }
137
138      if (mLocation != null)
139      {
140         cloneObj.mLocation = mLocation.clone();
141
142      }
143
144      return cloneObj;
145   }
146
147   //---------------------------------------------------------------------------
148   public AlignedSeq setSeq(BioSequence inValue)
149   {
150      mSeq = inValue;
151      return this;
152   }
153
154   //---------------------------------------------------------------------------
155   public BioSequence getSeq()
156   {
157      return mSeq;
158   }
159
160   //---------------------------------------------------------------------------
161   public AlignedSeq setAlignedSeq(CharSequence inValue)
162   {
163      mAlignedSeq = inValue.toString();
164      return this;
165   }
166
167   //---------------------------------------------------------------------------
168   public String getAlignedSeq()
169   {
170      return mAlignedSeq;
171   }
172
173   //---------------------------------------------------------------------------
174   public SeqLocation getSeqLocation()
175   {
176      return mLocation;
177   }
178
179   //---------------------------------------------------------------------------
180   public int getIndexedPosition(int inLinearPosition)
181   {
182      int indexedPosition = 0;
183      int linearPosition = 0;
184
185      int residueCount = 0;
186      int index = 0;
187      for (char residue : mAlignedSeq.toCharArray())
188      {
189         if (residue != '-')
190         {
191            residueCount++;
192         }
193
194         linearPosition = mLocation.getStart() + residueCount - 1 + (residue != '-' ? 0 : 1);
195         if (linearPosition == inLinearPosition)
196         {
197            indexedPosition = index;
198            break;
199         }
200
201         index++;
202      }
203
204      return indexedPosition;
205   }
206
207   //---------------------------------------------------------------------------
208   public int getLinearPosition(int inIndexedPosition)
209   {
210      int linearPosition = 0;
211
212      int residueCount = 0;
213      int index = 0;
214      for (char residue : mAlignedSeq.toCharArray())
215      {
216         if (residue != '-')
217         {
218            residueCount++;
219         }
220
221         if (index == inIndexedPosition)
222         {
223            linearPosition = mLocation.getStart() + residueCount - 1 + (residue != '-' ? 0 : 1);
224            break;
225         }
226
227         index++;
228      }
229
230      return linearPosition;
231   }
232
233   //---------------------------------------------------------------------------
234   public int getNTerminalGapLength()
235   {
236      int gapLength = 0;
237      for (int i = 0; i < mAlignedSeq.length(); i++)
238      {
239         if ('-' == mAlignedSeq.charAt(i))
240         {
241            gapLength++;
242         }
243         else
244         {
245            break;
246         }
247      }
248
249      return gapLength;
250   }
251
252   //---------------------------------------------------------------------------
253   public int getCTerminalGapLength()
254   {
255      int gapLength = 0;
256      for (int i = mAlignedSeq.length() - 1; i > 0; i--)
257      {
258         if ('-' == mAlignedSeq.charAt(i))
259         {
260            gapLength++;
261         }
262         else
263         {
264            break;
265         }
266      }
267
268      return gapLength;
269   }
270}