001package com.hfg.bio;
002
003import java.util.*;
004
005import com.hfg.exception.UnmodifyableObjectException;
006import com.hfg.util.CompareUtil;
007import com.hfg.xml.XMLNode;
008import com.hfg.xml.XMLTag;
009import com.hfg.util.StringUtil;
010
011
012
013//------------------------------------------------------------------------------
014/**
015 Mapping of sequence characters to AminoAcids.
016 <div>
017  @author J. Alex Taylor, hairyfatguy.com
018 </div>
019 */
020//------------------------------------------------------------------------------
021// com.hfg XML/HTML Coding Library
022//
023// This library is free software; you can redistribute it and/or
024// modify it under the terms of the GNU Lesser General Public
025// License as published by the Free Software Foundation; either
026// version 2.1 of the License, or (at your option) any later version.
027//
028// This library is distributed in the hope that it will be useful,
029// but WITHOUT ANY WARRANTY; without even the implied warranty of
030// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
031// Lesser General Public License for more details.
032//
033// You should have received a copy of the GNU Lesser General Public
034// License along with this library; if not, write to the Free Software
035// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
036//
037// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
038// jataylor@hairyfatguy.com
039//------------------------------------------------------------------------------
040
041public class AminoAcidSet implements Set<AminoAcid>, Cloneable
042{
043   //##########################################################################
044   // PUBLIC FIELDS
045   //##########################################################################
046
047   public static final AminoAcidSet STANDARD_LC = new AminoAcidSet();
048   public static final AminoAcidSet STANDARD_UC = new AminoAcidSet();
049
050   /**
051    Contains both upper and lowercase standard mappings.
052    */
053   public static final AminoAcidSet STANDARD    = new AminoAcidSet();
054
055   /**
056    Contains both upper and lowercase standard mappings plus B, J, and Z ambiguity codes.
057    */
058   public static final AminoAcidSet EXTENDED    = new AminoAcidSet();
059
060   static
061   {
062      STANDARD_LC.setMapping('a', AminoAcid.ALANINE);
063      STANDARD_LC.setMapping('c', AminoAcid.CYSTEINE);
064      STANDARD_LC.setMapping('d', AminoAcid.ASPARTIC_ACID);
065      STANDARD_LC.setMapping('e', AminoAcid.GLUTAMIC_ACID);
066      STANDARD_LC.setMapping('f', AminoAcid.PHENYLALANINE);
067      STANDARD_LC.setMapping('g', AminoAcid.GLYCINE);
068      STANDARD_LC.setMapping('h', AminoAcid.HISTIDINE);
069      STANDARD_LC.setMapping('i', AminoAcid.ISOLEUCINE);
070      STANDARD_LC.setMapping('k', AminoAcid.LYSINE);
071      STANDARD_LC.setMapping('l', AminoAcid.LEUCINE);
072      STANDARD_LC.setMapping('m', AminoAcid.METHIONINE);
073      STANDARD_LC.setMapping('n', AminoAcid.ASPARAGINE);
074      STANDARD_LC.setMapping('p', AminoAcid.PROLINE);
075      STANDARD_LC.setMapping('q', AminoAcid.GLUTAMINE);
076      STANDARD_LC.setMapping('r', AminoAcid.ARGININE);
077      STANDARD_LC.setMapping('s', AminoAcid.SERINE);
078      STANDARD_LC.setMapping('t', AminoAcid.THREONIE);
079      STANDARD_LC.setMapping('v', AminoAcid.VALINE);
080      STANDARD_LC.setMapping('w', AminoAcid.TRYPTOPHAN);
081      STANDARD_LC.setMapping('y', AminoAcid.TYROSINE);
082      STANDARD_LC.setMapping('x', AminoAcid.UNDEFINED);
083      STANDARD_LC.setMapping('*', AminoAcid.STOP);
084      STANDARD_LC.setName("Standard (lower case)");
085      STANDARD_LC.lock();
086
087      STANDARD_UC.setMapping('A', AminoAcid.ALANINE);
088      STANDARD_UC.setMapping('C', AminoAcid.CYSTEINE);
089      STANDARD_UC.setMapping('D', AminoAcid.ASPARTIC_ACID);
090      STANDARD_UC.setMapping('E', AminoAcid.GLUTAMIC_ACID);
091      STANDARD_UC.setMapping('F', AminoAcid.PHENYLALANINE);
092      STANDARD_UC.setMapping('G', AminoAcid.GLYCINE);
093      STANDARD_UC.setMapping('H', AminoAcid.HISTIDINE);
094      STANDARD_UC.setMapping('I', AminoAcid.ISOLEUCINE);
095      STANDARD_UC.setMapping('K', AminoAcid.LYSINE);
096      STANDARD_UC.setMapping('L', AminoAcid.LEUCINE);
097      STANDARD_UC.setMapping('M', AminoAcid.METHIONINE);
098      STANDARD_UC.setMapping('N', AminoAcid.ASPARAGINE);
099      STANDARD_UC.setMapping('P', AminoAcid.PROLINE);
100      STANDARD_UC.setMapping('Q', AminoAcid.GLUTAMINE);
101      STANDARD_UC.setMapping('R', AminoAcid.ARGININE);
102      STANDARD_UC.setMapping('S', AminoAcid.SERINE);
103      STANDARD_UC.setMapping('T', AminoAcid.THREONIE);
104      STANDARD_UC.setMapping('V', AminoAcid.VALINE);
105      STANDARD_UC.setMapping('W', AminoAcid.TRYPTOPHAN);
106      STANDARD_UC.setMapping('Y', AminoAcid.TYROSINE);
107      STANDARD_UC.setMapping('X', AminoAcid.UNDEFINED);
108      STANDARD_UC.setMapping('*', AminoAcid.STOP);
109      STANDARD_UC.setName("Standard (upper case)");
110      STANDARD_UC.lock();
111
112      STANDARD.setMappings(STANDARD_LC);
113      STANDARD.setMappings(STANDARD_UC);
114      STANDARD.setName("Standard");
115      STANDARD.lock();
116
117      EXTENDED.setMappings(STANDARD);
118      EXTENDED.setMapping('B', AminoAcid.ASP_ASN_AVG);
119      EXTENDED.setMapping('b', AminoAcid.ASP_ASN_AVG);
120      EXTENDED.setMapping('J', AminoAcid.ILE_LEU_AVG);
121      EXTENDED.setMapping('j', AminoAcid.ILE_LEU_AVG);
122      EXTENDED.setMapping('Z', AminoAcid.GLU_GLN_AVG);
123      EXTENDED.setMapping('z', AminoAcid.GLU_GLN_AVG);
124      EXTENDED.setName("Extended");
125      EXTENDED.lock();
126   }
127
128
129   //##########################################################################
130   // PRIVATE FIELDS
131   //##########################################################################
132
133   private String         mName;
134   private NTerminalGroup mNTerminalGroup = NTerminalGroup.UNMODIFIED_N_TERMINUS;
135   private CTerminalGroup mCTerminalGroup = CTerminalGroup.UNMODIFIED_C_TERMINUS;
136   private boolean        mLocked;
137   private Map<Character, AminoAcid> mMap = new HashMap<>();
138
139   // Since the map values may have the same AA mapped to different characters,
140   // we will cache the unique set of AA's for performance.
141   private Collection<AminoAcid> mCachedSet;
142
143   //##########################################################################
144   // CONSTRUCTORS
145   //##########################################################################
146
147   //--------------------------------------------------------------------------
148   public AminoAcidSet()
149   {
150
151   }
152
153   //--------------------------------------------------------------------------
154   public AminoAcidSet(AminoAcidSet inAASet)
155   {
156      setMappings(inAASet);
157      setNTerminalGroup(inAASet.getNTerminalGroup());
158      setCTerminalGroup(inAASet.getCTerminalGroup());
159   }
160
161   //--------------------------------------------------------------------------
162   public static AminoAcidSet instantiate(XMLNode inXMLNode)
163   {
164      if (!inXMLNode.getTagName().equals(HfgBioXML.AASET_TAG))
165      {
166         throw new RuntimeException("Cannot construct an " + AminoAcidSet.class.getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!");
167      }
168
169      AminoAcidSet aaSet = null;
170
171      String name = inXMLNode.getAttributeValue(HfgBioXML.NAME_ATT);
172
173      XMLNode aminoAcidsTag = inXMLNode.getOptionalSubtagByName(HfgBioXML.AMINO_ACIDS_TAG);
174      if (aminoAcidsTag != null)
175      {
176         aaSet = new AminoAcidSet().setName(name);
177         for (XMLNode subtag : aminoAcidsTag.getXMLNodeSubtags())
178         {
179            aaSet.setMapping(subtag.getAttributeValue(HfgBioXML.MAPPING_ATT).charAt(0), new AminoAcid(subtag));
180         }
181
182         aaSet.setNTerminalGroup(new NTerminalGroup((XMLNode) inXMLNode.getRequiredSubtagByName(HfgBioXML.NTERM_TAG)));
183         aaSet.setCTerminalGroup(new CTerminalGroup((XMLNode) inXMLNode.getRequiredSubtagByName(HfgBioXML.CTERM_TAG)));
184      }
185      else
186      {
187         // Pre-defined amino acid sets can be specified with just a name.
188         for (AminoAcidSet predefinedAASet : new AminoAcidSet[] { STANDARD, STANDARD_LC, STANDARD_UC })
189         {
190            if (name.equals(predefinedAASet.getName()))
191            {
192               aaSet = predefinedAASet;
193               break;
194            }
195         }
196      }
197
198      return aaSet;
199   }
200
201   //##########################################################################
202   // PUBLIC METHODS
203   //##########################################################################
204
205   //--------------------------------------------------------------------------
206   public XMLNode toXMLNode()
207   {
208      XMLNode node = new XMLTag(HfgBioXML.AASET_TAG);
209      if (StringUtil.isSet(getName()))
210      {
211         node.setAttribute(HfgBioXML.NAME_ATT, getName());
212      }
213
214      if (! isPredefinedSet())
215      {
216         XMLNode aminoAcidsTag = new XMLTag(HfgBioXML.AMINO_ACIDS_TAG);
217         node.addSubtag(aminoAcidsTag);
218         for (Map.Entry<Character, AminoAcid> mapping : mMap.entrySet())
219         {
220            XMLNode aaTag = mapping.getValue().toXMLNode();
221            aaTag.setAttribute(HfgBioXML.MAPPING_ATT, mapping.getKey() + "");
222            aminoAcidsTag.addSubtag(aaTag);
223         }
224
225         node.addSubtag(mNTerminalGroup.toXMLNode());
226         node.addSubtag(mCTerminalGroup.toXMLNode());
227      }
228
229      return node;
230   }
231
232   //--------------------------------------------------------------------------
233   public AminoAcidSet clone()
234   {
235      AminoAcidSet newObj;
236      try
237      {
238         newObj = (AminoAcidSet) super.clone();
239      }
240      catch (CloneNotSupportedException e)
241      {
242         throw new RuntimeException(e);
243      }
244
245      newObj.mMap = new HashMap<>(mMap);
246      newObj.mLocked = false;
247
248      return newObj;
249   }
250
251   //--------------------------------------------------------------------------
252   public boolean isLocked()
253   {
254      return mLocked;
255   }
256
257   //--------------------------------------------------------------------------
258   public void lock()
259   {
260      mLocked = true;
261   }
262
263   //--------------------------------------------------------------------------
264   public AminoAcidSet setName(String inValue)
265   {
266      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
267      mName = inValue;
268
269      return this;
270   }
271
272   //--------------------------------------------------------------------------
273   public String getName()
274   {
275      return mName;
276   }
277
278   //--------------------------------------------------------------------------
279   public AminoAcidSet setNTerminalGroup(NTerminalGroup inValue)
280   {
281      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
282      mNTerminalGroup = inValue;
283      return this;
284   }
285
286   //--------------------------------------------------------------------------
287   public NTerminalGroup getNTerminalGroup()
288   {
289      return mNTerminalGroup;
290   }
291
292   //--------------------------------------------------------------------------
293   public AminoAcidSet setCTerminalGroup(CTerminalGroup inValue)
294   {
295      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
296      mCTerminalGroup = inValue;
297      return this;
298   }
299
300   //--------------------------------------------------------------------------
301   public CTerminalGroup getCTerminalGroup()
302   {
303      return mCTerminalGroup;
304   }
305
306   //--------------------------------------------------------------------------
307   public boolean setMapping(char inChar, AminoAcid inAA)
308   {
309      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
310
311      boolean result = mMap.containsKey(inChar);
312
313      mMap.put(inChar, inAA);
314      clearCachedValues();
315
316      return result;
317   }
318
319   //--------------------------------------------------------------------------
320   public void setMappings(AminoAcidSet inAASet)
321   {
322      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
323
324      if (inAASet != null)
325      {
326         Iterator iter = inAASet.mapIterator();
327         while (iter.hasNext())
328         {
329            Character letter = (Character) iter.next();
330            mMap.put(letter, inAASet.getAA(letter));
331         }
332
333         clearCachedValues();
334      }
335   }
336
337   //--------------------------------------------------------------------------
338   public Collection<AminoAcid> getAminoAcids()
339   {
340      if (null == mCachedSet)
341      {
342         List<AminoAcid> aaList = new ArrayList<>(new HashSet<>(mMap.values()));
343         Collections.sort(aaList, AminoAcid.AA_ORDINAL_COMPARATOR);
344
345         mCachedSet = Collections.unmodifiableCollection(aaList);;
346      }
347
348      return mCachedSet;
349   }
350
351   //--------------------------------------------------------------------------
352   /**
353    Returns an iteration of the Characters mapped to amino acids
354    @return iteration of the Characters mapped to amino acids
355    */
356   public Iterator<Character> mapIterator()
357   {
358      return mMap.keySet().iterator();
359   }
360
361   //--------------------------------------------------------------------------
362   public Set<Character> getResidueChars()
363   {
364      return Collections.unmodifiableSet(mMap.keySet());
365   }
366
367   //--------------------------------------------------------------------------
368   public Set<Character> getMapping(AminoAcid inAA)
369   {
370      Set<Character> residues = new HashSet<>(20);
371      for (Character mappedResidue : mMap.keySet())
372      {
373         if (mMap.get(mappedResidue).equals(inAA))
374         {
375            residues.add(mappedResidue);
376         }
377      }
378
379      return residues;
380   }
381
382   //--------------------------------------------------------------------------
383   public AminoAcid getAA(char inResidue)
384   {
385      return getAA(new Character(inResidue));
386   }
387
388   //--------------------------------------------------------------------------
389   public AminoAcid getAA(Character inResidue)
390   {
391      return mMap.get(inResidue);
392   }
393
394
395   //--------------------------------------------------------------------------
396   public int size()
397   {
398      return mMap.size();
399   }
400
401   //--------------------------------------------------------------------------
402   @Override
403   public boolean isEmpty()
404   {
405      return mMap.isEmpty();
406   }
407
408   //--------------------------------------------------------------------------
409   @Override
410   public boolean contains(Object inObject)
411   {
412      return inObject instanceof AminoAcid ? mMap.values().contains(inObject) : false;
413   }
414
415   //--------------------------------------------------------------------------
416   @Override
417   public Iterator<AminoAcid> iterator()
418   {
419      return getAminoAcids().iterator(); // Want to reduce values to the unique set before iterating.
420   }
421
422   //--------------------------------------------------------------------------
423   @Override
424   public Object[] toArray()
425   {
426      return (Object[]) getAminoAcids().toArray();
427   }
428
429   //--------------------------------------------------------------------------
430   @Override
431   public <T> T[] toArray(T[] inArray)
432   {
433      if (inArray.length < size())
434      {
435         return (T[]) Arrays.copyOf(getAminoAcids().toArray(), size(), inArray.getClass());
436      }
437
438      System.arraycopy(getAminoAcids().toArray(), 0, inArray, 0, size());
439      return inArray;
440   }
441
442   //--------------------------------------------------------------------------
443   @Override
444   public boolean add(AminoAcid inAminoAcid)
445   {
446      clearCachedValues();
447      return setMapping(inAminoAcid.getOneLetterCode(), inAminoAcid);
448   }
449
450   //--------------------------------------------------------------------------
451   @Override
452   public boolean remove(Object inObj)
453   {
454      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
455
456      boolean result = false;
457      if (inObj instanceof AminoAcid)
458      {
459         for (Character key : mMap.keySet())
460         {
461            if (mMap.get(key).equals(inObj))
462            {
463               mMap.remove(key);
464               result = true;
465               clearCachedValues();
466            }
467         }
468      }
469
470      return result;
471   }
472
473   //--------------------------------------------------------------------------
474   @Override
475   public boolean containsAll(Collection<?> inCollection)
476   {
477      boolean result = true;
478      for (Object obj : inCollection)
479      {
480         if (! contains(obj))
481         {
482            result = false;
483            break;
484         }
485      }
486
487      return result;
488   }
489
490   //--------------------------------------------------------------------------
491   @Override
492   public boolean addAll(Collection<? extends AminoAcid> inCollection)
493   {
494      boolean result = false;
495      for (AminoAcid obj : inCollection)
496      {
497         if (add(obj))
498         {
499            result = true;
500         }
501      }
502
503      return result;
504   }
505
506   //--------------------------------------------------------------------------
507   @Override
508   public boolean retainAll(Collection<?> inCollection)
509   {
510      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
511
512      boolean result = false;
513      for (Character key : mMap.keySet())
514      {
515         if (! inCollection.contains(mMap.get(key)))
516         {
517            mMap.remove(key);
518            result = true;
519            clearCachedValues();
520         }
521      }
522
523      return result;
524   }
525
526   //--------------------------------------------------------------------------
527   @Override
528   public boolean removeAll(Collection<?> inCollection)
529   {
530      boolean result = false;
531      for (Object obj : inCollection)
532      {
533         if (remove(obj))
534         {
535            result = true;
536         }
537      }
538
539      return result;
540   }
541
542   //--------------------------------------------------------------------------
543   @Override
544   public void clear()
545   {
546      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
547
548      mMap.clear();
549
550      clearCachedValues();
551   }
552
553   //--------------------------------------------------------------------------
554   @Override
555   public boolean equals(Object inObj2)
556   {
557      boolean result = true;
558      if (inObj2 instanceof AminoAcidSet)
559      {
560         AminoAcidSet aaSet2 = (AminoAcidSet) inObj2;
561
562         if (aaSet2.size() != size())
563         {
564            result = false;
565         }
566         else
567         {
568            for (Character residue : mMap.keySet())
569            {
570               if (! getAA(residue).equals(aaSet2.getAA(residue)))
571               {
572                  result = false;
573                  break;
574               }
575            }
576
577            if (result
578                && ! getNTerminalGroup().equals(aaSet2.getNTerminalGroup()))
579            {
580               result = false;
581            }
582
583            if (result
584                && ! getAminoAcids().containsAll(aaSet2.getAminoAcids()))
585            {
586               result = false;
587            }
588         }
589      }
590      else
591      {
592         result = false;
593      }
594
595      return result;
596   }
597
598   //--------------------------------------------------------------------------
599   @Override
600   public int hashCode()
601   {
602      int hashCode = 0;
603      for (AminoAcid aa : getAminoAcids())
604      {
605         hashCode += 31 * aa.hashCode();
606      }
607
608      hashCode += 31 * getNTerminalGroup().hashCode();
609      hashCode += 31 * getCTerminalGroup().hashCode();
610
611      return hashCode;
612   }
613
614   //--------------------------------------------------------------------------
615   public AminoAcid remove(Character inChar)
616   {
617      clearCachedValues();
618
619      return mMap.remove(inChar);
620   }
621
622   //--------------------------------------------------------------------------
623   public AminoAcidSet subtract(AminoAcidSet inAminoAcidSet2)
624   {
625      AminoAcidSet subtractedSet = clone();
626
627      if (inAminoAcidSet2 != null)
628      {
629         for (Character aaChar : mMap.keySet())
630         {
631            AminoAcid currentAA = mMap.get(aaChar);
632            AminoAcid oldAA = inAminoAcidSet2.getAA(aaChar);
633
634            if (currentAA != null
635                && oldAA != null)
636            {
637               int comparison = CompareUtil.compare(currentAA.getElementalComposition(), oldAA.getElementalComposition());
638               if (0 == comparison)
639               {
640                  if (StringUtil.isSet(currentAA.getChemicalFormula())
641                      || StringUtil.isSet(oldAA.getChemicalFormula())
642                      // Compare the names if neither has a composition
643                      || currentAA.name().equals(oldAA.name()))
644                  {
645                     subtractedSet.remove(aaChar);
646                  }
647               }
648            }
649         }
650
651         if (getNTerminalGroup() != null
652            && inAminoAcidSet2.getNTerminalGroup() != null
653            && getNTerminalGroup().equals(inAminoAcidSet2.getNTerminalGroup()))
654         {
655            subtractedSet.setNTerminalGroup(null);
656         }
657
658         if (getCTerminalGroup() != null
659            && inAminoAcidSet2.getCTerminalGroup() != null
660            && getCTerminalGroup().equals(inAminoAcidSet2.getCTerminalGroup()))
661         {
662            subtractedSet.setCTerminalGroup(null);
663         }
664
665         clearCachedValues();
666      }
667
668      return subtractedSet;
669   }
670
671   //--------------------------------------------------------------------------
672   private void clearCachedValues()
673   {
674      mCachedSet = null;
675   }
676
677   //--------------------------------------------------------------------------
678   private boolean isPredefinedSet()
679   {
680      return (this == STANDARD
681            || this == STANDARD_LC
682            || this == STANDARD_UC);
683   }
684}