001package com.hfg.bio.seq;
002
003import java.io.Reader;
004import java.math.BigDecimal;
005import java.math.MathContext;
006import java.util.*;
007import java.util.regex.Pattern;
008import java.util.regex.Matcher;
009
010import com.hfg.bio.*;
011import com.hfg.bio.glyco.Glycan;
012import com.hfg.bio.proteinproperty.ExtinctionCoeff;
013import com.hfg.bio.proteinproperty.IsoelectricPoint;
014import com.hfg.bio.proteinproperty.PctExtinctionCoeff;
015import com.hfg.bio.proteinproperty.ProteinAnalysisMode;
016import com.hfg.bio.proteinproperty.ReducedAnalysisMode;
017import com.hfg.bio.proteinproperty.SimpleProteinPropertyCalcSettings;
018import com.hfg.bio.taxonomy.ncbi.NCBITaxon;
019import com.hfg.chem.Element;
020import com.hfg.chem.IonizableGroup;
021import com.hfg.chem.Molecule;
022import com.hfg.util.ChecksumUtil;
023import com.hfg.util.CompareUtil;
024import com.hfg.util.StringBuilderPlus;
025import com.hfg.util.collection.CollectionUtil;
026import com.hfg.util.StringUtil;
027import com.hfg.util.collection.OrderedMap;
028import com.hfg.xml.XMLNode;
029import com.hfg.xml.XMLTag;
030
031//------------------------------------------------------------------------------
032/**
033 Biological protein sequence.
034 <div>
035  @author J. Alex Taylor, hairyfatguy.com
036 </div>
037 */
038//------------------------------------------------------------------------------
039// com.hfg XML/HTML Coding Library
040//
041// This library is free software; you can redistribute it and/or
042// modify it under the terms of the GNU Lesser General Public
043// License as published by the Free Software Foundation; either
044// version 2.1 of the License, or (at your option) any later version.
045//
046// This library is distributed in the hope that it will be useful,
047// but WITHOUT ANY WARRANTY; without even the implied warranty of
048// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
049// Lesser General Public License for more details.
050//
051// You should have received a copy of the GNU Lesser General Public
052// License along with this library; if not, write to the Free Software
053// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
054//
055// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
056// jataylor@hairyfatguy.com
057//------------------------------------------------------------------------------
058
059public class Protein extends BioSequencePlusImpl
060{
061
062   //##########################################################################
063   // PRIVATE FIELDS
064   //##########################################################################
065
   // Amino acid set used to interpret residue codes. May be null until getAminoAcidSet()
   // lazily resolves it (from the top protein, or AminoAcidSet.STANDARD).
   private AminoAcidSet mAASet;

   // Properties
   // Cached result of getAminoAcidComposition(); null until first computed.
   private AminoAcidComposition    mAAComposition;
   // NOTE(review): the next four fields are not referenced in the portion of the file reviewed here.
   private Float                   mIsoelectricPoint;
   private KaSet                   mIsoelectricPointKaSet;
   private Integer                 mExtinctionCoeff;
   private Float                   mPercentExtinctionCoeff;

   // Sub-chains of this protein. Null until addChain() creates the list; setChains() resets it to null.
   private List<Protein>           mChains;
   // Explicitly specified disulfide count; used for reduced-mode hydrogen adjustment only when no cross-links are present.
   private Integer                 mNumDisulfideBonds;
   private Set<ProteinXLink>       mXLinks;
   private List<Glycan>            mGlycans;

   // The protein that contains this one as a chain, or null for a top-level protein.
   private Protein                 mParent;
   // Maps chain id to chain. setID() maintains the map belonging to the top protein.
   private Map<String, Protein>    mChainIdMap = new OrderedMap<>(4);

   // Matches an underscore followed by a single word character at the end of a string (e.g. "_A").
   private static Pattern sChainIdPattern = Pattern.compile("_(\\w)$");
084
085   //##########################################################################
086   // CONSTRUCTORS
087   //##########################################################################
088
089   //--------------------------------------------------------------------------
   /**
    Default constructor. Performs no initialization beyond the field defaults.
    */
   public Protein()
   {

   }
094
095   //--------------------------------------------------------------------------
096   public Protein(XMLNode inXML)
097   {
098      super(inXML);
099
100      if (! inXML.getTagName().equals(HfgBioXML.HFGBIOSEQ_TAG))
101      {
102         throw new RuntimeException("Cannot construct an " + this.getClass().getSimpleName() + " from a " + inXML.getTagName() + " tag!");
103      }
104
105      XMLNode aminoAcidSetTag = inXML.getOptionalSubtagByName(HfgBioXML.AASET_TAG);
106      if (aminoAcidSetTag != null)
107      {
108         mAASet = AminoAcidSet.instantiate(aminoAcidSetTag);
109      }
110
111      XMLNode chainsTag = inXML.getOptionalSubtagByName(HfgBioXML.CHAINS_TAG);
112      if (chainsTag != null)
113      {
114         for (XMLNode subtag : chainsTag.getXMLNodeSubtags())
115         {
116            addChain(new Protein(subtag));
117         }
118      }
119
120      // This needs to be set AFTER the chains have been added.
121      if (inXML.hasAttribute(HfgBioXML.DISULFIDE_CNT_ATT))
122      {
123         setNumDisulfideBonds(Integer.parseInt(inXML.getAttributeValue(HfgBioXML.DISULFIDE_CNT_ATT)));
124      }
125
126      XMLNode xlinksTag = inXML.getOptionalSubtagByName(HfgBioXML.XLINKS_TAG);
127      if (xlinksTag != null)
128      {
129         for (XMLNode subtag : chainsTag.getXMLNodeSubtags())
130         {
131            addXLink(new ProteinXLink(subtag));
132         }
133      }
134   }
135
136   //##########################################################################
137   // PUBLIC METHODS
138   //##########################################################################
139
140   //--------------------------------------------------------------------------
   /**
    Identifies the kind of biological sequence this object represents.
    @return always BioSequenceType.PROTEIN
    */
   @Override
   public BioSequenceType getType()
   {
      return BioSequenceType.PROTEIN;
   }
146
147   //--------------------------------------------------------------------------
148   @Override
149   public Protein clone()
150   {
151      clearCalculatedProperties();
152
153      Protein theClone = (Protein) super.clone();
154
155      if (mAAComposition != null)
156      {
157         theClone.mAAComposition = mAAComposition.clone();
158      }
159
160      if (mChains != null)
161      {
162         theClone.mChains = new ArrayList<>(mChains.size());
163         for (Protein chain : mChains)
164         {
165            Protein chainClone = chain.clone();
166            // Can't call addChain() here because it will mess with chain naming
167            theClone.mChains.add(chainClone);
168            chainClone.mParent = theClone;
169         }
170
171         theClone.mChainIdMap = new HashMap<>(mChains.size());
172         for (Protein chain : theClone.mChains)
173         {
174            theClone.mChainIdMap.put(chain.getID(), chain);
175         }
176      }
177
178      theClone.mParent = null;
179
180      if (mXLinks != null)
181      {
182         theClone.mXLinks = new HashSet<>(mXLinks.size());
183         for (ProteinXLink xlink : mXLinks)
184         {
185            theClone.mXLinks.add(xlink.clone());
186         }
187      }
188
189      return theClone;
190   }
191
192   //---------------------------------------------------------------------------
193   // The hashcode is based on the sequence and not the id.
194   @Override
195   public int hashCode()
196   {
197      byte[] md5 = getMD5Checksum();
198
199      return md5 != null ? new String(md5).hashCode() : 0;
200   }
201
202   //---------------------------------------------------------------------------
203   @Override
204   public int compareTo(Object inObj2)
205   {
206      int result = -1;
207
208      if (this == inObj2)
209      {
210         result = 0;
211      }
212      else if (inObj2 != null
213               && inObj2 instanceof Protein)
214      {
215         Protein protein2 = (Protein) inObj2;
216
217         // First compare the lengths
218         result = CompareUtil.compare(length(), protein2.length());
219
220         if (0 == result)
221         {
222            // Second compare the number of chains
223            result = CompareUtil.compare(getNumChains(), protein2.getNumChains());
224         }
225
226         if (0 == result)
227         {
228            // Third compare the sequences themselves. (We could use a checksum first but that causes calculation of the seq data string anyway.)
229            result = CompareUtil.compare(getSeqDataString(), protein2.getSeqDataString());
230         }
231      }
232
233      return result;
234   }
235
236   //--------------------------------------------------------------------------
237   @Override
238   public byte[] getMD5Checksum()
239   {
240      byte[] checksum = null;
241      if (getSequence() != null)
242      {
243         checksum = super.getMD5Checksum();
244      }
245      else if (CollectionUtil.hasValues(getChains()))
246      {
247         checksum = ChecksumUtil.calculateMD5(getSeqDataString());
248      }
249
250      return checksum;
251   }
252
253   //--------------------------------------------------------------------------
254   @Override
255   public byte[] getSHA1Checksum()
256   {
257      byte[] checksum = null;
258      if (getSequence() != null)
259      {
260         checksum = super.getSHA1Checksum();
261      }
262      else if (CollectionUtil.hasValues(getChains()))
263      {
264         checksum = ChecksumUtil.calculateSHA1(getSeqDataString());
265      }
266
267      return checksum;
268   }
269
270   //---------------------------------------------------------------------------
271   /**
272    Recursively calculates the number of chains in the protein.
273    @return the number of chains in the protein.
274    */
275   public int getNumChains()
276   {
277      int numChains = 0;
278
279      if (getSequence() != null)
280      {
281         numChains = 1;
282      }
283      else if (CollectionUtil.hasValues(getChains()))
284      {
285         for (Protein chain : getChains())
286         {
287            numChains += chain.getNumChains();
288         }
289      }
290
291      return numChains;
292   }
293
294   //--------------------------------------------------------------------------
   /**
    Specifies the protein's name / identifier.
    If this protein is a chain of another protein, the old id is removed from the top
    protein's chain id map and the top protein is asked to assign a chain id; if the id it
    returns differs from the current id, that id replaces the requested one.
    @param inValue the name / identifier for the protein
    @return this Protein object to enable method chaining
    */
   @Override
   public Protein setID(String inValue)
   {
      // Remember the previous id so it can be unregistered / propagated below.
      String oldId = getID();

      super.setID(inValue);

      // propogateIdChange() is defined elsewhere in this class.
      propogateIdChange(oldId, inValue);

      if (mParent != null)
      {
         // Chain ids are tracked in the map of the top protein, not of the direct parent.
         Protein topProtein = getTopProtein();
         topProtein.mChainIdMap.remove(oldId);
         String newId = topProtein.assignChainId(this);

         // NOTE(review): presumably assignChainId() may return an adjusted id (e.g. to keep ids unique) - confirm.
         if (getID() != null
               && !getID().equals(newId))
         {
            super.setID(newId);
            propogateIdChange(oldId, newId);
         }
      }


      return this;
   }
326
327   //--------------------------------------------------------------------------
   /**
    Specifies the protein's description.
    Delegates to the superclass; overridden so that the return type is Protein.
    @param inValue the description of the protein
    @return this Protein object to enable method chaining
    */
   @Override
   public Protein setDescription(CharSequence inValue)
   {
      super.setDescription(inValue);
      return this;
   }
339
340   //--------------------------------------------------------------------------
341   /**
342    Specifies the protein's sequence. A Protein can contain either a sequence or
343    other Protein objects as chains but not both.
344    @param inValue the sequence of the protein
345    @return this Protein object to enable method chaining
346    */
347   @Override
348   public Protein setSequence(CharSequence inValue)
349   {
350      if (CollectionUtil.hasValues(mChains))
351      {
352         throw new RuntimeException("A Protein cannot have both chains and a sequence!");
353      }
354
355      return (Protein) super.setSequence(inValue);
356   }
357
358
359   //--------------------------------------------------------------------------
   /**
    Specifies the NCBI taxon for the protein.
    Delegates to the superclass; overridden so that the return type is Protein.
    @param inValue the NCBI taxon
    @return the value returned by the superclass setter, cast to Protein
    */
   @Override
   public Protein setNCBITaxon(NCBITaxon inValue)
   {
      return (Protein) super.setNCBITaxon(inValue);
   }
365
366
367   //--------------------------------------------------------------------------
368   /**
369    Specifies the protein's sequence. A Protein can contain either a sequence or
370    other Protein objects as chains but not both.
371    @param inReader the sequence of the protein specified via a Reader
372    @return this Protein object to enable method chaining
373    */
374   @Override
375   public Protein setSequence(Reader inReader)
376   {
377      if (CollectionUtil.hasValues(mChains))
378      {
379         throw new RuntimeException("A Protein cannot have both chains and a sequence!");
380      }
381
382      return (Protein) super.setSequence(inReader);
383   }
384
385   //--------------------------------------------------------------------------
386   /**
387    Specifies the protein's chains. A Protein can contain either a sequence or
388    other Protein objects as chains but not both.
389    @param inChains Protein objects that are chains of this Protein object
390    */
391   public void setChains(Collection<Protein> inChains)
392   {
393      if (super.length() > 0)
394      {
395         throw new RuntimeException("A Protein cannot have both chains and a sequence!");
396      }
397
398      mChains = null;
399
400      if (CollectionUtil.hasValues(inChains))
401      {
402         for (Protein chain : inChains)
403         {
404            addChain(chain);
405         }
406      }
407   }
408
409   //--------------------------------------------------------------------------
   /**
    Adds a specified protein chain to this Protein object (with a stoichiometry of one).
    A Protein can contain either a sequence or other Protein objects as chains but not both.
    A null chain is silently ignored. If the supplied chain already has a parent, a clone of it
    is added instead of the object that was passed in.
    @param inChain Protein object that is a chain of this Protein object
    @throws RuntimeException if this Protein already has a sequence
    */
   public void addChain(Protein inChain)
   {
      if (super.length() > 0)
      {
         throw new RuntimeException("A Protein cannot have both chains and a sequence!");
      }

      if (inChain != null)
      {
         // The chain list is created lazily on the first add.
         if (null == mChains) mChains = new ArrayList<>(5);

         // A chain that already belongs to a protein is not moved; a clone is added instead.
         if (inChain.mParent != null)
         {
            inChain = inChain.clone();
         }

         mChains.add(inChain);
         inChain.mParent = this;

         clearElementalCompositionAndCalculatedProperties();

         // checkId() is defined elsewhere in this class.
         // NOTE(review): presumably it registers / assigns the chain's id with the top protein - confirm.
         inChain.checkId();

         // If we're already using this chain, clone it before adding.
         // (Applies to the sub-chains of the chain that was just added.)
         if (CollectionUtil.hasValues(inChain.getChains()))
         {
            List<Protein> newList = new ArrayList<>(inChain.getChains().size());
            boolean duplicatesDetected = false;
            for (Protein chain : inChain.getChains())
            {
               // contains() relies on equals() of the registered chains.
               if (getTopProtein().mChainIdMap.values().contains(chain))
               {
                  chain = chain.clone();
                  newList.add(chain);
                  duplicatesDetected = true;
               }
               else
               {
                  newList.add(chain);
               }

               chain.checkId();
            }

            // Only rebuild the sub-chain list if at least one sub-chain had to be cloned.
            // Note that setChains() re-adds every entry through addChain().
            if (duplicatesDetected) inChain.setChains(newList);
         }
      }
   }
463
464   //--------------------------------------------------------------------------
465   /**
466    Adds specified protein chains to this Protein object (with a stoichiometry of one).
467    A Protein can contain either a sequence or other Protein objects as chains but not both.
468    @param inChains Protein objects that are chains of this Protein object
469    */
470   public void addChains(Collection<Protein> inChains)
471   {
472      // Not the most efficient way to do it, but it keeps things simple.
473      if (CollectionUtil.hasValues(inChains))
474      {
475         for (Protein chain : inChains)
476         {
477            addChain(chain);
478         }
479      }
480   }
481
482   //--------------------------------------------------------------------------
483   /**
484    Adds a specified number of copies of a specified protein chain to this Protein object.
485    (Ex. 2 heavy or light chains in an antibody.)
486    A Protein can contain either a sequence or other Protein objects as chains but not both.
487    @param inChain Protein object that is a chain of this Protein object
488    @param inNumCopies the number of copies of the specified chain that should be added
489    */
490   public void addChains(Protein inChain, int inNumCopies)
491   {
492      String baseId = null;
493      for (int i = 0; i < inNumCopies; i++)
494      {
495         Protein chain = inChain.clone();
496         if (baseId != null)
497         {
498            chain.setID(baseId + "_" + (i + 1));
499         }
500
501         addChain(chain);
502
503         if (null == baseId)
504         {
505            baseId = chain.getID();
506            chain.setID(baseId + "_" + (i + 1));
507         }
508      }
509   }
510
511   //--------------------------------------------------------------------------
   /**
    Reports whether this Protein object contains other Protein objects as chains.
    @return the result of CollectionUtil.hasValues() for this protein's chain list
    */
   public boolean hasChains()
   {
      return CollectionUtil.hasValues(mChains);
   }
516
517   //--------------------------------------------------------------------------
   /**
    Returns the direct chains of this Protein object.
    Note that the internal list itself is returned, not a copy.
    @return the chain list, or null if no chain list currently exists
            (none has been added yet, or setChains() reset it)
    */
   public Collection<Protein> getChains()
   {
      return mChains;
   }
522
523   //--------------------------------------------------------------------------
   /**
    Returns the chain with the specified id.
    The lookup is performed in this object's own chain id map.
    @param inChainId the id of the chain to return
    @return the requested chain, or whatever the map returns for an unknown id (presumably null)
    */
   public Protein getChain(String inChainId)
   {
      return mChainIdMap.get(inChainId);
   }
533
534   //--------------------------------------------------------------------------
535   public Collection<Protein> getDistinctChains()
536   {
537      Set<Protein> distinctChains = null;
538
539      if (CollectionUtil.hasValues(mChains))
540      {
541         distinctChains = new HashSet<>(3);
542         for (Protein chain : mChains)
543         {
544            if (chain.hasChains())
545            {
546               distinctChains.addAll(chain.getDistinctChains());
547            }
548            else
549            {
550               distinctChains.add(chain);
551            }
552         }
553      }
554
555      return distinctChains;
556   }
557
558   //--------------------------------------------------------------------------
559   /**
560    Returns chains of this Protein object organized into groups that are identical sequences.
561    @return the protein's chains grouped by sequence
562    */
563   public Collection<Collection<Protein>> getChainStoichiometryGroups()
564   {
565      Collection<Collection<Protein>> stoichiometryGroups = new ArrayList<>();
566
567      if (CollectionUtil.hasValues(getChains()))
568      {
569         for (Protein chain : getChains())
570         {
571            boolean added = false;
572            for (Collection<Protein> existingGroup : stoichiometryGroups)
573            {
574               Protein comparisonChain = existingGroup.iterator().next();
575               if (chain.length() == comparisonChain.length()
576                   && new String(chain.getMD5Checksum()).equals(new String(comparisonChain.getMD5Checksum()))
577                   && new String(chain.getSHA1Checksum()).equals(new String(comparisonChain.getSHA1Checksum())))
578               {
579                  existingGroup.add(chain);
580                  added = true;
581                  break;
582               }
583            }
584
585            if (! added)
586            {
587               Collection<Protein> newGroup = new ArrayList<>(5);
588               newGroup.add(chain);
589               stoichiometryGroups.add(newGroup);
590            }
591         }
592      }
593
594      return stoichiometryGroups;
595   }
596
597   //--------------------------------------------------------------------------
   /**
    Returns the ids held in this object's chain id map.
    The key set of the map itself is returned, not a copy.
    @return the chain ids, or null if the chain id map is null
    */
   public Set<String> getChainIds()
   {
      return (mChainIdMap != null ? mChainIdMap.keySet() : null);
   }
602
603   //--------------------------------------------------------------------------
604   /**
605    If this Protein object contains a sequence, the length of that sequence is returned.
606    If this Protein object contains other Protein chains, the sum of their lengths is returned.
607    @return the total sequence length
608    */
609   @Override
610   public int length()
611   {
612      int length = 0;
613      if (CollectionUtil.hasValues(mChains))
614      {
615         for (Protein chain : mChains)
616         {
617            length += chain.length();
618         }
619      }
620      else
621      {
622         length = super.length();
623      }
624
625      return length;
626   }
627
628   //--------------------------------------------------------------------------
   /**
    Clears this protein's elemental composition (via the superclass) and then does the
    same for its parent, so the invalidation travels up to the top protein.
    */
   // Setup this way to avoid a stackoverflow if clearElementalComposition() is called within clearCalculatedProperties().
   @Override
   public void clearElementalCompositionAndCalculatedProperties()
   {
      super.clearElementalComposition();

      // Anything cached on a containing protein depends on this chain, so invalidate upwards too.
      if (mParent != null)
      {
         mParent.clearElementalCompositionAndCalculatedProperties();
      }
   }
640
641   //--------------------------------------------------------------------------
642   public void setAminoAcidSet(AminoAcidSet inValue)
643   {
644      mAASet = inValue;
645      if (CollectionUtil.hasValues(mChains))
646      {
647         for (Protein chain : mChains)
648         {
649            chain.setAminoAcidSet(inValue);
650         }
651      }
652
653      clearElementalCompositionAndCalculatedProperties();
654   }
655
656   //--------------------------------------------------------------------------
657   public AminoAcidSet getAminoAcidSet()
658   {
659      if (null == mAASet)
660      {
661         if (getParent() != null)
662         {
663            Protein topProtein = getTopProtein();
664            if (topProtein != null)
665            {
666               mAASet = topProtein.getAminoAcidSet();
667            }
668         }
669
670         if (null == mAASet)
671         {
672            mAASet = AminoAcidSet.STANDARD;
673         }
674      }
675
676      return mAASet;
677   }
678
679   //--------------------------------------------------------------------------
   /**
    Protein objects can be recursively composed of other Protein objects that represent
    chains or subunits. This method returns the Protein object that directly contains this
    Protein object, or null if this Protein object is the top object.
    @return the Protein object that contains this Protein object as a chain / subunit
            or null if this Protein object is the top object.
    */
   public Protein getParent()
   {
      return mParent;
   }
691
692   //--------------------------------------------------------------------------
693   // Need to override in order to work with chains.
694   @Override
695   protected void countGaps()
696   {
697      int count = 0;
698      int totalGapLength = 0;
699
700      if (CollectionUtil.hasValues(mChains))
701      {
702         for (Protein chain : mChains)
703         {
704            count += chain.getNumGaps();
705            totalGapLength += chain.getTotalGapLength();
706         }
707      }
708      else
709      {
710         Matcher m = GAP_PATTERN.matcher(getSequence());
711         while (m.find())
712         {
713            count++;
714            totalGapLength += m.group(0).length();
715         }
716      }
717
718      setNumGaps(count);
719      setTotalGapLength(totalGapLength);
720   }
721
722   //--------------------------------------------------------------------------
723   /**
724    Returns a Map containing the amino acid composition of the protein including any subchains.
725    @return a Map with AminoAcids as keys and Integers as the values
726    */
727   public AminoAcidComposition getAminoAcidComposition()
728   {
729      if (null == mAAComposition)
730      {
731         AminoAcidComposition aaComposition = new AminoAcidComposition();
732
733         if (CollectionUtil.hasValues(mChains))
734         {
735            for (Protein chain : mChains)
736            {
737               aaComposition.addAll(chain.getAminoAcidComposition());
738            }
739         }
740         else
741         {
742            // AA Composition
743            Map<String, Integer> composition = getComposition();
744            for (String aaString : composition.keySet())
745            {
746               if (! aaString.equals("-")) // Ignore gaps
747               {
748                  AminoAcid aa = getAminoAcidSet().getAA(aaString.charAt(0));
749                  if (null == aa)
750                  {
751                     aa = AminoAcid.UNDEFINED;
752                  }
753
754                  Integer value = composition.get(aaString);
755
756                  aaComposition.increment(aa, value);
757               }
758            }
759         }
760
761         mAAComposition = aaComposition;
762      }
763
764      return mAAComposition;
765   }
766
767   //--------------------------------------------------------------------------
768   /**
769    Returns an unmodifiable copy of the elemental composition Map adjusted (if necessary)
770    for the specified protein analysis mode. The keys are
771    Element objects and the values are Floats. Why Floats instead of Integers you
772    ask? Because some amino acid codes such as B and Z are ambiguous averages.
773    @return the elemental composition map
774    */
775   public Map<Element, Float> getElementalComposition(ProteinAnalysisMode inMode)
776   {
777      Map<Element, Float> elementalCompositionMap = super.getElementalComposition();
778
779      if (inMode instanceof ReducedAnalysisMode)
780      {
781         // Create a reduced version
782         Molecule reducedMol = new Molecule(elementalCompositionMap);
783
784         // Was an alkylated cysteine form provided?
785         AminoAcid alkylatedCys = ((ReducedAnalysisMode)inMode).getAlkylatedCysteine();
786         if (alkylatedCys != null)
787         {
788            AminoAcid currentCysForm = getAminoAcidSet().getAA('C');
789            // Adjust the elemental composition
790            reducedMol.remove(currentCysForm, getAminoAcidComposition().get(currentCysForm));
791            reducedMol.add(alkylatedCys, getAminoAcidComposition().get(currentCysForm));
792         }
793
794         // Reduce any disulfide crosslinks. S-S goes to SH HS which adds 2 hydrogens
795         if (CollectionUtil.hasValues(mXLinks))
796         {
797            for (ProteinXLink xlink : mXLinks)
798            {
799               if (xlink.getType() == ProteinXLinkType.DISULFIDE)
800               {
801                  reducedMol.add(Element.HYDROGEN, 2);
802               }
803            }
804         }
805         else if (mNumDisulfideBonds != null)
806         {
807            reducedMol.add(Element.HYDROGEN, 2 * mNumDisulfideBonds);
808         }
809
810         elementalCompositionMap = reducedMol.getElementalComposition();
811      }
812
813      return elementalCompositionMap;
814   }
815
816   //--------------------------------------------------------------------------
   /**
    Returns the monoisotopic mass using ProteinAnalysisMode.NATIVE.
    @return the monoisotopic mass
    */
   @Override
   public Double getMonoisotopicMass()
   {
      return getMonoisotopicMass(ProteinAnalysisMode.NATIVE);
   }
822
823   //--------------------------------------------------------------------------
824   public Double getMonoisotopicMass(ProteinAnalysisMode inMode)
825   {
826      Double mass = 0.0;
827      if (inMode.equals(ProteinAnalysisMode.NATIVE))
828      {
829         mass = super.getMonoisotopicMass();
830      }
831      else if (length() > 0)
832      {
833         Molecule organicMatter = new Molecule(getElementalComposition(inMode));
834         mass = organicMatter.getMonoisotopicMass();
835      }
836
837      return mass;
838   }
839
840   //--------------------------------------------------------------------------
   /**
    Returns the average mass using ProteinAnalysisMode.NATIVE.
    @return the average mass
    */
   @Override
   public Double getAverageMass()
   {
      return getAverageMass(ProteinAnalysisMode.NATIVE);
   }
846
847   //--------------------------------------------------------------------------
848   public Double getAverageMass(ProteinAnalysisMode inMode)
849   {
850      Double mass = 0.0;
851      if (inMode.equals(ProteinAnalysisMode.NATIVE))
852      {
853         mass = super.getAverageMass();
854      }
855      else if (length() > 0)
856      {
857         Molecule organicMatter = new Molecule(getElementalComposition(inMode));
858         mass = organicMatter.getAverageMass();
859      }
860
861      return mass;
862   }
863
864   //--------------------------------------------------------------------------
   /**
    Returns the organic average mass using ProteinAnalysisMode.NATIVE.
    @return the organic average mass
    */
   @Override
   public Double getOrganicAverageMass()
   {
      return getOrganicAverageMass(ProteinAnalysisMode.NATIVE);
   }
870
871   //--------------------------------------------------------------------------
872   public Double getOrganicAverageMass(ProteinAnalysisMode inMode)
873   {
874      Double mass = 0.0;
875      if (inMode.equals(ProteinAnalysisMode.NATIVE))
876      {
877         mass = super.getOrganicAverageMass();
878      }
879      else if (length() > 0)
880      {
881         Molecule organicMatter = new Molecule(getElementalComposition(inMode));
882         mass = organicMatter.getOrganicAverageMass();
883      }
884
885      return mass;
886   }
887
888   //--------------------------------------------------------------------------
   /**
    Determines the isoelectric point (the pH at which the net charge is zero) for the protein.
    Uses KaSet.BJELLQVIST and that KaSet's default protein analysis mode.
    @return the isoelectric point for the protein
    */
   public Float getIsoelectricPoint()
   {
      return getIsoelectricPoint(KaSet.BJELLQVIST);
   }
898
899   //--------------------------------------------------------------------------
   /**
    Determines the isoelectric point (the pH at which the net charge is zero)
    for the protein using the default protein analysis mode of the specified KaSet.
    @param inKaSet the specific set of pKa values to use in calculating the isoelectric point
    @return the isoelectric point for the protein
    */
   public Float getIsoelectricPoint(KaSet inKaSet)
   {
      return getIsoelectricPoint(inKaSet, inKaSet.getDefaultProteinAnalysisMode());
   }
910
911   //--------------------------------------------------------------------------
   /**
    Determines the isoelectric point (the pH at which the net charge is zero) for the protein.
    The calculation is delegated to the IsoelectricPoint property for the specified KaSet.
    @param inKaSet the specific set of pKa values to use in calculating the isoelectric point
    @param inMode the analysis mode conditions to apply to the calculation
    @return the isoelectric point for the protein
    */
   public Float getIsoelectricPoint(KaSet inKaSet, ProteinAnalysisMode inMode)
   {
      return IsoelectricPoint.valueOf(inKaSet).calculate(this, new SimpleProteinPropertyCalcSettings().setProteinAnalysisMode(inMode));
   }
922
923   //--------------------------------------------------------------------------
   /**
    Estimates the protein's net charge at the specified pH using the default
    protein analysis mode of the specified KaSet.
    @param pH the specific pH value at which to calculate the protein's net charge
    @param inKaSet the specific set of pKa values to use in calculating the net charge
    @return the net charge of the protein at the specified pH
    */
   public Double getNetCharge(double pH, KaSet inKaSet)
   {
      return getNetCharge(pH, inKaSet, inKaSet.getDefaultProteinAnalysisMode());
   }
934
935   //--------------------------------------------------------------------------
936   /**
937    Estimates the protein's net charge at the specified pH.
938    @param pH the specific pH value at which to calculate the protein's net charge
939    @param inKaSet the specific set of pKa values to use in calculating the isoelectric point
940    @param inMode the anlysis mode conditions to apply to the calculation
941    @return the net charge of the protein at the specified pH
942    */
943   public Double getNetCharge(double pH, KaSet inKaSet, ProteinAnalysisMode inMode)
944   {
945      return getNetCharge(pH, constructIonizableGroupMap(inKaSet, inMode));
946   }
947
948   //--------------------------------------------------------------------------
949   /**
950    Returns the total number of specified disulfide bonds or null if the number
951    of disulfides has not been set at any chain level.
952    @return the total number of specified disulfide bonds
953    */
954   public Integer getTotalNumDisulfideBonds()
955   {
956      // Possibilities: - set for 'parent' protein, null in individual chains
957      //                - set  for 'parent' protein and individual chains
958      //                - null for 'parent' protein and individual chains
959      //                - wouldn't really make sense for it to be null for the 'parent' protein and set for individual chains
960
961      int count = 0;
962      boolean allNull = true;
963
964      if (CollectionUtil.hasValues(mChains))
965      {
966         for (Protein chain : mChains)
967         {
968            Integer chainCount = chain.getTotalNumDisulfideBonds();
969            if (chainCount != null)
970            {
971               count += chainCount;
972               allNull = false;
973            }
974         }
975      }
976
977      if (CollectionUtil.hasValues(mXLinks))
978      {
979         for (ProteinXLink xlink : mXLinks)
980         {
981            if (xlink.getType() == ProteinXLinkType.DISULFIDE)
982            {
983               count++;
984               allNull = false;
985            }
986         }
987      }
988      else if (mNumDisulfideBonds != null)
989      {
990         count += mNumDisulfideBonds;
991         allNull = false;
992      }
993
994      return (allNull ? null : count);
995   }
996
997   //--------------------------------------------------------------------------
998   public Protein setNumDisulfideBonds(int inValue)
999   {
1000      // Possibilities: - set for 'parent' protein, null in individual chains
1001      //                - set  for 'parent' protein and individual chains
1002      //                - null for 'parent' protein and individual chains
1003      //                - wouldn't really make sense for it to be null for the 'parent' protein and set for individual chains
1004
1005      // TODO: If it has already been set for chains below this protein what should I do?
1006
1007      int cysCount = getAminoAcidComposition().get(AminoAcid.CYSTEINE);
1008      if (inValue > cysCount / 2)
1009      {
1010         throw new RuntimeException("There are not enough cysteines for " + inValue + " disulfide bonds!");
1011      }
1012
1013      mNumDisulfideBonds = inValue;
1014
1015      clearElementalCompositionAndCalculatedProperties();
1016
1017      return this;
1018   }
1019
1020   //--------------------------------------------------------------------------
1021   /**
1022    Returns the total number of free cysteines (or the total number of cysteines if the number
1023    of disulfides has not been set at any chain level.
1024    @return the total number of free cysteines
1025    */
1026   public int getTotalNumFreeCysteines()
1027   {
1028      int numCys = getAminoAcidComposition().get(AminoAcid.CYSTEINE);
1029      Integer numDisulfides = getTotalNumDisulfideBonds();
1030      if (numDisulfides != null) numCys -= (numDisulfides * 2);
1031
1032      return numCys;
1033   }
1034
1035   //--------------------------------------------------------------------------
1036   /**
1037    Returns the estimated molar extinction coefficient at A<sub>280</sub>. If the number of
1038    disulfide bonds has not been specified, it assumes that all cysteines are disulfide-linked.
1039    <p>
1040    This method utilizes the coefficients derived by <i>Pace et al. (1995) Protein Science 4:2411-2423.</i>
1041    </p>
1042    @return the estimated molar extinction coefficient for the protein
1043    */
1044   public int getExtinctionCoeff()
1045   {
1046      if (null == mExtinctionCoeff)
1047      {
1048         mExtinctionCoeff = ExtinctionCoeff.PROPERTY.calculate(this);
1049      }
1050
1051      return mExtinctionCoeff.intValue();
1052   }
1053
1054   //--------------------------------------------------------------------------
1055   /**
1056    Returns the estimated mass attenuation coefficient (ml mg<sup>-1</sup> cm<sup>-1</sup>) at A<sub>280</sub>. If the number of
1057    disulfide bonds has not been specified, it assumes that all cysteines are disulfide-linked.
1058    <p>
1059    This method utilizes the coefficients derived by <i>Pace et al. (1995) Protein Science 4:2411-2423.</i>
1060    </p>
1061    @return the estimated percent molar extinction coefficient for the protein
1062    */
1063   public float getPercentExtinctionCoeff()
1064   {
1065      if (null == mPercentExtinctionCoeff)
1066      {
1067         mPercentExtinctionCoeff = PctExtinctionCoeff.PROPERTY.calculate(this);
1068      }
1069
1070      return mPercentExtinctionCoeff;
1071   }
1072
1073
1074    //--------------------------------------------------------------------------
1075    /**
1076     Returns the concentration (mM) of the protein solution by using the Beer Lambert Law.
1077     <pre>
1078     Abs = PCE
1079
1080     Where: Abs = Absorbance at a specific wavelength
1081     P = path length of the cell (assumed to be 1 cm)
1082     C = concentration in moles / liter
1083     E = Molar extinction coeff at a specific wavelength
1084
1085     </pre>
1086     @param inAbsorbance the observed absorbance at 280nm
1087     @return the estimated protein concentration in mM
1088     */
1089    public float getMillimolarConcFromAbsorbance280(float inAbsorbance)
1090    {
1091        // Add a tiny amount to avoid divide by zero errors
1092        return (float) (1000 * inAbsorbance / (getExtinctionCoeff() + 0.0000001));
1093    }
1094
1095
1096   //--------------------------------------------------------------------------
1097   public AminoAcid aminoAcidAt(int inPosition)
1098   {
1099      return getAminoAcidSet().getAA(residueAt(inPosition));
1100   }
1101
1102   //--------------------------------------------------------------------------
1103   /**
1104    Convenience method for setting the N-terminal group as pyro-glu based on
1105    whether the N-terminal residue is Glu or Gln.
1106    */
1107   public void createNTerminalPyroGlu()
1108   {
1109      if (length() > 0)
1110      {
1111         AminoAcid nTerminalResidue = aminoAcidAt(1);
1112         NTerminalGroup nTerminalGroup;
1113         if (nTerminalResidue.equals(AminoAcid.GLUTAMIC_ACID))
1114         {
1115            nTerminalGroup = NTerminalGroup.PYRO_GLU_N_TERM_GLU;
1116         }
1117         else if (nTerminalResidue.equals(AminoAcid.GLUTAMINE))
1118         {
1119            nTerminalGroup = NTerminalGroup.PYRO_GLU_N_TERM_GLN;
1120         }
1121         else
1122         {
1123            throw new RuntimeException("The N-Terminal residue must be Glu or Gln in order to form pyro-glutamic acid!");
1124         }
1125
1126         if (getAminoAcidSet().isLocked())
1127         {
1128            setAminoAcidSet(getAminoAcidSet().clone());
1129         }
1130
1131         getAminoAcidSet().setNTerminalGroup(nTerminalGroup);
1132      }
1133   }
1134
1135   //--------------------------------------------------------------------------
1136   /**
1137    This method converts the asparagine residue of each putative N-link site
1138    into aspartic acid, mimicing enzymatic treatment with PNGase F to remove N-linked carbohydrates.
1139    This method is not reversible and assumes that all putative N-link sites have attached carbohydrate structures.
1140    */
1141   public void treatWithPNGaseF()
1142   {
1143      List<SeqLocation> nLinkSites = findNLinkedSites();
1144      if (CollectionUtil.hasValues(nLinkSites))
1145      {
1146         for (SeqLocation seqLocation : nLinkSites)
1147         {
1148            Protein chain = (seqLocation.getChainId() != null && ! seqLocation.getChainId().equals(getID()) ? getChain(seqLocation.getChainId()) : this);
1149            if (! chain.aminoAcidAt(seqLocation.getStart()).equals(AminoAcid.ASPARAGINE))
1150            {
1151               throw new RuntimeException("The residue at position " + seqLocation.getStart() + " isn't an asparagine as expected!");
1152            }
1153
1154            chain.setResidueAt(seqLocation.getStart(), AminoAcid.ASPARTIC_ACID.getOneLetterCode());
1155         }
1156      }
1157   }
1158
1159   //--------------------------------------------------------------------------
1160   public List<SeqLocation> findNLinkedSites()
1161   {
1162      List<SeqLocation> sites = new ArrayList<>();
1163
1164      if (CollectionUtil.hasValues(mChains))
1165      {
1166         for (Protein chain : mChains)
1167         {
1168            sites.addAll(chain.findNLinkedSites());
1169         }
1170      }
1171      else
1172      {
1173         Pattern pattern = Pattern.compile("N[^P][ST]([^P]|$)", Pattern.CASE_INSENSITIVE);
1174         Matcher m = pattern.matcher(this.getSequence());
1175         int start = 0;
1176         while (m.find(start))
1177         {
1178            sites.add(new SeqLocation(getID(), m.start() + 1, m.start() + 3));
1179            start = m.start() + 1;
1180         }
1181      }
1182
1183      return sites;
1184   }
1185
1186   //--------------------------------------------------------------------------
1187   public XMLNode toXMLNode()
1188   {
1189      XMLNode node = super.toXMLNode();
1190      //node.setTagName(HfgBioXML.PROTEIN_TAG);
1191      if (mNumDisulfideBonds != null)
1192      {
1193         node.setAttribute(HfgBioXML.DISULFIDE_CNT_ATT, mNumDisulfideBonds);
1194      }
1195
1196      if (null == mParent
1197          || getAminoAcidSet() != getTopProtein().getAminoAcidSet())
1198      {
1199         node.addSubtag(getAminoAcidSet().toXMLNode());
1200      }
1201
1202      if (CollectionUtil.hasValues(mChains))
1203      {
1204         XMLNode chainsTag = new XMLTag(HfgBioXML.CHAINS_TAG);
1205         node.addSubtag(chainsTag);
1206         for (Protein chain : mChains)
1207         {
1208            chainsTag.addSubtag(chain.toXMLNode());
1209         }
1210      }
1211
1212      if (CollectionUtil.hasValues(mXLinks))
1213      {
1214         XMLNode xlinksTag = new XMLTag(HfgBioXML.XLINKS_TAG);
1215         node.addSubtag(xlinksTag);
1216         for (ProteinXLink xlink : mXLinks)
1217         {
1218            xlinksTag.addSubtag(xlink.toXMLNode());
1219         }
1220      }
1221
1222      return node;
1223   }
1224
1225   // TODO: Site-specific glycan attachment?
1226
1227   //--------------------------------------------------------------------------
1228   public Protein addGlycans(Glycan inValue, int inCount)
1229   {
1230      if (inValue != null
1231            && inCount > 0)
1232      {
1233         for (int i = 0; i < inCount; i++)
1234         {
1235            addGlycan(inValue);
1236         }
1237      }
1238
1239      return this;
1240   }
1241
1242   //--------------------------------------------------------------------------
1243   public Protein addGlycan(Glycan inValue)
1244   {
1245      if (inValue != null)
1246      {
1247         if (null == mGlycans) mGlycans = new ArrayList<>(5);
1248
1249         mGlycans.add(inValue);
1250         clearElementalCompositionAndCalculatedProperties();
1251      }
1252
1253      return this;
1254   }
1255
1256   //--------------------------------------------------------------------------
1257   public List<Glycan> getGlycans()
1258   {
1259      List<Glycan> glycans = null;
1260      if (CollectionUtil.hasValues(mGlycans))
1261      {
1262         glycans = new ArrayList<>(mGlycans);
1263      }
1264
1265      if (CollectionUtil.hasValues(mChains))
1266      {
1267         for (Protein chain : mChains)
1268         {
1269            List<Glycan> chainGlycans = chain.getGlycans();
1270            if (CollectionUtil.hasValues(chainGlycans))
1271            {
1272               if (null == glycans)
1273               {
1274                  glycans = new ArrayList<>(chainGlycans);
1275               }
1276               else
1277               {
1278                  glycans.addAll(chainGlycans);
1279               }
1280            }
1281         }
1282      }
1283
1284      return glycans;
1285   }
1286
1287   //--------------------------------------------------------------------------
1288   public void addXLink(ProteinXLink inXLink)
1289   {
1290      if (inXLink != null)
1291      {
1292         if (null == mXLinks) mXLinks = new HashSet<>();
1293
1294         inXLink.setParentProtein(this);
1295         mXLinks.add(inXLink);
1296         clearElementalCompositionAndCalculatedProperties();
1297      }
1298   }
1299
1300   //--------------------------------------------------------------------------
1301   public Set<ProteinXLink> getXLinks()
1302   {
1303      Set<ProteinXLink> xLinks = new HashSet<>(10);
1304      if (CollectionUtil.hasValues(mXLinks))
1305      {
1306         xLinks.addAll(mXLinks);
1307      }
1308
1309      if (CollectionUtil.hasValues(mChains))
1310      {
1311         for (Protein chain : mChains)
1312         {
1313            chain.getXLinks(xLinks);
1314         }
1315      }
1316
1317      return xLinks;
1318   }
1319
1320   //--------------------------------------------------------------------------
1321   public void removeXLink(ProteinXLink inXLink)
1322   {
1323      if (CollectionUtil.hasValues(mXLinks))
1324      {
1325         for (ProteinXLink xlink : mXLinks)
1326         {
1327            if (xlink.equals(inXLink))
1328            {
1329               mXLinks.remove(xlink);
1330               break;
1331            }
1332         }
1333      }
1334
1335      if (CollectionUtil.hasValues(mChains))
1336      {
1337         for (Protein chain : mChains)
1338         {
1339            chain.removeXLink(inXLink);
1340         }
1341      }
1342
1343   }
1344
1345   //--------------------------------------------------------------------------
1346   public Set<ProteinXLink> removeXLinks()
1347   {
1348      return removeXLinks(null);
1349   }
1350
1351   //--------------------------------------------------------------------------
1352   public Set<ProteinXLink> removeXLinks(ProteinXLinkType inXLinkType)
1353   {
1354      Set<ProteinXLink> removedXLinks = new HashSet<>(10);
1355      if (CollectionUtil.hasValues(mXLinks))
1356      {
1357         for (ProteinXLink xlink : mXLinks)
1358         {
1359            if (null == inXLinkType
1360                || xlink.getType().equals(inXLinkType))
1361            {
1362               removedXLinks.add(xlink);
1363            }
1364         }
1365
1366         for (ProteinXLink xlink : removedXLinks)
1367         {
1368            mXLinks.remove(xlink);
1369         }
1370      }
1371
1372      if (CollectionUtil.hasValues(mChains))
1373      {
1374         for (Protein chain : mChains)
1375         {
1376            removedXLinks.addAll(chain.removeXLinks(inXLinkType));
1377         }
1378      }
1379
1380      return removedXLinks;
1381   }
1382
1383   //--------------------------------------------------------------------------
1384   @Override
1385   public void clearCalculatedProperties()
1386   {
1387      super.clearCalculatedProperties();
1388      mAAComposition          = null;
1389      mIsoelectricPoint       = null;
1390      mIsoelectricPointKaSet  = null;
1391      mExtinctionCoeff        = null;
1392      mPercentExtinctionCoeff = null;
1393   }
1394
1395   //##########################################################################
1396   // PROTECTED METHODS
1397   //##########################################################################
1398
1399
1400   //--------------------------------------------------------------------------
1401   protected void getXLinks(Set<ProteinXLink> inXLinkList)
1402   {
1403      if (CollectionUtil.hasValues(mXLinks))
1404      {
1405         inXLinkList.addAll(mXLinks);
1406      }
1407   }
1408
1409
1410   //--------------------------------------------------------------------------
1411   /**
1412    Returns a map with AminoAcids as keys and Integers as the values.
1413    */
1414   @Override
1415   protected Map<String, Integer> getComposition()
1416   {
1417      Map<String, Integer> map;
1418
1419      if (CollectionUtil.hasValues(mChains))
1420      {
1421         map = new HashMap<>(20);
1422         for (Protein chain : mChains)
1423         {
1424            Map<String, Integer> chainMap = chain.getComposition();
1425            for (String key : chainMap.keySet())
1426            {
1427               Integer oldValue = map.get(key);
1428               map.put(key, (oldValue != null ? oldValue : 0) + chainMap.get(key));
1429            }
1430         }
1431      }
1432      else
1433      {
1434         map = super.getComposition();
1435      }
1436
1437      return map;
1438   }
1439
1440   //--------------------------------------------------------------------------
1441   @Override
1442   protected Map<Molecule, Integer> getResidueComposition()
1443   {
1444      Map<Molecule, Integer> residueComposition = new HashMap<>(25);
1445
1446      AminoAcidComposition aaComposition = getAminoAcidComposition();
1447      for (AminoAcid aa : aaComposition.keySet())
1448      {
1449         residueComposition.put(aa, aaComposition.get(aa));
1450      }
1451
1452      return residueComposition;
1453   }
1454
1455   //--------------------------------------------------------------------------
1456   @Override
1457   protected Map<Molecule, Integer> getTerminiComposition()
1458   {
1459      Map<Molecule, Integer> terminiComposition = new HashMap<>(5);
1460
1461      if (CollectionUtil.hasValues(mChains))
1462      {
1463         for (Protein chain : mChains)
1464         {
1465            Molecule nTerminus = chain.getAminoAcidSet().getNTerminalGroup();
1466            Integer oldCount = terminiComposition.get(nTerminus);
1467            int newCount = 1 + (oldCount != null ? oldCount : 0);
1468            terminiComposition.put(nTerminus, newCount);
1469
1470            Molecule cTerminus = chain.getAminoAcidSet().getCTerminalGroup();
1471            oldCount = terminiComposition.get(cTerminus);
1472            newCount = 1 + (oldCount != null ? oldCount : 0);
1473            terminiComposition.put(cTerminus, newCount);
1474         }
1475      }
1476      else
1477      {
1478         terminiComposition.put(getAminoAcidSet().getNTerminalGroup(), 1);
1479         terminiComposition.put(getAminoAcidSet().getCTerminalGroup(), 1);
1480      }
1481
1482      return terminiComposition;
1483   }
1484
1485   //--------------------------------------------------------------------------
1486   @Override
1487   protected Map<ProteinXLinkType, Integer> getXLinkComposition()
1488   {
1489      Map<ProteinXLinkType, Integer> xLinkComposition = new HashMap<>(5);
1490
1491      if (CollectionUtil.hasValues(mChains))
1492      {
1493         for (Protein chain : mChains)
1494         {
1495            Map<ProteinXLinkType, Integer> chainXLinkComposition = chain.getXLinkComposition();
1496            if (CollectionUtil.hasValues(chainXLinkComposition))
1497            {
1498               for (ProteinXLinkType xlinkType : chainXLinkComposition.keySet())
1499               {
1500                  Integer oldCount = xLinkComposition.get(xlinkType);
1501                  int newCount = chainXLinkComposition.get(xlinkType) + (oldCount != null ? oldCount : 0);
1502                  xLinkComposition.put(xlinkType, newCount);
1503               }
1504            }
1505         }
1506      }
1507
1508      if (CollectionUtil.hasValues(mXLinks))
1509      {
1510         for (ProteinXLink xlink : mXLinks)
1511         {
1512            Integer oldCount = xLinkComposition.get(xlink.getType());
1513            int newCount = 1 + (oldCount != null ? oldCount : 0);
1514            xLinkComposition.put(xlink.getType(), newCount);
1515         }
1516      }
1517      else
1518      {
1519         Integer disulfideCount = getTotalNumDisulfideBonds();
1520         if (disulfideCount != null)
1521         {
1522            Integer oldCount = xLinkComposition.get(ProteinXLinkType.DISULFIDE);
1523            int newCount = disulfideCount + (oldCount != null ? oldCount : 0);
1524            xLinkComposition.put(ProteinXLinkType.DISULFIDE, newCount);
1525         }
1526      }
1527
1528      return xLinkComposition;
1529   }
1530
1531   //--------------------------------------------------------------------------
1532   @Override
1533   protected void recalculateElementalComposition()
1534   {
1535      super.recalculateElementalComposition();
1536
1537      List<Glycan> glycans = getGlycans();
1538      if (CollectionUtil.hasValues(glycans))
1539      {
1540         for (Glycan glycan : glycans)
1541         {
1542            addElementalComposition(glycan.getElementalComposition());
1543            remove(Molecule.H2O); // Subtract a water lost in the bonding
1544         }
1545      }
1546   }
1547
1548   //##########################################################################
1549   // PRIVATE METHODS
1550   //##########################################################################
1551
1552   //--------------------------------------------------------------------------
1553   private Protein getTopProtein()
1554   {
1555      return mParent != null ? mParent.getTopProtein() : this;
1556   }
1557
1558   //--------------------------------------------------------------------------
1559   private void checkId()
1560   {
1561      if (mParent != null)
1562      {
1563         Protein topProtein = getTopProtein();
1564
1565//         if (CollectionUtil.hasValues(mChains))
1566//         {
1567//            for (Protein chain : mChains)
1568//            {
1569//               chain.checkId();
1570//            }
1571//         }
1572//         else
1573//         {
1574            String newId = topProtein.assignChainId(this);
1575
1576            if (null == getID()
1577                || ! getID().equals(newId))
1578            {
1579               String oldId = getID();
1580               super.setID(newId);
1581               propogateIdChange(oldId, newId);
1582            }
1583//         }
1584      }
1585   }
1586
1587   //--------------------------------------------------------------------------
1588   private void propogateIdChange(String inOldId, String inNewId)
1589   {
1590      if (mXLinks != null)
1591      {
1592         for (ProteinXLink xlink : mXLinks)
1593         {
1594            if (xlink.getDonorChainId() != null
1595                && xlink.getDonorChainId().equals(inOldId))
1596            {
1597               xlink.setDonorChainId(inNewId);
1598            }
1599
1600            if (xlink.getAcceptorChainId() != null
1601                && xlink.getAcceptorChainId().equals(inOldId))
1602            {
1603               xlink.setAcceptorChainId(inNewId);
1604            }
1605         }
1606      }
1607
1608      Protein parent = mParent;
1609      Protein topParent = getTopProtein();
1610      while (parent != null
1611             && parent != topParent)
1612      {
1613         if (parent.mChainIdMap != null)
1614         {
1615            for (String id : parent.mChainIdMap.keySet())
1616            {
1617               if (parent.mChainIdMap.get(id).equals(this))
1618               {
1619                  parent.mChainIdMap.remove(id);
1620                  parent.mChainIdMap.put(getID(), this);
1621                  break;
1622               }
1623            }
1624         }
1625
1626         if (parent.mXLinks != null)
1627         {
1628            for (ProteinXLink xlink : parent.mXLinks)
1629            {
1630               if (xlink.getDonorChainId() != null
1631                     && xlink.getDonorChainId().equals(inOldId))
1632               {
1633                  xlink.setDonorChainId(inNewId);
1634               }
1635
1636               if (xlink.getAcceptorChainId() != null
1637                     && xlink.getAcceptorChainId().equals(inOldId))
1638               {
1639                  xlink.setAcceptorChainId(inNewId);
1640               }
1641            }
1642         }
1643
1644         parent = parent.mParent;
1645      }
1646   }
1647
1648   //--------------------------------------------------------------------------
1649   private String assignChainId(Protein inChain)
1650   {
1651      String chainId = inChain.getID();
1652
1653      if (inChain.mParent != null)
1654      {
1655         if (! StringUtil.isSet(chainId))
1656         {
1657            chainId = "" + (char)('A' + mChainIdMap.size());
1658         }
1659
1660         while (mChainIdMap.containsKey(chainId))
1661         {
1662            if (chainId.length() == 1
1663                && Character.isLetter(chainId.charAt(0)))
1664            {
1665               if (StringUtil.isSet(inChain.mParent.getID())
1666                   && inChain.mParent != this)
1667               {
1668                  chainId = inChain.mParent.getID() + " chain_" + chainId;
1669               }
1670               else
1671               {
1672                  chainId = "" + (char)((int)chainId.charAt(0) + 1);
1673               }
1674            }
1675            else
1676            {
1677               Matcher matcher = sChainIdPattern.matcher(chainId);
1678               if (matcher.find())
1679               {
1680                  chainId = matcher.replaceFirst("_" + (char) ((int)matcher.group(1).charAt(0) + 1));
1681               }
1682               else
1683               {
1684                  chainId += "_B";
1685               }
1686            }
1687         }
1688
1689         // We'll go into an infinite loop if we try inChain.setID() here.
1690         mChainIdMap.put(chainId, inChain);
1691      }
1692
1693      return chainId;
1694   }
1695
1696   //--------------------------------------------------------------------------
1697   private Map<IonizableGroup, Integer> constructIonizableGroupMap(KaSet inKaSet, ProteinAnalysisMode inMode)
1698   {
1699      Map<IonizableGroup, Integer> ionizableGroupMap = new HashMap<>();
1700
1701      if (CollectionUtil.hasValues(mChains))
1702      {
1703         for (Protein chain : mChains)
1704         {
1705            Map<IonizableGroup, Integer> chainMap = chain.constructIonizableGroupMap(inKaSet, ProteinAnalysisMode.REDUCED);
1706            for (IonizableGroup group : chainMap.keySet())
1707            {
1708               Integer oldValue = ionizableGroupMap.get(group);
1709               int newValue = (oldValue != null ? oldValue : 0) + chainMap.get(group);
1710               ionizableGroupMap.put(group, newValue);
1711            }
1712         }
1713
1714         if (inMode == ProteinAnalysisMode.NATIVE)
1715         {
1716            // Exclude disulfide-linked cysteines
1717            List<IonizableGroup> cysGroups = inKaSet.getIonizableGroups(AminoAcid.CYSTEINE);
1718            if (cysGroups != null)
1719            {
1720               ionizableGroupMap.put(cysGroups.get(0), getTotalNumFreeCysteines());
1721            }
1722         }
1723
1724      }
1725      else if (length() > 0)
1726      {
1727         AminoAcid cTerminalResidue = aminoAcidAt(length());
1728         AminoAcidComposition aaComposition = getAminoAcidComposition();
1729         for (AminoAcid aa : aaComposition.keySet())
1730         {
1731            Integer aaCount = aaComposition.get(aa);
1732            if (aaCount != null && aaCount > 0)
1733            {
1734               if (aa == cTerminalResidue
1735                   && inKaSet.getCTerminalSidechainKa(cTerminalResidue) != null
1736                   && getAminoAcidSet().getCTerminalGroup().equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
1737               {
1738                  IonizableGroup group = inKaSet.getCTerminalSidechainKa(cTerminalResidue);
1739                  if (group != null)
1740                  {
1741                     ionizableGroupMap.put(group, 1);
1742                     aaCount--;
1743                  }
1744               }
1745
1746               List<IonizableGroup> groups = inKaSet.getIonizableGroups(aa);
1747               if (groups != null)
1748               {
1749                  if (inMode == ProteinAnalysisMode.NATIVE)
1750                  {
1751                     // Exclude disulfide-linked cysteines
1752                     if (aa.equals(AminoAcid.CYSTEINE))
1753                     {
1754                        aaCount = getTotalNumFreeCysteines();
1755                     }
1756                  }
1757
1758                  for (IonizableGroup group : groups)
1759                  {
1760                     ionizableGroupMap.put(group, aaCount);
1761                  }
1762               }
1763            }
1764         }
1765
1766         IonizableGroup group = inKaSet.getNTerminalKa(getAminoAcidSet().getNTerminalGroup(), aminoAcidAt(1));
1767         if (group != null) ionizableGroupMap.put(group, 1);
1768
1769         group = inKaSet.getCTerminalKa(getAminoAcidSet().getCTerminalGroup(), aminoAcidAt(length()));
1770         if (group != null) ionizableGroupMap.put(group, 1);
1771      }
1772
1773      return ionizableGroupMap;
1774   }
1775
1776   //--------------------------------------------------------------------------
1777   /**
1778    Estimates the protein's net charge at the specified pH.
1779    */
1780   private double getNetCharge(double pH, Map<IonizableGroup, Integer> inIonizableGroupMap)
1781   {
1782      double netCharge = 0;
1783
1784      double concOfHIions = Math.pow(10, -pH);
1785
1786      if (inIonizableGroupMap != null)
1787      {
1788         for (IonizableGroup group : inIonizableGroupMap.keySet())
1789         {
1790            netCharge += group.getCharge(inIonizableGroupMap.get(group), concOfHIions);
1791         }
1792      }
1793
1794      return netCharge;
1795   }
1796
1797   //---------------------------------------------------------------------------
1798   private void recursivelyBuildSequenceInstanceMap(Map<String, Integer> inSequenceInstanceMap)
1799   {
1800      if (getSequence() != null)
1801      {
1802         String refinedChain = getSequence().toUpperCase();
1803         // Remove any trailing stops
1804         if (refinedChain.endsWith("*"))
1805         {
1806            refinedChain = refinedChain.substring(0, refinedChain.length() - 1);
1807         }
1808
1809         if (inSequenceInstanceMap.containsKey(refinedChain))
1810         {
1811            inSequenceInstanceMap.put(refinedChain, inSequenceInstanceMap.get(refinedChain) + 1);
1812         }
1813         else
1814         {
1815            inSequenceInstanceMap.put(refinedChain, 1);
1816         }
1817      }
1818      else if (CollectionUtil.hasValues(getChains()))
1819      {
1820         for (Protein chain : getChains())
1821         {
1822            chain.recursivelyBuildSequenceInstanceMap(inSequenceInstanceMap);
1823         }
1824      }
1825   }
1826
1827   //---------------------------------------------------------------------------
1828   private String getSeqDataString()
1829   {
1830      // Build a chain map
1831      Map<String, Integer> sequenceInstanceMap = new HashMap<>(5);
1832      recursivelyBuildSequenceInstanceMap(sequenceInstanceMap);
1833
1834      List<String> sortedChains = new ArrayList<>(sequenceInstanceMap.keySet());
1835      Collections.sort(sortedChains);
1836
1837      StringBuilderPlus seqData = new StringBuilderPlus().setDelimiter("/");
1838      for (String chain : sortedChains)
1839      {
1840         seqData.delimitedAppend(sequenceInstanceMap.get(chain));
1841         seqData.append("_");
1842         seqData.append(chain);
1843      }
1844
1845      return seqData.toString();
1846   }
1847
1848}