001package com.hfg.bio;
002
003import java.util.ArrayList;
004import java.util.Collection;
005import java.util.Collections;
006import java.util.List;
007import java.util.Map;
008import java.util.HashMap;
009
010import com.hfg.bio.glyco.Monosaccharide;
011import com.hfg.bio.proteinproperty.ProteinAnalysisMode;
012import com.hfg.chem.Charge;
013import com.hfg.chem.IonizableGroup;
014import com.hfg.chem.Molecule;
015import com.hfg.exception.InvalidValueException;
016import com.hfg.exception.UnmodifyableObjectException;
017import com.hfg.util.BooleanUtil;
018import com.hfg.util.CompareUtil;
019import com.hfg.util.StringUtil;
020import com.hfg.util.collection.CollectionUtil;
021import com.hfg.util.collection.OrderedMap;
022import com.hfg.xml.XMLNode;
023import com.hfg.xml.XMLTag;
024
025//------------------------------------------------------------------------------
026/**
027 * Encapsulation for a group of K<sub>a</sub> (dissociation constant) values.
028 * Most commonly used for protein isoelectric point prediction.
029 *
030 * @author J. Alex Taylor, hairyfatguy.com
031 */
032//------------------------------------------------------------------------------
033// com.hfg XML/HTML Coding Library
034//
035// This library is free software; you can redistribute it and/or
036// modify it under the terms of the GNU Lesser General Public
037// License as published by the Free Software Foundation; either
038// version 2.1 of the License, or (at your option) any later version.
039//
040// This library is distributed in the hope that it will be useful,
041// but WITHOUT ANY WARRANTY; without even the implied warranty of
042// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
043// Lesser General Public License for more details.
044//
045// You should have received a copy of the GNU Lesser General Public
046// License along with this library; if not, write to the Free Software
047// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
048//
049// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
050// jataylor@hairyfatguy.com
051//------------------------------------------------------------------------------
052
053public class KaSet implements Cloneable, Comparable<KaSet>
054{
   // Registry of KaSet instances keyed by name. The KaSet(String) constructor adds to it
   // and values() / valueOf() read from it. It must be declared before the predefined
   // constants below because their constructors run during static initialization
   // and put themselves into this map.
   private static Map<String, KaSet> sUniqueMap = new OrderedMap<>();
056
057   /**
058    Bjellqvist B, Hughes GJ, Pasquali C, et al. <i>The focusing positions of polypeptides
059    in immobilized pH gradients can be predicted from their amino acid sequences</i>.
060    <b>Electrophoresis</b>. 1993;14(10):1023-31.
061    Available at: <a href='http://www.ncbi.nlm.nih.gov/pubmed/8125050'>http://www.ncbi.nlm.nih.gov/pubmed/8125050</a>.
062    Intended for use with reduced and denatured proteins on 2D gels.
063    */
064   public static final KaSet BJELLQVIST       = new KaSet("Bjellqvist");
065
066   /**
067    Method from <a href='http://expasy.org/tools/pi_tool.html'>Expasy's Compute pI/Mw tool</a>
068    which implements the values from Bjellqvist but mistakenly treats C-terminal Asp and Glu
069    <u>sidechain</u> values as C-terminal values.
070    Intended for use with reduced and denatured proteins on 2D gels.
071    */
072   public static final KaSet EXPASY           = new KaSet("Expasy");
073
074   /**
075    Method from <a href='http://emboss.open-bio.org/rel/rel6/apps/iep.html'>EMBOSS's iep tool</a>.
076    Intended for use with reduced and denatured proteins on 2D gels.
077    */
078   public static final KaSet EMBOSS           = new KaSet("EMBOSS");
079
080   /**
081    Sillero A, Ribeiro JM. <i>Isoelectric points of proteins:
082    theoretical determination</i>. <b>Analytical biochemistry</b>. 1989;179(2):319-325.
083    Available at: <a href='http://www.ncbi.nlm.nih.gov/pubmed/2774179'>http://www.ncbi.nlm.nih.gov/pubmed/2774179</a>.
084    Intended for use with reduced and denatured proteins on 2D gels.
085    */
086   public static final KaSet SILLERO          = new KaSet("Sillero");
087
088   /**
089    Sillero A, Ribeiro JM. <i>Isoelectric points of proteins:
090    theoretical determination</i>. <b>Analytical biochemistry</b>. 1989;179(2):319-325.
091    Available at: <a href='http://www.ncbi.nlm.nih.gov/pubmed/2774179'>http://www.ncbi.nlm.nih.gov/pubmed/2774179</a>.
092    Intended for use with reduced and denatured proteins on 2D gels. An oversimplification for ease of calculation.
093    */
094   public static final KaSet SILLERO_ABRIDGED = new KaSet("Sillero (abridged)");
095
096   /**
097    Patrickios, CS, Yamasaki, EN. <i>Polypeptide amino acid composition and isoelectric point. II. Comparison between experiment and theory</i>.
    <b>Analytical Biochemistry</b>. 1995;231(1):82-91. <a href='http://doi.org/10.1006/abio.1995.1506'>http://doi.org/10.1006/abio.1995.1506</a>.
099    An oversimplification for ease of calculation.
100    */
101   public static final KaSet PATRICKIOS_SIMPLE = new KaSet("Patrickios (simple)");
102
103   /**
104    <i>Biochemistry</i> by Lubert Stryer, 1995
105    */
106   public static final KaSet STRYER_1995      = new KaSet("Stryer (1995)");
107
108   /**
109    Grimsley GR, Scholtz JM, Pace CN (2009). <i>A summary of the measured pK values of the ionizable groups in folded proteins.</i>
110    <b>Protein Science</b> 18(1), 247-251. Note that they do not provide a value for Arginine so a default value of 12 is used.
111    Intended for use with native proteins.
112    */
113   public static final KaSet GRIMSLEY      = new KaSet("Grimsley");
114
115   /**
116    Intended for use with native proteins.
117    */
118   public static final KaSet TAYLOR_NATIVE      = new KaSet("Taylor");
119
120
   // Name of the set; also the key used when the set is registered by the KaSet(String) constructor.
   private String                         mName;
   // Analysis mode reported by getDefaultProteinAnalysisMode(); REDUCED unless changed.
   private ProteinAnalysisMode            mDefaultProteinAnalysisMode = ProteinAnalysisMode.REDUCED;
   // When true, the mutators of this class throw UnmodifyableObjectException.
   private boolean                        mLocked;
   // Ionizable groups per molecule; a molecule may carry more than one group.
   private Map<Molecule, List<IonizableGroup>>  mKaMap                   = new HashMap<>(20);
   // N-terminal values, keyed either by an NTerminalGroup or by the N-terminal AminoAcid.
   private Map<Molecule, IonizableGroup>  mNTerminalKaMap          = new HashMap<>(10);
   // C-terminal values, keyed either by a CTerminalGroup or by the C-terminal AminoAcid.
   private Map<Molecule, IonizableGroup>  mCTerminalKaMap          = new HashMap<>(10);
   // Sidechain values that apply when the given amino acid is the C-terminal residue.
   private Map<AminoAcid, IonizableGroup> mCTerminalSidechainKaMap = new HashMap<>(10);
128
   // Populates each predefined set with its pKa values and then locks it.
   // The Charge argument of every call is the charge of the group's protonated form.
   static
   {

      // Bjellqvist: generic N-terminus value plus residue-specific N-terminal values
      // (getNTerminalKa() prefers the residue-specific value for an unmodified N-terminus),
      // sidechain values, generic C-terminus value and C-terminal Asp/Glu sidechain values.
      BJELLQVIST.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.ALANINE,       7.59f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.METHIONINE,    7.00f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.SERINE,        6.93f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.PROLINE,       8.36f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.THREONIE,      6.82f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.VALINE,        7.44f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f,  Charge.POSITIVE)
         .addpKa(AminoAcid.ASPARTIC_ACID, 4.05f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.GLUTAMIC_ACID, 4.45f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.CYSTEINE,         9f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.TYROSINE,        10f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.HISTIDINE,     5.98f,  Charge.POSITIVE)
         .addpKa(AminoAcid.LYSINE,          10f,  Charge.POSITIVE)
         .addpKa(AminoAcid.ARGININE,        12f,  Charge.POSITIVE)
         .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
         .setCTerminalSidechainpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
         .setCTerminalSidechainpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
         .lock();

      // Expasy: same values as Bjellqvist except for how the C-terminal Asp/Glu values are registered.
      EXPASY.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.ALANINE,       7.59f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.METHIONINE,    7.00f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.SERINE,        6.93f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.PROLINE,       8.36f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.THREONIE,      6.82f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.VALINE,        7.44f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f,  Charge.POSITIVE)
         .addpKa(AminoAcid.ASPARTIC_ACID, 4.05f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.GLUTAMIC_ACID, 4.45f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.CYSTEINE,         9f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.TYROSINE,        10f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.HISTIDINE,     5.98f,  Charge.POSITIVE)
         .addpKa(AminoAcid.LYSINE,          10f,  Charge.POSITIVE)
         .addpKa(AminoAcid.ARGININE,        12f,  Charge.POSITIVE)
         .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
          // Note: mistakenly treats C-terminal Asp and Glu sidechain values as C-terminal values.
         .setCTerminalpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
         .setCTerminalpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
         .lock();

      // EMBOSS: one generic value per terminus plus sidechain values.
      EMBOSS.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.6f, Charge.POSITIVE)
            .addpKa(AminoAcid.ASPARTIC_ACID, 3.9f, Charge.NEUTRAL)
            .addpKa(AminoAcid.GLUTAMIC_ACID, 4.1f, Charge.NEUTRAL)
            .addpKa(AminoAcid.CYSTEINE,      8.5f, Charge.NEUTRAL)
            .addpKa(AminoAcid.TYROSINE,     10.1f, Charge.NEUTRAL)
            .addpKa(AminoAcid.HISTIDINE,     6.5f, Charge.POSITIVE)
            .addpKa(AminoAcid.LYSINE,       10.8f, Charge.POSITIVE)
            .addpKa(AminoAcid.ARGININE,     12.5f, Charge.POSITIVE)
            .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.6f, Charge.NEUTRAL)
            .lock();

      // Sillero: one generic value per terminus plus sidechain values.
      SILLERO.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.2f, Charge.POSITIVE)
         .addpKa(AminoAcid.ASPARTIC_ACID, 4.0f, Charge.NEUTRAL)
         .addpKa(AminoAcid.GLUTAMIC_ACID, 4.5f, Charge.NEUTRAL)
         .addpKa(AminoAcid.CYSTEINE,      9.0f, Charge.NEUTRAL)
         .addpKa(AminoAcid.TYROSINE,     10.0f, Charge.NEUTRAL)
         .addpKa(AminoAcid.HISTIDINE,     6.4f, Charge.POSITIVE)
         .addpKa(AminoAcid.LYSINE,       10.4f, Charge.POSITIVE)
         .addpKa(AminoAcid.ARGININE,     12.0f, Charge.POSITIVE)
         .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.2f, Charge.NEUTRAL)
         .lock();

      // Sillero (abridged): groups share a small number of distinct values.
      SILLERO_ABRIDGED.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 11.2f, Charge.POSITIVE)
         .addpKa(AminoAcid.ASPARTIC_ACID, 4.2f, Charge.NEUTRAL)
         .addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
         .addpKa(AminoAcid.CYSTEINE,      9.5f, Charge.NEUTRAL)
         .addpKa(AminoAcid.TYROSINE,      9.5f, Charge.NEUTRAL)
         .addpKa(AminoAcid.HISTIDINE,     6.4f, Charge.POSITIVE)
         .addpKa(AminoAcid.LYSINE,       11.2f, Charge.POSITIVE)
         .addpKa(AminoAcid.ARGININE,     11.2f, Charge.POSITIVE)
         .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 4.2f, Charge.NEUTRAL)
         .lock();

      // Patrickios (simple): only Asp, Glu, Lys, Arg and the termini, with two distinct values.
      PATRICKIOS_SIMPLE.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 11.2f, Charge.POSITIVE)
         .addpKa(AminoAcid.ASPARTIC_ACID, 4.2f, Charge.NEUTRAL)
         .addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
         .addpKa(AminoAcid.LYSINE,       11.2f, Charge.POSITIVE)
         .addpKa(AminoAcid.ARGININE,     11.2f, Charge.POSITIVE)
         .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 4.2f, Charge.NEUTRAL)
         .lock();

      // Stryer (1995): default analysis mode is NATIVE instead of the field default.
      STRYER_1995.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
         .setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.0f, Charge.POSITIVE)
         .addpKa(AminoAcid.ASPARTIC_ACID, 4.4f, Charge.NEUTRAL)
         .addpKa(AminoAcid.GLUTAMIC_ACID, 4.4f, Charge.NEUTRAL)
         .addpKa(AminoAcid.CYSTEINE,      8.5f, Charge.NEUTRAL)
         .addpKa(AminoAcid.TYROSINE,     10.0f, Charge.NEUTRAL)
         .addpKa(AminoAcid.HISTIDINE,     6.5f, Charge.POSITIVE)
         .addpKa(AminoAcid.LYSINE,       10.0f, Charge.POSITIVE)
         .addpKa(AminoAcid.ARGININE,     12.0f, Charge.POSITIVE)
         .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.1f, Charge.NEUTRAL)
         .lock();

      // Grimsley: default analysis mode is NATIVE; the Arginine value of 12 is a stand-in
      // default (see the javadoc on the GRIMSLEY constant).
      GRIMSLEY.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
         .setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.7f, Charge.POSITIVE)
         .addpKa(AminoAcid.ASPARTIC_ACID, 3.5f, Charge.NEUTRAL)
         .addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
         .addpKa(AminoAcid.CYSTEINE,      6.8f, Charge.NEUTRAL)
         .addpKa(AminoAcid.TYROSINE,     10.3f, Charge.NEUTRAL)
         .addpKa(AminoAcid.HISTIDINE,     6.6f, Charge.POSITIVE)
         .addpKa(AminoAcid.LYSINE,       10.4f, Charge.POSITIVE)
         .addpKa(AminoAcid.ARGININE,     12f,   Charge.POSITIVE)
         .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.3f, Charge.NEUTRAL)
         .lock();

      // Taylor: default analysis mode is NATIVE; N-terminal values match those used for Bjellqvist.
      // NOTE(review): the C-terminal Asp/Glu values are registered with setCTerminalpKa() (as in
      // EXPASY) rather than setCTerminalSidechainpKa() (as in BJELLQVIST) - confirm this is intended.
      TAYLOR_NATIVE.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
         .setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.ALANINE,       7.59f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.METHIONINE,    7.00f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.SERINE,        6.93f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.PROLINE,       8.36f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.THREONIE,      6.82f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.VALINE,        7.44f,  Charge.POSITIVE)
         .setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f,  Charge.POSITIVE)
         .addpKa(AminoAcid.ASPARTIC_ACID,    4.5f,   Charge.NEUTRAL)
         .addpKa(AminoAcid.GLUTAMIC_ACID,    4.65f,  Charge.NEUTRAL)
         .addpKa(AminoAcid.CYSTEINE,         9f,     Charge.NEUTRAL)
         .addpKa(AminoAcid.TYROSINE,        10.1f,   Charge.NEUTRAL)
         .addpKa(AminoAcid.HISTIDINE,        6.6f,   Charge.POSITIVE)
         .addpKa(AminoAcid.LYSINE,          10.78f,  Charge.POSITIVE)
         .addpKa(AminoAcid.ARGININE,        12.25f,  Charge.POSITIVE)
         .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
         .setCTerminalpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
         .setCTerminalpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
         .lock();

   }
260
261   //###########################################################################
262   // CONSTRUCTORS
263   //###########################################################################
264
265   //--------------------------------------------------------------------------
266   public KaSet(String inName)
267   {
268      mName = inName;
269      sUniqueMap.put(mName, this);
270   }
271
272   //--------------------------------------------------------------------------
273   public KaSet(XMLNode inXML)
274   {
275      inXML.verifyTagName(HfgBioXML.KA_SET_TAG);
276      mName = inXML.getAttributeValue(HfgBioXML.NAME_ATT);
277
278      XMLTag defaultAnalysisModeTag = inXML.getOptionalSubtagByName(HfgBioXML.DEFAULT_ANALYSIS_MODE_ATT);
279      if (defaultAnalysisModeTag != null)
280      {
281         XMLTag analysisModeTag = defaultAnalysisModeTag.getRequiredSubtagByName(HfgBioXML.PROTEIN_ANALYSIS_MODE_TAG);
282
283         setDefaultProteinAnalysisMode(ProteinAnalysisMode.instantiate(analysisModeTag));
284      }
285
286      List<XMLTag> pKaTags = inXML.getSubtagsByName(HfgBioXML.PKA_TAG);
287      if (CollectionUtil.hasValues(pKaTags))
288      {
289         for (XMLTag pKaTag : pKaTags)
290         {
291            Molecule molecule = null;
292            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
293            {
294               molecule = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
295            }
296            else if (pKaTag.hasAttribute(HfgBioXML.MONOSACCHARIDE_ATT))
297            {
298               molecule = Monosaccharide.valueOf(pKaTag.getAttributeValue(HfgBioXML.MONOSACCHARIDE_ATT));
299            }
300
301            List<XMLTag> ionizableGroupTags = pKaTag.getSubtagsByName(IonizableGroup.IONIZABLE_GROUP_TAG);
302            if (CollectionUtil.hasValues(ionizableGroupTags))
303            {
304               for (XMLTag ionizableGroupTag : (List<XMLTag>) (Object) pKaTag.getSubtags())
305               {
306                  addpKa(molecule, new IonizableGroup(ionizableGroupTag));
307               }
308            }
309            else
310            {
311               // Old style
312               addpKa((AminoAcid)molecule, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
313            }
314         }
315      }
316
317      List<XMLTag> nTerm_pKaTags = inXML.getSubtagsByName(HfgBioXML.N_TERM_PKA_TAG);
318      if (CollectionUtil.hasValues(nTerm_pKaTags))
319      {
320         for (XMLTag pKaTag : nTerm_pKaTags)
321         {
322            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
323            {
324               AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
325               setNTerminalpKa(aa, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
326            }
327            else
328            {
329               NTerminalGroup nTerminalGroup = NTerminalGroup.valueOf(pKaTag.getAttributeValue(HfgBioXML.N_TERM_ATT));
330               setNTerminalpKa(nTerminalGroup, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
331            }
332         }
333      }
334
335      List<XMLTag> cTerm_pKaTags = inXML.getSubtagsByName(HfgBioXML.C_TERM_PKA_TAG);
336      if (CollectionUtil.hasValues(cTerm_pKaTags))
337      {
338         for (XMLTag pKaTag : cTerm_pKaTags)
339         {
340            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
341            {
342               AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
343               setCTerminalpKa(aa, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
344            }
345            else
346            {
347               CTerminalGroup cTerminalGroup = CTerminalGroup.valueOf(pKaTag.getAttributeValue(HfgBioXML.C_TERM_ATT));
348               setCTerminalpKa(cTerminalGroup, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
349            }
350         }
351      }
352
353
354      List<XMLTag> cTermAA_pKaTags = inXML.getSubtagsByName(HfgBioXML.C_TERM_AA_PKA_TAG);
355      if (CollectionUtil.hasValues(cTermAA_pKaTags))
356      {
357         for (XMLTag pKaTag : cTermAA_pKaTags)
358         {
359            AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
360            setCTerminalSidechainpKa(aa, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
361
362         }
363      }
364
365
366      if (BooleanUtil.valueOf(inXML.getAttributeValue(HfgBioXML.LOCKED_ATT)))
367      {
368         lock();
369      }
370   }
371
372
373   //###########################################################################
374   // PUBLIC METHODS
375   //###########################################################################
376
377   //--------------------------------------------------------------------------
378   public static Collection<KaSet> values()
379   {
380      return Collections.unmodifiableCollection(sUniqueMap.values());
381   }
382
383   //--------------------------------------------------------------------------
384   public static KaSet valueOf(String inName)
385   {
386      return sUniqueMap.get(inName);
387   }
388
389   //--------------------------------------------------------------------------
390   public String name()
391   {
392      return mName;
393   }
394
395   //--------------------------------------------------------------------------
396   public KaSet setName(String inValue)
397   {
398      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
399      mName = inValue;
400      return this;
401   }
402
403   //--------------------------------------------------------------------------
404   public boolean isLocked()
405   {
406      return mLocked;
407   }
408
409   //--------------------------------------------------------------------------
410   public KaSet lock()
411   {
412      mLocked = true;
413      return this;
414   }
415
416   //--------------------------------------------------------------------------
417   public KaSet setDefaultProteinAnalysisMode(ProteinAnalysisMode inValue)
418   {
419      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
420
421      mDefaultProteinAnalysisMode = inValue;
422      return this;
423   }
424
425   //--------------------------------------------------------------------------
426   public ProteinAnalysisMode getDefaultProteinAnalysisMode()
427   {
428      return mDefaultProteinAnalysisMode;
429   }
430
431
432   //--------------------------------------------------------------------------
433   public XMLNode toXMLNode()
434   {
435      XMLNode node = new XMLTag(HfgBioXML.KA_SET_TAG);
436
437      if (StringUtil.isSet(name())) node.setAttribute(HfgBioXML.NAME_ATT, name());
438
439      if (isLocked()) node.setAttribute(HfgBioXML.LOCKED_ATT, "true");
440
441      if (getDefaultProteinAnalysisMode() != null)
442      {
443         XMLTag subtag = new XMLTag(HfgBioXML.DEFAULT_ANALYSIS_MODE_ATT);
444         node.addSubtag(subtag);
445         subtag.addSubtag(getDefaultProteinAnalysisMode().toXMLTag());
446      }
447
448      if (CollectionUtil.hasValues(mKaMap))
449      {
450         List<Molecule> sortedKeys = new ArrayList<>(mKaMap.keySet());
451         Collections.sort(sortedKeys);
452         for (Molecule molecule : sortedKeys)
453         {
454            List<IonizableGroup> ionizableGroups = mKaMap.get(molecule);
455            XMLTag pKaTag = new XMLTag(HfgBioXML.PKA_TAG);
456            if (molecule instanceof AminoAcid)
457            {
458               pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid) molecule).getThreeLetterCode());
459            }
460            else if (molecule instanceof Monosaccharide)
461            {
462               pKaTag.setAttribute(HfgBioXML.MONOSACCHARIDE_ATT, molecule.name());
463            }
464
465            for (IonizableGroup ionizableGroup : ionizableGroups)
466            {
467               pKaTag.addSubtag(ionizableGroup.toXMLNode());
468            }
469
470            node.addSubtag(pKaTag);
471         }
472      }
473
474      if (CollectionUtil.hasValues(mNTerminalKaMap))
475      {
476         List<Molecule> sortedKeys = new ArrayList<>(mNTerminalKaMap.keySet());
477         Collections.sort(sortedKeys);
478         for (Molecule nTerminalGroup : sortedKeys)
479         {
480            IonizableGroup ionizableGroup = mNTerminalKaMap.get(nTerminalGroup);
481
482            XMLTag nTerm_pKaTag = new XMLTag(HfgBioXML.N_TERM_PKA_TAG);
483
484            if (nTerminalGroup instanceof AminoAcid)
485            {
486               nTerm_pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid)nTerminalGroup).getThreeLetterCode());
487            }
488            else
489            {
490               nTerm_pKaTag.setAttribute(HfgBioXML.N_TERM_ATT, nTerminalGroup.name());
491            }
492
493            nTerm_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
494            nTerm_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
495            node.addSubtag(nTerm_pKaTag);
496         }
497      }
498
499      if (CollectionUtil.hasValues(mCTerminalKaMap))
500      {
501         List<Molecule> sortedKeys = new ArrayList<>(mCTerminalKaMap.keySet());
502         Collections.sort(sortedKeys);
503         for (Molecule cTerminalGroup : sortedKeys)
504         {
505            IonizableGroup ionizableGroup = mCTerminalKaMap.get(cTerminalGroup);
506
507            XMLTag cTerm_pKaTag = new XMLTag(HfgBioXML.C_TERM_PKA_TAG);
508            if (cTerminalGroup instanceof AminoAcid)
509            {
510               cTerm_pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid)cTerminalGroup).getThreeLetterCode());
511            }
512            else
513            {
514               cTerm_pKaTag.setAttribute(HfgBioXML.C_TERM_ATT, cTerminalGroup.name());
515            }
516
517            cTerm_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
518            cTerm_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
519            node.addSubtag(cTerm_pKaTag);
520         }
521      }
522
523      if (CollectionUtil.hasValues(mCTerminalSidechainKaMap))
524      {
525         List<AminoAcid> sortedKeys = new ArrayList<>(mCTerminalSidechainKaMap.keySet());
526         Collections.sort(sortedKeys);
527         for (AminoAcid cTerminalAA : sortedKeys)
528         {
529            IonizableGroup ionizableGroup = mCTerminalSidechainKaMap.get(cTerminalAA);
530
531            XMLTag cTermAA_pKaTag = new XMLTag(HfgBioXML.C_TERM_AA_PKA_TAG);
532            cTermAA_pKaTag.setAttribute(HfgBioXML.AA_ATT, cTerminalAA.getThreeLetterCode());
533            cTermAA_pKaTag.setAttribute(HfgBioXML.VALUE_ATT,  ionizableGroup.getpKa());
534            cTermAA_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
535            node.addSubtag(cTermAA_pKaTag);
536         }
537      }
538
539      return node;
540   }
541
542   //--------------------------------------------------------------------------
543   @Override
544   public String toString()
545   {
546      return name();
547   }
548
549   //--------------------------------------------------------------------------
550   @Override
551   public boolean equals(Object inObj2)
552   {
553      boolean result = false;
554
555      if (inObj2 != null
556            && inObj2 instanceof KaSet)
557      {
558         result = (0 == compareTo((KaSet) inObj2));
559      }
560
561      return result;
562   }
563
564   //--------------------------------------------------------------------------
565   @Override
566   public int compareTo(KaSet inObj2)
567   {
568      int result = -1;
569
570      if (inObj2 != null)
571      {
572         result = 0;
573
574         if (this != inObj2)
575         {
576            result = CompareUtil.compare(mKaMap.size(), inObj2.mKaMap.size());
577
578            if (0 == result)
579            {
580               result = CompareUtil.compare(mNTerminalKaMap.size(), inObj2.mNTerminalKaMap.size());
581            }
582
583            if (0 == result)
584            {
585               result = CompareUtil.compare(mCTerminalKaMap.size(), inObj2.mCTerminalKaMap.size());
586            }
587
588            if (0 == result)
589            {
590               result = CompareUtil.compare(mCTerminalSidechainKaMap.size(), inObj2.mCTerminalSidechainKaMap.size());
591            }
592
593            if (0 == result)
594            {
595               for (Molecule molecule : mKaMap.keySet())
596               {
597                  result = CompareUtil.compare(mKaMap.get(molecule), inObj2.mKaMap.get(molecule));
598                  if (result != 0)
599                  {
600                     break;
601                  }
602               }
603            }
604
605            if (0 == result)
606            {
607               for (Molecule molecule : mNTerminalKaMap.keySet())
608               {
609                  result = CompareUtil.compare(mNTerminalKaMap.get(molecule), inObj2.mNTerminalKaMap.get(molecule));
610                  if (result != 0)
611                  {
612                     break;
613                  }
614               }
615            }
616
617            if (0 == result)
618            {
619               for (Molecule molecule : mCTerminalKaMap.keySet())
620               {
621                  result = CompareUtil.compare(mCTerminalKaMap.get(molecule), inObj2.mCTerminalKaMap.get(molecule));
622                  if (result != 0)
623                  {
624                     break;
625                  }
626               }
627            }
628
629            if (0 == result)
630            {
631               for (AminoAcid aa : mCTerminalSidechainKaMap.keySet())
632               {
633                  result = CompareUtil.compare(mCTerminalSidechainKaMap.get(aa), inObj2.mCTerminalSidechainKaMap.get(aa));
634                  if (result != 0)
635                  {
636                     break;
637                  }
638               }
639            }
640         }
641      }
642
643      return result;
644   }
645
646   //--------------------------------------------------------------------------
647   @Override
648   public KaSet clone()
649   {
650      KaSet newObj;
651      try
652      {
653         newObj = (KaSet) super.clone();
654      }
655      catch (CloneNotSupportedException e)
656      {
657         throw new RuntimeException(e);
658      }
659
660      if (mNTerminalKaMap != null)
661      {
662         newObj.mNTerminalKaMap = new HashMap<>(mNTerminalKaMap);
663      }
664
665      if (mKaMap != null)
666      {
667         newObj.mKaMap = new HashMap<>(mKaMap);
668      }
669
670      if (mCTerminalKaMap != null)
671      {
672         newObj.mCTerminalKaMap = new HashMap<>(mCTerminalKaMap);
673      }
674
675      if (mCTerminalSidechainKaMap != null)
676      {
677         newObj.mCTerminalSidechainKaMap = new HashMap<>(mCTerminalSidechainKaMap);
678      }
679
680      newObj.mLocked = false;
681
682      return newObj;
683   }
684
685   //--------------------------------------------------------------------------
686   /**
687    * Adds a pKa value for the specified amino acid to the set.
688    * Ka = 10^(-pKa)
689    * @param inResidue the amino acid that the pKa value should apply to
690    * @param inpKa the pKa value to associate with the specified amino acid
691    * @param inProtonatedForm the charge of the group's protonated form
692    * @return this KaSet to enable method chaining
693    */
694   public KaSet addpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
695   {
696      return addpKa(inResidue, new IonizableGroup(new Float(Math.pow(10, -inpKa)), inProtonatedForm));
697   }
698
699   //--------------------------------------------------------------------------
700   /**
701    * Adds a pKa value for the specified molecule to the set. This form of addpKa()
702    * can be used to to assign pKa values to monosaccharide groups in a glycan.
703    * Ka = 10^(-pKa)
704    * @param inMolecule the amino acid that the pKa value should apply to
705    * @param inIonizableGroup the ionizable group to associate with the given molecule
706    * @return this KaSet to enable method chaining
707    */
708   public KaSet addpKa(Molecule inMolecule, IonizableGroup inIonizableGroup)
709   {
710      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
711
712      if (inIonizableGroup.getpKa() < 0 || inIonizableGroup.getpKa() > 14)
713      {
714         throw new InvalidValueException(StringUtil.singleQuote(inIonizableGroup.getpKa() + "") + " is not a valid value. The pKa must be between 0 and 14!");
715      }
716
717      List<IonizableGroup> ionizableGroups = mKaMap.get(inMolecule);
718      if (null == ionizableGroups)
719      {
720         ionizableGroups = new ArrayList<>(3);
721         mKaMap.put(inMolecule, ionizableGroups);
722      }
723
724      ionizableGroups.add(inIonizableGroup);
725
726      return this;
727   }
728
729   //--------------------------------------------------------------------------
730   public List<IonizableGroup> getIonizableGroups(Molecule inResidue)
731   {
732      return mKaMap.get(inResidue);
733   }
734
735
736   //--------------------------------------------------------------------------
737   public IonizableGroup getNTerminalKa(NTerminalGroup inNTerminalGroup, AminoAcid inNTerminalResidue)
738   {
739      IonizableGroup value = null;
740
741      if (inNTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS))
742      {
743         value =  mNTerminalKaMap.get(inNTerminalResidue);
744      }
745
746      if (null == value)
747      {
748         value =  mNTerminalKaMap.get(inNTerminalGroup);
749      }
750
751      return value;
752   }
753
754   //--------------------------------------------------------------------------
755   public KaSet setNTerminalpKa(NTerminalGroup inNTerminalGroup, float inpKa, Charge inProtonatedForm)
756   {
757      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
758
759      if (inpKa < 0 || inpKa > 14)
760      {
761         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
762      }
763
764      mNTerminalKaMap.put(inNTerminalGroup, new IonizableGroup(new Float(Math.pow(10, -inpKa)), inProtonatedForm));
765
766      return this;
767   }
768
769   //--------------------------------------------------------------------------
770   public KaSet setNTerminalpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
771   {
772      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
773
774      if (inpKa < 0 || inpKa > 14)
775      {
776         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
777      }
778
779      mNTerminalKaMap.put(inResidue, new IonizableGroup(new Float(Math.pow(10, -inpKa)), inProtonatedForm));
780
781      return this;
782   }
783
784
785
786
787   //--------------------------------------------------------------------------
788   public IonizableGroup getCTerminalKa(CTerminalGroup inCTerminalGroup, AminoAcid inCTerminalResidue)
789   {
790      IonizableGroup value = null;
791
792      if (inCTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
793      {
794         value =  mCTerminalKaMap.get(inCTerminalResidue);
795      }
796
797      if (null == value)
798      {
799         value =  mCTerminalKaMap.get(inCTerminalGroup);
800      }
801
802      return value;
803   }
804
805   //--------------------------------------------------------------------------
806   public KaSet setCTerminalpKa(CTerminalGroup inCTerminalGroup, float inpKa, Charge inProtonatedForm)
807   {
808      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
809
810      if (inpKa < 0 || inpKa > 14)
811      {
812         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
813      }
814
815      mCTerminalKaMap.put(inCTerminalGroup, new IonizableGroup(new Float(Math.pow(10, -inpKa)), inProtonatedForm));
816
817      return this;
818   }
819
820   //--------------------------------------------------------------------------
821   public KaSet setCTerminalpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
822   {
823      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
824
825      if (inpKa < 0 || inpKa > 14)
826      {
827         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
828      }
829
830      mCTerminalKaMap.put(inResidue, new IonizableGroup(new Float(Math.pow(10, -inpKa)), inProtonatedForm));
831
832      return this;
833   }
834
835   //--------------------------------------------------------------------------
836   public KaSet setCTerminalSidechainpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
837   {
838      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
839
840      if (inpKa < 0 || inpKa > 14)
841      {
842         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
843      }
844
845      mCTerminalSidechainKaMap.put(inResidue, new IonizableGroup(new Float(Math.pow(10, -inpKa)), inProtonatedForm));
846
847      return this;
848   }
849
850   //--------------------------------------------------------------------------
851   public IonizableGroup getCTerminalSidechainKa(AminoAcid inResidue)
852   {
853      return mCTerminalSidechainKaMap.get(inResidue);
854   }
855
856
857}