package com.hfg.bio;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.HashMap;

import com.hfg.bio.glyco.Monosaccharide;
import com.hfg.bio.proteinproperty.ProteinAnalysisMode;
import com.hfg.chem.Charge;
import com.hfg.chem.IonizableGroup;
import com.hfg.chem.Molecule;
import com.hfg.exception.InvalidValueException;
import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.BooleanUtil;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedMap;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;

//------------------------------------------------------------------------------
/**
 * Encapsulation for a group of K<sub>a</sub> (dissociation constant) values.
 * Most commonly used for protein isoelectric point prediction.
 * <p>
 * A set holds four kinds of entries: general per-molecule ionizable groups,
 * N-terminal groups, C-terminal groups, and C-terminal sidechain groups.
 * A set can be {@link #lock() locked}, after which every mutator throws
 * {@link UnmodifyableObjectException}. The predefined constants are locked.
 * </p>
 *
 * @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// jataylor@hairyfatguy.com
//------------------------------------------------------------------------------

public class KaSet implements Cloneable, Comparable<KaSet>
{
   // Registry of every set created via KaSet(String), keyed by name.
   // Must be declared before the constants below since their constructor writes to it.
   private static final Map<String, KaSet> sUniqueMap = new OrderedMap<>();

   /**
    Bjellqvist B, Hughes GJ, Pasquali C, et al. <i>The focusing positions of polypeptides
    in immobilized pH gradients can be predicted from their amino acid sequences</i>.
    <b>Electrophoresis</b>. 1993;14(10):1023-31.
    Available at: <a href='http://www.ncbi.nlm.nih.gov/pubmed/8125050'>http://www.ncbi.nlm.nih.gov/pubmed/8125050</a>.
    Intended for use with reduced and denatured proteins on 2D gels.
    */
   public static final KaSet BJELLQVIST = new KaSet("Bjellqvist");

   /**
    Method from <a href='http://expasy.org/tools/pi_tool.html'>Expasy's Compute pI/Mw tool</a>
    which implements the values from Bjellqvist but mistakenly treats C-terminal Asp and Glu
    <u>sidechain</u> values as C-terminal values.
    Intended for use with reduced and denatured proteins on 2D gels.
    */
   public static final KaSet EXPASY = new KaSet("Expasy");

   /**
    Method from <a href='http://emboss.open-bio.org/rel/rel6/apps/iep.html'>EMBOSS's iep tool</a>.
    Intended for use with reduced and denatured proteins on 2D gels.
    */
   public static final KaSet EMBOSS = new KaSet("EMBOSS");

   /**
    Sillero A, Ribeiro JM. <i>Isoelectric points of proteins:
    theoretical determination</i>. <b>Analytical biochemistry</b>. 1989;179(2):319-325.
    Available at: <a href='http://www.ncbi.nlm.nih.gov/pubmed/2774179'>http://www.ncbi.nlm.nih.gov/pubmed/2774179</a>.
    Intended for use with reduced and denatured proteins on 2D gels.
    */
   public static final KaSet SILLERO = new KaSet("Sillero");

   /**
    Sillero A, Ribeiro JM. <i>Isoelectric points of proteins:
    theoretical determination</i>. <b>Analytical biochemistry</b>. 1989;179(2):319-325.
    Available at: <a href='http://www.ncbi.nlm.nih.gov/pubmed/2774179'>http://www.ncbi.nlm.nih.gov/pubmed/2774179</a>.
    Intended for use with reduced and denatured proteins on 2D gels. An oversimplification for ease of calculation.
    */
   public static final KaSet SILLERO_ABRIDGED = new KaSet("Sillero (abridged)");

   /**
    Patrickios, CS, Yamasaki, EN. <i>Polypeptide amino acid composition and isoelectric point. II. Comparison between experiment and theory</i>.
    <b>Analytical Biochemistry</b>. 1995:231(1):82-91. <a href='http://doi.org/10.1006/abio.1995.1506'>http://doi.org/10.1006/abio.1995.1506</a>.
    An oversimplification for ease of calculation.
    */
   public static final KaSet PATRICKIOS_SIMPLE = new KaSet("Patrickios (simple)");

   /**
    <i>Biochemistry</i> by Lubert Stryer, 1995
    */
   public static final KaSet STRYER_1995 = new KaSet("Stryer (1995)");

   /**
    Grimsley GR, Scholtz JM, Pace CN (2009). <i>A summary of the measured pK values of the ionizable groups in folded proteins.</i>
    <b>Protein Science</b> 18(1), 247-251. Note that they do not provide a value for Arginine so a default value of 12 is used.
    Intended for use with native proteins.
    */
   public static final KaSet GRIMSLEY = new KaSet("Grimsley");

   /**
    Intended for use with native proteins.
    */
   public static final KaSet TAYLOR_NATIVE = new KaSet("Taylor");


   private String mName;
   private ProteinAnalysisMode mDefaultProteinAnalysisMode = ProteinAnalysisMode.REDUCED;
   private boolean mLocked;
   private Map<Molecule, List<IonizableGroup>> mKaMap = new HashMap<>(20);
   private Map<Molecule, IonizableGroup> mNTerminalKaMap = new HashMap<>(10);
   private Map<Molecule, IonizableGroup> mCTerminalKaMap = new HashMap<>(10);
   private Map<AminoAcid, IonizableGroup> mCTerminalSidechainKaMap = new HashMap<>(10);

   // Populate and lock the predefined sets.
   static
   {

      BJELLQVIST.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
                .setNTerminalpKa(AminoAcid.ALANINE, 7.59f, Charge.POSITIVE)
                .setNTerminalpKa(AminoAcid.METHIONINE, 7.00f, Charge.POSITIVE)
                .setNTerminalpKa(AminoAcid.SERINE, 6.93f, Charge.POSITIVE)
                .setNTerminalpKa(AminoAcid.PROLINE, 8.36f, Charge.POSITIVE)
                .setNTerminalpKa(AminoAcid.THREONIE, 6.82f, Charge.POSITIVE)
                .setNTerminalpKa(AminoAcid.VALINE, 7.44f, Charge.POSITIVE)
                .setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f, Charge.POSITIVE)
                .addpKa(AminoAcid.ASPARTIC_ACID, 4.05f, Charge.NEUTRAL)
                .addpKa(AminoAcid.GLUTAMIC_ACID, 4.45f, Charge.NEUTRAL)
                .addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL)
                .addpKa(AminoAcid.TYROSINE, 10f, Charge.NEUTRAL)
                .addpKa(AminoAcid.HISTIDINE, 5.98f, Charge.POSITIVE)
                .addpKa(AminoAcid.LYSINE, 10f, Charge.POSITIVE)
                .addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE)
                .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
                .setCTerminalSidechainpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
                .setCTerminalSidechainpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
                .lock();

      EXPASY.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
            .setNTerminalpKa(AminoAcid.ALANINE, 7.59f, Charge.POSITIVE)
            .setNTerminalpKa(AminoAcid.METHIONINE, 7.00f, Charge.POSITIVE)
            .setNTerminalpKa(AminoAcid.SERINE, 6.93f, Charge.POSITIVE)
            .setNTerminalpKa(AminoAcid.PROLINE, 8.36f, Charge.POSITIVE)
            .setNTerminalpKa(AminoAcid.THREONIE, 6.82f, Charge.POSITIVE)
            .setNTerminalpKa(AminoAcid.VALINE, 7.44f, Charge.POSITIVE)
            .setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f, Charge.POSITIVE)
            .addpKa(AminoAcid.ASPARTIC_ACID, 4.05f, Charge.NEUTRAL)
            .addpKa(AminoAcid.GLUTAMIC_ACID, 4.45f, Charge.NEUTRAL)
            .addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL)
            .addpKa(AminoAcid.TYROSINE, 10f, Charge.NEUTRAL)
            .addpKa(AminoAcid.HISTIDINE, 5.98f, Charge.POSITIVE)
            .addpKa(AminoAcid.LYSINE, 10f, Charge.POSITIVE)
            .addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE)
            .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
            // Note: mistakenly treats C-terminal Asp and Glu sidechain values as C-terminal values.
            .setCTerminalpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
            .setCTerminalpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
            .lock();

      EMBOSS.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.6f, Charge.POSITIVE)
            .addpKa(AminoAcid.ASPARTIC_ACID, 3.9f, Charge.NEUTRAL)
            .addpKa(AminoAcid.GLUTAMIC_ACID, 4.1f, Charge.NEUTRAL)
            .addpKa(AminoAcid.CYSTEINE, 8.5f, Charge.NEUTRAL)
            .addpKa(AminoAcid.TYROSINE, 10.1f, Charge.NEUTRAL)
            .addpKa(AminoAcid.HISTIDINE, 6.5f, Charge.POSITIVE)
            .addpKa(AminoAcid.LYSINE, 10.8f, Charge.POSITIVE)
            .addpKa(AminoAcid.ARGININE, 12.5f, Charge.POSITIVE)
            .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.6f, Charge.NEUTRAL)
            .lock();

      SILLERO.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.2f, Charge.POSITIVE)
             .addpKa(AminoAcid.ASPARTIC_ACID, 4.0f, Charge.NEUTRAL)
             .addpKa(AminoAcid.GLUTAMIC_ACID, 4.5f, Charge.NEUTRAL)
             .addpKa(AminoAcid.CYSTEINE, 9.0f, Charge.NEUTRAL)
             .addpKa(AminoAcid.TYROSINE, 10.0f, Charge.NEUTRAL)
             .addpKa(AminoAcid.HISTIDINE, 6.4f, Charge.POSITIVE)
             .addpKa(AminoAcid.LYSINE, 10.4f, Charge.POSITIVE)
             .addpKa(AminoAcid.ARGININE, 12.0f, Charge.POSITIVE)
             .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.2f, Charge.NEUTRAL)
             .lock();

      SILLERO_ABRIDGED.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 11.2f, Charge.POSITIVE)
                      .addpKa(AminoAcid.ASPARTIC_ACID, 4.2f, Charge.NEUTRAL)
                      .addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
                      .addpKa(AminoAcid.CYSTEINE, 9.5f, Charge.NEUTRAL)
                      .addpKa(AminoAcid.TYROSINE, 9.5f, Charge.NEUTRAL)
                      .addpKa(AminoAcid.HISTIDINE, 6.4f, Charge.POSITIVE)
                      .addpKa(AminoAcid.LYSINE, 11.2f, Charge.POSITIVE)
                      .addpKa(AminoAcid.ARGININE, 11.2f, Charge.POSITIVE)
                      .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 4.2f, Charge.NEUTRAL)
                      .lock();

      PATRICKIOS_SIMPLE.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 11.2f, Charge.POSITIVE)
                       .addpKa(AminoAcid.ASPARTIC_ACID, 4.2f, Charge.NEUTRAL)
                       .addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
                       .addpKa(AminoAcid.LYSINE, 11.2f, Charge.POSITIVE)
                       .addpKa(AminoAcid.ARGININE, 11.2f, Charge.POSITIVE)
                       .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 4.2f, Charge.NEUTRAL)
                       .lock();

      STRYER_1995.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
                 .setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.0f, Charge.POSITIVE)
                 .addpKa(AminoAcid.ASPARTIC_ACID, 4.4f, Charge.NEUTRAL)
                 .addpKa(AminoAcid.GLUTAMIC_ACID, 4.4f, Charge.NEUTRAL)
                 .addpKa(AminoAcid.CYSTEINE, 8.5f, Charge.NEUTRAL)
                 .addpKa(AminoAcid.TYROSINE, 10.0f, Charge.NEUTRAL)
                 .addpKa(AminoAcid.HISTIDINE, 6.5f, Charge.POSITIVE)
                 .addpKa(AminoAcid.LYSINE, 10.0f, Charge.POSITIVE)
                 .addpKa(AminoAcid.ARGININE, 12.0f, Charge.POSITIVE)
                 .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.1f, Charge.NEUTRAL)
                 .lock();

      GRIMSLEY.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
              .setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.7f, Charge.POSITIVE)
              .addpKa(AminoAcid.ASPARTIC_ACID, 3.5f, Charge.NEUTRAL)
              .addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
              .addpKa(AminoAcid.CYSTEINE, 6.8f, Charge.NEUTRAL)
              .addpKa(AminoAcid.TYROSINE, 10.3f, Charge.NEUTRAL)
              .addpKa(AminoAcid.HISTIDINE, 6.6f, Charge.POSITIVE)
              .addpKa(AminoAcid.LYSINE, 10.4f, Charge.POSITIVE)
              .addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE)
              .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.3f, Charge.NEUTRAL)
              .lock();

      TAYLOR_NATIVE.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
                   .setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
                   .setNTerminalpKa(AminoAcid.ALANINE, 7.59f, Charge.POSITIVE)
                   .setNTerminalpKa(AminoAcid.METHIONINE, 7.00f, Charge.POSITIVE)
                   .setNTerminalpKa(AminoAcid.SERINE, 6.93f, Charge.POSITIVE)
                   .setNTerminalpKa(AminoAcid.PROLINE, 8.36f, Charge.POSITIVE)
                   .setNTerminalpKa(AminoAcid.THREONIE, 6.82f, Charge.POSITIVE)
                   .setNTerminalpKa(AminoAcid.VALINE, 7.44f, Charge.POSITIVE)
                   .setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f, Charge.POSITIVE)
                   .addpKa(AminoAcid.ASPARTIC_ACID, 4.5f, Charge.NEUTRAL)
                   .addpKa(AminoAcid.GLUTAMIC_ACID, 4.65f, Charge.NEUTRAL)
                   .addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL)
                   .addpKa(AminoAcid.TYROSINE, 10.1f, Charge.NEUTRAL)
                   .addpKa(AminoAcid.HISTIDINE, 6.6f, Charge.POSITIVE)
                   .addpKa(AminoAcid.LYSINE, 10.78f, Charge.POSITIVE)
                   .addpKa(AminoAcid.ARGININE, 12.25f, Charge.POSITIVE)
                   .setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
                   .setCTerminalpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
                   .setCTerminalpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
                   .lock();

   }

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //--------------------------------------------------------------------------
   /**
    * Creates an empty, unlocked set and registers it under the given name so that
    * it is returned by {@link #values()} and {@link #valueOf(String)}.
    * A later set registered with the same name replaces the earlier registry entry.
    * @param inName the name of the set
    */
   public KaSet(String inName)
   {
      mName = inName;
      sUniqueMap.put(mName, this);
   }

   //--------------------------------------------------------------------------
   /**
    * Reconstructs a set from the XML form produced by {@link #toXMLNode()}.
    * Unlike {@link #KaSet(String)}, this constructor does not add the new set
    * to the registry used by {@link #values()} and {@link #valueOf(String)}.
    * @param inXML the XML node to read; its tag name is verified against the Ka set tag
    */
   public KaSet(XMLNode inXML)
   {
      inXML.verifyTagName(HfgBioXML.KA_SET_TAG);
      mName = inXML.getAttributeValue(HfgBioXML.NAME_ATT);

      XMLTag defaultAnalysisModeTag = inXML.getOptionalSubtagByName(HfgBioXML.DEFAULT_ANALYSIS_MODE_ATT);
      if (defaultAnalysisModeTag != null)
      {
         XMLTag analysisModeTag = defaultAnalysisModeTag.getRequiredSubtagByName(HfgBioXML.PROTEIN_ANALYSIS_MODE_TAG);

         setDefaultProteinAnalysisMode(ProteinAnalysisMode.instantiate(analysisModeTag));
      }

      List<XMLTag> pKaTags = inXML.getSubtagsByName(HfgBioXML.PKA_TAG);
      if (CollectionUtil.hasValues(pKaTags))
      {
         for (XMLTag pKaTag : pKaTags)
         {
            Molecule molecule = null;
            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
            {
               molecule = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
            }
            else if (pKaTag.hasAttribute(HfgBioXML.MONOSACCHARIDE_ATT))
            {
               molecule = Monosaccharide.valueOf(pKaTag.getAttributeValue(HfgBioXML.MONOSACCHARIDE_ATT));
            }

            List<XMLTag> ionizableGroupTags = pKaTag.getSubtagsByName(IonizableGroup.IONIZABLE_GROUP_TAG);
            if (CollectionUtil.hasValues(ionizableGroupTags))
            {
               // Only the ionizable group subtags are parsed; any other subtag is ignored.
               for (XMLTag ionizableGroupTag : ionizableGroupTags)
               {
                  addpKa(molecule, new IonizableGroup(ionizableGroupTag));
               }
            }
            else
            {
               // Old style: the value and protonated form are attributes of the pKa tag itself
               addpKa((AminoAcid)molecule, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
            }
         }
      }

      List<XMLTag> nTerm_pKaTags = inXML.getSubtagsByName(HfgBioXML.N_TERM_PKA_TAG);
      if (CollectionUtil.hasValues(nTerm_pKaTags))
      {
         for (XMLTag pKaTag : nTerm_pKaTags)
         {
            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
            {
               AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
               setNTerminalpKa(aa, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
            }
            else
            {
               NTerminalGroup nTerminalGroup = NTerminalGroup.valueOf(pKaTag.getAttributeValue(HfgBioXML.N_TERM_ATT));
               setNTerminalpKa(nTerminalGroup, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
            }
         }
      }

      List<XMLTag> cTerm_pKaTags = inXML.getSubtagsByName(HfgBioXML.C_TERM_PKA_TAG);
      if (CollectionUtil.hasValues(cTerm_pKaTags))
      {
         for (XMLTag pKaTag : cTerm_pKaTags)
         {
            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
            {
               AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
               setCTerminalpKa(aa, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
            }
            else
            {
               CTerminalGroup cTerminalGroup = CTerminalGroup.valueOf(pKaTag.getAttributeValue(HfgBioXML.C_TERM_ATT));
               setCTerminalpKa(cTerminalGroup, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
            }
         }
      }


      List<XMLTag> cTermAA_pKaTags = inXML.getSubtagsByName(HfgBioXML.C_TERM_AA_PKA_TAG);
      if (CollectionUtil.hasValues(cTermAA_pKaTags))
      {
         for (XMLTag pKaTag : cTermAA_pKaTags)
         {
            AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
            setCTerminalSidechainpKa(aa, Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)), Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));

         }
      }


      // Locking has to come last since every mutator above refuses to run on a locked set.
      if (BooleanUtil.valueOf(inXML.getAttributeValue(HfgBioXML.LOCKED_ATT)))
      {
         lock();
      }
   }


   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //--------------------------------------------------------------------------
   /**
    * Returns the sets that were created via {@link #KaSet(String)} (including the
    * predefined constants).
    * @return an unmodifiable view of the registered sets
    */
   public static Collection<KaSet> values()
   {
      return Collections.unmodifiableCollection(sUniqueMap.values());
   }

   //--------------------------------------------------------------------------
   /**
    * Looks up a registered set by the name it was constructed with.
    * @param inName the name to look up
    * @return the result of the registry lookup for the specified name
    */
   public static KaSet valueOf(String inName)
   {
      return sUniqueMap.get(inName);
   }

   //--------------------------------------------------------------------------
   /**
    * @return the name of this set
    */
   public String name()
   {
      return mName;
   }

   //--------------------------------------------------------------------------
   /**
    * Changes the name of this set. Note that the registry consulted by
    * {@link #valueOf(String)} is not updated by this method.
    * @param inValue the new name
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    */
   public KaSet setName(String inValue)
   {
      checkUnlocked();
      mName = inValue;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * @return whether this set has been locked against modification
    */
   public boolean isLocked()
   {
      return mLocked;
   }

   //--------------------------------------------------------------------------
   /**
    * Locks this set so that subsequent calls to its mutators throw
    * {@link UnmodifyableObjectException}. There is no unlock; use {@link #clone()}
    * to obtain a modifiable copy.
    * @return this KaSet to enable method chaining
    */
   public KaSet lock()
   {
      mLocked = true;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Specifies the protein analysis mode that this set is intended to be used with.
    * @param inValue the default analysis mode
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    */
   public KaSet setDefaultProteinAnalysisMode(ProteinAnalysisMode inValue)
   {
      checkUnlocked();

      mDefaultProteinAnalysisMode = inValue;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * @return the default analysis mode (REDUCED unless it has been changed)
    */
   public ProteinAnalysisMode getDefaultProteinAnalysisMode()
   {
      return mDefaultProteinAnalysisMode;
   }


   //--------------------------------------------------------------------------
   /**
    * Serializes this set to XML in the form understood by {@link #KaSet(XMLNode)}.
    * The entries of each category are written in the sorted order of their keys.
    * @return the XML representation of this set
    */
   public XMLNode toXMLNode()
   {
      XMLNode node = new XMLTag(HfgBioXML.KA_SET_TAG);

      if (StringUtil.isSet(name())) node.setAttribute(HfgBioXML.NAME_ATT, name());

      if (isLocked()) node.setAttribute(HfgBioXML.LOCKED_ATT, "true");

      if (getDefaultProteinAnalysisMode() != null)
      {
         XMLTag subtag = new XMLTag(HfgBioXML.DEFAULT_ANALYSIS_MODE_ATT);
         node.addSubtag(subtag);
         subtag.addSubtag(getDefaultProteinAnalysisMode().toXMLTag());
      }

      if (CollectionUtil.hasValues(mKaMap))
      {
         List<Molecule> sortedKeys = new ArrayList<>(mKaMap.keySet());
         Collections.sort(sortedKeys);
         for (Molecule molecule : sortedKeys)
         {
            List<IonizableGroup> ionizableGroups = mKaMap.get(molecule);
            XMLTag pKaTag = new XMLTag(HfgBioXML.PKA_TAG);
            if (molecule instanceof AminoAcid)
            {
               pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid) molecule).getThreeLetterCode());
            }
            else if (molecule instanceof Monosaccharide)
            {
               pKaTag.setAttribute(HfgBioXML.MONOSACCHARIDE_ATT, molecule.name());
            }

            for (IonizableGroup ionizableGroup : ionizableGroups)
            {
               pKaTag.addSubtag(ionizableGroup.toXMLNode());
            }

            node.addSubtag(pKaTag);
         }
      }

      if (CollectionUtil.hasValues(mNTerminalKaMap))
      {
         List<Molecule> sortedKeys = new ArrayList<>(mNTerminalKaMap.keySet());
         Collections.sort(sortedKeys);
         for (Molecule nTerminalGroup : sortedKeys)
         {
            IonizableGroup ionizableGroup = mNTerminalKaMap.get(nTerminalGroup);

            XMLTag nTerm_pKaTag = new XMLTag(HfgBioXML.N_TERM_PKA_TAG);

            // The key is either a residue-specific entry or an N-terminal group entry
            if (nTerminalGroup instanceof AminoAcid)
            {
               nTerm_pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid)nTerminalGroup).getThreeLetterCode());
            }
            else
            {
               nTerm_pKaTag.setAttribute(HfgBioXML.N_TERM_ATT, nTerminalGroup.name());
            }

            nTerm_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
            nTerm_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
            node.addSubtag(nTerm_pKaTag);
         }
      }

      if (CollectionUtil.hasValues(mCTerminalKaMap))
      {
         List<Molecule> sortedKeys = new ArrayList<>(mCTerminalKaMap.keySet());
         Collections.sort(sortedKeys);
         for (Molecule cTerminalGroup : sortedKeys)
         {
            IonizableGroup ionizableGroup = mCTerminalKaMap.get(cTerminalGroup);

            XMLTag cTerm_pKaTag = new XMLTag(HfgBioXML.C_TERM_PKA_TAG);

            // The key is either a residue-specific entry or a C-terminal group entry
            if (cTerminalGroup instanceof AminoAcid)
            {
               cTerm_pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid)cTerminalGroup).getThreeLetterCode());
            }
            else
            {
               cTerm_pKaTag.setAttribute(HfgBioXML.C_TERM_ATT, cTerminalGroup.name());
            }

            cTerm_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
            cTerm_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
            node.addSubtag(cTerm_pKaTag);
         }
      }

      if (CollectionUtil.hasValues(mCTerminalSidechainKaMap))
      {
         List<AminoAcid> sortedKeys = new ArrayList<>(mCTerminalSidechainKaMap.keySet());
         Collections.sort(sortedKeys);
         for (AminoAcid cTerminalAA : sortedKeys)
         {
            IonizableGroup ionizableGroup = mCTerminalSidechainKaMap.get(cTerminalAA);

            XMLTag cTermAA_pKaTag = new XMLTag(HfgBioXML.C_TERM_AA_PKA_TAG);
            cTermAA_pKaTag.setAttribute(HfgBioXML.AA_ATT, cTerminalAA.getThreeLetterCode());
            cTermAA_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
            cTermAA_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
            node.addSubtag(cTermAA_pKaTag);
         }
      }

      return node;
   }

   //--------------------------------------------------------------------------
   /**
    * @return the name of this set
    */
   @Override
   public String toString()
   {
      return name();
   }

   //--------------------------------------------------------------------------
   /**
    * Two sets are equal when {@link #compareTo(KaSet)} returns zero. The name,
    * the lock state, and the default analysis mode are not part of the comparison.
    * @param inObj2 the object to compare with
    * @return whether the specified object is a KaSet that compares as equal to this one
    */
   @Override
   public boolean equals(Object inObj2)
   {
      // instanceof is false for null so no separate null check is needed
      return (inObj2 instanceof KaSet
              && 0 == compareTo((KaSet) inObj2));
   }

   //--------------------------------------------------------------------------
   /**
    * Hash code consistent with {@link #equals(Object)}. It is derived only from the
    * number of entries in each category since compareTo() can only return zero for
    * sets whose categories have matching sizes. As with equals(), the value changes
    * if an unlocked set is modified.
    * @return the hash code for this set
    */
   @Override
   public int hashCode()
   {
      int result = mKaMap.size();
      result = 31 * result + mNTerminalKaMap.size();
      result = 31 * result + mCTerminalKaMap.size();
      result = 31 * result + mCTerminalSidechainKaMap.size();

      return result;
   }

   //--------------------------------------------------------------------------
   /**
    * Compares first by the number of entries in each category (general, N-terminal,
    * C-terminal, C-terminal sidechain) and then by the values stored under each of
    * this set's keys.
    * @param inObj2 the set to compare with
    * @return -1 if the specified set is null, 0 if it is this set or compares as
    *         equal, otherwise the first non-zero comparison result
    */
   @Override
   public int compareTo(KaSet inObj2)
   {
      int result = -1;

      if (inObj2 != null)
      {
         result = 0;

         if (this != inObj2)
         {
            result = CompareUtil.compare(mKaMap.size(), inObj2.mKaMap.size());

            if (0 == result)
            {
               result = CompareUtil.compare(mNTerminalKaMap.size(), inObj2.mNTerminalKaMap.size());
            }

            if (0 == result)
            {
               result = CompareUtil.compare(mCTerminalKaMap.size(), inObj2.mCTerminalKaMap.size());
            }

            if (0 == result)
            {
               result = CompareUtil.compare(mCTerminalSidechainKaMap.size(), inObj2.mCTerminalSidechainKaMap.size());
            }

            if (0 == result)
            {
               for (Molecule molecule : mKaMap.keySet())
               {
                  result = CompareUtil.compare(mKaMap.get(molecule), inObj2.mKaMap.get(molecule));
                  if (result != 0)
                  {
                     break;
                  }
               }
            }

            if (0 == result)
            {
               for (Molecule molecule : mNTerminalKaMap.keySet())
               {
                  result = CompareUtil.compare(mNTerminalKaMap.get(molecule), inObj2.mNTerminalKaMap.get(molecule));
                  if (result != 0)
                  {
                     break;
                  }
               }
            }

            if (0 == result)
            {
               for (Molecule molecule : mCTerminalKaMap.keySet())
               {
                  result = CompareUtil.compare(mCTerminalKaMap.get(molecule), inObj2.mCTerminalKaMap.get(molecule));
                  if (result != 0)
                  {
                     break;
                  }
               }
            }

            if (0 == result)
            {
               for (AminoAcid aa : mCTerminalSidechainKaMap.keySet())
               {
                  result = CompareUtil.compare(mCTerminalSidechainKaMap.get(aa), inObj2.mCTerminalSidechainKaMap.get(aa));
                  if (result != 0)
                  {
                     break;
                  }
               }
            }
         }
      }

      return result;
   }

   //--------------------------------------------------------------------------
   /**
    * Creates an unlocked copy of this set. The copy has its own maps and its own
    * per-molecule lists so that modifying the copy never alters this set (which
    * matters in particular when cloning one of the locked predefined sets).
    * The IonizableGroup objects themselves are shared between the two sets.
    * The copy is not added to the registry used by {@link #values()}.
    * @return an unlocked copy of this set
    */
   @Override
   public KaSet clone()
   {
      KaSet newObj;
      try
      {
         newObj = (KaSet) super.clone();
      }
      catch (CloneNotSupportedException e)
      {
         throw new RuntimeException(e);
      }

      if (mNTerminalKaMap != null)
      {
         newObj.mNTerminalKaMap = new HashMap<>(mNTerminalKaMap);
      }

      if (mKaMap != null)
      {
         // Copy each list as well. Copying only the map would leave both sets
         // appending to the same lists in addpKa().
         Map<Molecule, List<IonizableGroup>> kaMapCopy = new HashMap<>(20);
         for (Map.Entry<Molecule, List<IonizableGroup>> entry : mKaMap.entrySet())
         {
            kaMapCopy.put(entry.getKey(), new ArrayList<>(entry.getValue()));
         }

         newObj.mKaMap = kaMapCopy;
      }

      if (mCTerminalKaMap != null)
      {
         newObj.mCTerminalKaMap = new HashMap<>(mCTerminalKaMap);
      }

      if (mCTerminalSidechainKaMap != null)
      {
         newObj.mCTerminalSidechainKaMap = new HashMap<>(mCTerminalSidechainKaMap);
      }

      newObj.mLocked = false;

      return newObj;
   }

   //--------------------------------------------------------------------------
   /**
    * Adds a pKa value for the specified amino acid to the set.
    * Ka = 10^(-pKa)
    * @param inResidue the amino acid that the pKa value should apply to
    * @param inpKa the pKa value to associate with the specified amino acid
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    * @throws InvalidValueException if the resulting group's pKa is not between 0 and 14
    */
   public KaSet addpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
   {
      return addpKa(inResidue, new IonizableGroup(pKaToKa(inpKa), inProtonatedForm));
   }

   //--------------------------------------------------------------------------
   /**
    * Adds a pKa value for the specified molecule to the set. This form of addpKa()
    * can be used to assign pKa values to monosaccharide groups in a glycan.
    * A molecule may have several ionizable groups; each call appends another one.
    * Ka = 10^(-pKa)
    * @param inMolecule the molecule that the pKa value should apply to
    * @param inIonizableGroup the ionizable group to associate with the given molecule
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    * @throws InvalidValueException if the group's pKa is not between 0 and 14
    */
   public KaSet addpKa(Molecule inMolecule, IonizableGroup inIonizableGroup)
   {
      checkUnlocked();

      if (inIonizableGroup.getpKa() < 0 || inIonizableGroup.getpKa() > 14)
      {
         throw new InvalidValueException(StringUtil.singleQuote(inIonizableGroup.getpKa() + "") + " is not a valid value. The pKa must be between 0 and 14!");
      }

      List<IonizableGroup> ionizableGroups = mKaMap.get(inMolecule);
      if (null == ionizableGroups)
      {
         ionizableGroups = new ArrayList<>(3);
         mKaMap.put(inMolecule, ionizableGroups);
      }

      ionizableGroups.add(inIonizableGroup);

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Returns the ionizable groups that were added for the specified molecule via addpKa().
    * Note that the returned list is the internal list and not a copy.
    * @param inResidue the molecule of interest
    * @return the ionizable groups for the molecule or null if none have been added
    */
   public List<IonizableGroup> getIonizableGroups(Molecule inResidue)
   {
      return mKaMap.get(inResidue);
   }


   //--------------------------------------------------------------------------
   /**
    * Returns the ionizable group to use for the N-terminus. For an unmodified N-terminus
    * a residue-specific value is preferred if one has been set; otherwise the value
    * set for the N-terminal group is used.
    * @param inNTerminalGroup the N-terminal group
    * @param inNTerminalResidue the N-terminal residue
    * @return the matching ionizable group or null if neither lookup has a value
    */
   public IonizableGroup getNTerminalKa(NTerminalGroup inNTerminalGroup, AminoAcid inNTerminalResidue)
   {
      IonizableGroup value = null;

      if (inNTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS))
      {
         value = mNTerminalKaMap.get(inNTerminalResidue);
      }

      if (null == value)
      {
         value = mNTerminalKaMap.get(inNTerminalGroup);
      }

      return value;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the pKa value for the specified N-terminal group, replacing any previous value.
    * @param inNTerminalGroup the N-terminal group that the pKa value should apply to
    * @param inpKa the pKa value (0 to 14)
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    * @throws InvalidValueException if the pKa is not between 0 and 14
    */
   public KaSet setNTerminalpKa(NTerminalGroup inNTerminalGroup, float inpKa, Charge inProtonatedForm)
   {
      mNTerminalKaMap.put(inNTerminalGroup, createValidatedGroup(inpKa, inProtonatedForm));

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the N-terminal pKa value to use when the specified amino acid is the
    * N-terminal residue, replacing any previous value.
    * @param inResidue the N-terminal amino acid that the pKa value should apply to
    * @param inpKa the pKa value (0 to 14)
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    * @throws InvalidValueException if the pKa is not between 0 and 14
    */
   public KaSet setNTerminalpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
   {
      mNTerminalKaMap.put(inResidue, createValidatedGroup(inpKa, inProtonatedForm));

      return this;
   }


   //--------------------------------------------------------------------------
   /**
    * Returns the ionizable group to use for the C-terminus. For an unmodified C-terminus
    * a residue-specific value is preferred if one has been set; otherwise the value
    * set for the C-terminal group is used.
    * @param inCTerminalGroup the C-terminal group
    * @param inCTerminalResidue the C-terminal residue
    * @return the matching ionizable group or null if neither lookup has a value
    */
   public IonizableGroup getCTerminalKa(CTerminalGroup inCTerminalGroup, AminoAcid inCTerminalResidue)
   {
      IonizableGroup value = null;

      if (inCTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
      {
         value = mCTerminalKaMap.get(inCTerminalResidue);
      }

      if (null == value)
      {
         value = mCTerminalKaMap.get(inCTerminalGroup);
      }

      return value;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the pKa value for the specified C-terminal group, replacing any previous value.
    * @param inCTerminalGroup the C-terminal group that the pKa value should apply to
    * @param inpKa the pKa value (0 to 14)
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    * @throws InvalidValueException if the pKa is not between 0 and 14
    */
   public KaSet setCTerminalpKa(CTerminalGroup inCTerminalGroup, float inpKa, Charge inProtonatedForm)
   {
      mCTerminalKaMap.put(inCTerminalGroup, createValidatedGroup(inpKa, inProtonatedForm));

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the C-terminal pKa value to use when the specified amino acid is the
    * C-terminal residue, replacing any previous value.
    * @param inResidue the C-terminal amino acid that the pKa value should apply to
    * @param inpKa the pKa value (0 to 14)
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    * @throws InvalidValueException if the pKa is not between 0 and 14
    */
   public KaSet setCTerminalpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
   {
      mCTerminalKaMap.put(inResidue, createValidatedGroup(inpKa, inProtonatedForm));

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the sidechain pKa value to use for the specified amino acid when it is the
    * C-terminal residue, replacing any previous value.
    * @param inResidue the C-terminal amino acid whose sidechain the pKa value should apply to
    * @param inpKa the pKa value (0 to 14)
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set is locked
    * @throws InvalidValueException if the pKa is not between 0 and 14
    */
   public KaSet setCTerminalSidechainpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
   {
      mCTerminalSidechainKaMap.put(inResidue, createValidatedGroup(inpKa, inProtonatedForm));

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * @param inResidue the C-terminal amino acid of interest
    * @return the C-terminal sidechain ionizable group for the residue or null if none was set
    */
   public IonizableGroup getCTerminalSidechainKa(AminoAcid inResidue)
   {
      return mCTerminalSidechainKaMap.get(inResidue);
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //--------------------------------------------------------------------------
   // Throws if this set has been locked. Called at the top of every mutator.
   private void checkUnlocked()
   {
      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
   }

   //--------------------------------------------------------------------------
   // Converts a pKa to the corresponding Ka (10^(-pKa)), narrowed to float precision.
   private static Float pKaToKa(float inpKa)
   {
      return Float.valueOf((float) Math.pow(10, -inpKa));
   }

   //--------------------------------------------------------------------------
   // Shared front end for the terminal setters: checks the lock, then the pKa range,
   // and then builds the ionizable group to be stored.
   private IonizableGroup createValidatedGroup(float inpKa, Charge inProtonatedForm)
   {
      checkUnlocked();

      if (inpKa < 0 || inpKa > 14)
      {
         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
      }

      return new IonizableGroup(pKaToKa(inpKa), inProtonatedForm);
   }

}