001package com.hfg.bio.proteinproperty; 002 003 004import java.util.Collection; 005import java.util.HashMap; 006import java.util.List; 007import java.util.Map; 008 009import com.hfg.bio.*; 010import com.hfg.bio.glyco.Glycan; 011import com.hfg.bio.seq.AminoAcidComposition; 012import com.hfg.bio.seq.Protein; 013import com.hfg.chem.IonizableGroup; 014import com.hfg.util.collection.CollectionUtil; 015import com.hfg.util.collection.OrderedMap; 016 017//------------------------------------------------------------------------------ 018/** 019 Isoelectric point calculation packaged as a protein property for ease of integration 020 with other protein properties. 021 <div> 022 @author J. Alex Taylor, hairyfatguy.com 023 </div> 024 */ 025//------------------------------------------------------------------------------ 026// com.hfg Library 027// 028// This library is free software; you can redistribute it and/or 029// modify it under the terms of the GNU Lesser General Public 030// License as published by the Free Software Foundation; either 031// version 2.1 of the License, or (at your option) any later version. 032// 033// This library is distributed in the hope that it will be useful, 034// but WITHOUT ANY WARRANTY; without even the implied warranty of 035// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 036// Lesser General Public License for more details. 037// 038// You should have received a copy of the GNU Lesser General Public 039// License along with this library; if not, write to the Free Software 040// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 041// 042// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 043// jataylor@hairyfatguy.com 044//------------------------------------------------------------------------------ 045 046public class IsoelectricPoint<S extends SimpleProteinPropertyCalcSettings> extends SimpleProteinProperty<S, Float> 047{ 048 private KaSet mKaSet; 049 050 private static Map<String, IsoelectricPoint> sUniqueMap = new OrderedMap<>(); 051 052 public static final IsoelectricPoint BJELLQVIST = new IsoelectricPoint(KaSet.BJELLQVIST, 053 "Bjellqvist B, Hughes GJ, Pasquali C, et al. (1993). \"The focusing positions of polypeptides in immobilized pH gradients can be predicted from their amino acid sequences\". Electrophoresis. 14(10):1023-31."); 054 055 public static final IsoelectricPoint EXPASY = new IsoelectricPoint(KaSet.EXPASY); 056 057 public static final IsoelectricPoint EMBOSS = new IsoelectricPoint(KaSet.EMBOSS); 058 059 public static final IsoelectricPoint SILLERO = new IsoelectricPoint(KaSet.SILLERO, 060 "Sillero A, Ribeiro JM (1989). \"Isoelectric points of proteins: theoretical determination\". Analytical biochemistry. 179(2):319-325."); 061 062 public static final IsoelectricPoint SILLERO_ABRIDGED = new IsoelectricPoint(KaSet.SILLERO_ABRIDGED, 063 "Sillero A, Ribeiro JM (1989). \"Isoelectric points of proteins: theoretical determination\". Analytical biochemistry. 179(2):319-325."); 064 065 public static final IsoelectricPoint PATRICKIOS_SIMPLE = new IsoelectricPoint(KaSet.PATRICKIOS_SIMPLE, 066 "Patrickios CS, Yamasaki EN (1995). \"Polypeptide amino acid composition and isoelectric point. II. Comparison between experiment and theory\". Analytical biochemistry. 231(1):82-91."); 067 068 public static final IsoelectricPoint STRYER_1995 = new IsoelectricPoint(KaSet.STRYER_1995, 069 "Stryer L (1995) \"Biochemistry\""); 070 071 public static final IsoelectricPoint GRIMSLEY = new IsoelectricPoint(KaSet.GRIMSLEY, 072 "Grimsley GR, Scholtz JM, Pace CN (2009). \"A summary of the measured pK values of the ionizable groups in folded proteins\". Protein Science. 18(1), 247-251."); 073 074 public static final IsoelectricPoint TAYLOR_NATIVE = new IsoelectricPoint(KaSet.TAYLOR_NATIVE); 075 076 //########################################################################### 077 // CONSTRUCTORS 078 //########################################################################### 079 080 //-------------------------------------------------------------------------- 081 public IsoelectricPoint(String inName, KaSet inKaSet) 082 { 083 super(inName); 084 mKaSet = inKaSet; 085 086 sUniqueMap.put(inName, this); 087 } 088 089 //-------------------------------------------------------------------------- 090 public IsoelectricPoint(KaSet inKaSet) 091 { 092 this(inKaSet.name(), inKaSet); 093 } 094 095 //-------------------------------------------------------------------------- 096 public IsoelectricPoint(KaSet inKaSet, String inReference) 097 { 098 this(inKaSet.name(), inKaSet); 099 setReference(inReference); 100 } 101 102 //########################################################################### 103 // PUBLIC METHODS 104 //########################################################################### 105 106 //--------------------------------------------------------------------------- 107 public static IsoelectricPoint valueOf(KaSet inKaSet) 108 { 109 IsoelectricPoint value = null; 110 111 for (IsoelectricPoint isoelectricPoint : sUniqueMap.values()) 112 { 113 if (isoelectricPoint.getKaSet().equals(inKaSet)) 114 { 115 value = isoelectricPoint; 116 break; 117 } 118 } 119 120 if (null == value) 121 { 122 value = new IsoelectricPoint(inKaSet); 123 } 124 125 return value; 126 } 127 128 //--------------------------------------------------------------------------- 129 public static Collection<IsoelectricPoint> values() 130 { 131 return sUniqueMap != null ? sUniqueMap.values() : null; 132 } 133 134 //-------------------------------------------------------------------------- 135 public String getType() 136 { 137 return "pI"; 138 } 139 140 //-------------------------------------------------------------------------- 141 /** 142 Determines the isoelectric point (the pH at which the net charge is zero) for the protein. 143 @param inProtein the protein on which the calculation should be performed 144 @param inSettings settings for the calculation 145 @return the calculated isoelectric point 146 */ 147 public Float calculate(Protein inProtein, S inSettings) 148 { 149 double incrementSize = 1.0; // The size of the pH increment to be taken. Starts @ 1 then decreases at ea. sign change. 150 double pH = 7.0; 151 152 Float isoelectricPoint = null; 153 154 Map<IonizableGroup, Integer> ionizableGroupMap = constructIonizableGroupMap(inProtein, inSettings); 155 if (CollectionUtil.hasValues(ionizableGroupMap)) 156 { 157 double netCharge = getNetCharge(pH, ionizableGroupMap); 158 159 double lastNetCharge = netCharge; 160 pH -= incrementSize; 161 162 163 while (true) 164 { 165 netCharge = getNetCharge(pH, ionizableGroupMap); 166 167 if (Math.abs(netCharge) < 0.0001 168 && incrementSize < 0.1) 169 { 170 break; 171 } 172 else if (netCharge > 0 && lastNetCharge > 0) 173 { 174 pH += incrementSize; 175 } 176 else if (netCharge < 0 && lastNetCharge < 0) 177 { 178 pH -= incrementSize; 179 } 180 else // The net charge must have changed signs 181 { 182 incrementSize = 0.1 * incrementSize; 183 if (netCharge > 0) pH += incrementSize; 184 else pH -= incrementSize; 185 } 186 187 lastNetCharge = netCharge; 188 } 189 190 isoelectricPoint = (float) (Math.round(pH * 100) / 100.0); 191 } 192 193 return isoelectricPoint; 194 } 195 196 //-------------------------------------------------------------------------- 197 /** 198 Estimates the protein's net charge at the specified pH. 199 @param pH the pH at which the calculation should be performed 200 @param inProtein the protein on which the calculation should be performed 201 @param inSettings settings for the calculation 202 @return the calculated net charge 203 */ 204 public double getNetCharge(double pH, Protein inProtein , S inSettings) 205 { 206 return getNetCharge(pH, constructIonizableGroupMap(inProtein, inSettings)); 207 } 208 209 //-------------------------------------------------------------------------- 210 protected void setKaSet(KaSet inValue) 211 { 212 mKaSet = inValue; 213 } 214 215 //-------------------------------------------------------------------------- 216 protected KaSet getKaSet() 217 { 218 return mKaSet; 219 } 220 221 //-------------------------------------------------------------------------- 222 protected Map<IonizableGroup, Integer> constructIonizableGroupMap(Protein inProtein, S inSettings) 223 { 224 Map<IonizableGroup, Integer> ionizableGroupMap = new HashMap<>(); 225 226 boolean nativeMode = (inSettings.getProteinAnalysisMode() instanceof NativeAnalysisMode); 227 228 if (CollectionUtil.hasValues(inProtein.getChains())) 229 { 230 for (Protein chain : inProtein.getChains()) 231 { // TODO: Force reducing? 232 Map<IonizableGroup, Integer> chainMap = constructIonizableGroupMap(chain, inSettings); 233 for (IonizableGroup group : chainMap.keySet()) 234 { 235 Integer oldValue = ionizableGroupMap.get(group); 236 int newValue = (oldValue != null ? oldValue : 0) + chainMap.get(group); 237 ionizableGroupMap.put(group, newValue); 238 } 239 } 240 241 if (nativeMode) 242 { 243 // Exclude disulfide-linked cysteines 244 List<IonizableGroup> cysGroups = mKaSet.getIonizableGroups(AminoAcid.CYSTEINE); 245 if (cysGroups != null) 246 { 247 ionizableGroupMap.put(cysGroups.get(0), inProtein.getTotalNumFreeCysteines()); 248 } 249 } 250 251 } 252 else if (inProtein.length() > 0) 253 { 254 AminoAcid cTerminalResidue = inProtein.aminoAcidAt(inProtein.length()); 255 AminoAcidComposition aaComposition = inProtein.getAminoAcidComposition(); 256 for (AminoAcid aa : aaComposition.keySet()) 257 { 258 Integer aaCount = aaComposition.get(aa); 259 if (aaCount != null && aaCount > 0) 260 { 261 if (aa == cTerminalResidue 262 && mKaSet.getCTerminalSidechainKa(cTerminalResidue) != null 263 && inProtein.getAminoAcidSet().getCTerminalGroup().equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) 264 { 265 IonizableGroup group = mKaSet.getCTerminalSidechainKa(cTerminalResidue); 266 if (group != null) 267 { 268 ionizableGroupMap.put(group, 1); 269 aaCount--; 270 } 271 } 272 273 List<IonizableGroup> groups = mKaSet.getIonizableGroups(aa); 274 if (groups != null) 275 { 276 if (aa.equals(AminoAcid.CYSTEINE)) 277 { 278 if (nativeMode) 279 { 280 // Exclude disulfide-linked cysteines 281 aaCount = inProtein.getTotalNumFreeCysteines(); 282 } 283 else 284 { 285 ReducedAnalysisMode analysisMode = (ReducedAnalysisMode) inSettings.getProteinAnalysisMode(); 286 if (analysisMode.getAlkylatedCysteine() != null) 287 { 288 List<IonizableGroup> alkCysGroups = mKaSet.getIonizableGroups(analysisMode.getAlkylatedCysteine()); 289 if (CollectionUtil.hasValues(alkCysGroups)) 290 { 291 for (IonizableGroup group : alkCysGroups) 292 { 293 ionizableGroupMap.put(group, aaCount); 294 } 295 } 296 297 aaCount = 0; 298 } 299 } 300 } 301 302 for (IonizableGroup group : groups) 303 { 304 Integer existingCount = ionizableGroupMap.get(group); 305 if (null == existingCount) 306 { 307 existingCount = 0; 308 } 309 310 ionizableGroupMap.put(group, aaCount + existingCount); 311 } 312 } 313 } 314 } 315 316 IonizableGroup group = mKaSet.getNTerminalKa(inProtein.getAminoAcidSet().getNTerminalGroup(), inProtein.aminoAcidAt(1)); 317 318 if (group != null) 319 { 320 Integer existingCount = ionizableGroupMap.get(group); 321 if (null == existingCount) 322 { 323 existingCount = 0; 324 } 325 ionizableGroupMap.put(group, 1 + existingCount); 326 } 327 328 group = mKaSet.getCTerminalKa(inProtein.getAminoAcidSet().getCTerminalGroup(), inProtein.aminoAcidAt(inProtein.length())); 329 if (group != null) 330 { 331 Integer existingCount = ionizableGroupMap.get(group); 332 if (null == existingCount) 333 { 334 existingCount = 0; 335 } 336 ionizableGroupMap.put(group, 1 + existingCount); 337 } 338 339 if (CollectionUtil.hasValues(inProtein.getGlycans())) 340 { 341 for (Glycan glycan : inProtein.getGlycans()) 342 { 343 List<IonizableGroup> ionizableGroups = mKaSet.getIonizableGroups(glycan); 344 if (null == ionizableGroups) 345 { 346 // Default Ka values? 347 ionizableGroups = glycan.getKas(); 348 } 349 350 if (CollectionUtil.hasValues(ionizableGroups)) 351 { 352 for (IonizableGroup ionizableGroup : ionizableGroups) 353 { 354 Integer existingCount = ionizableGroupMap.get(ionizableGroup); 355 if (null == existingCount) 356 { 357 existingCount = 0; 358 } 359 ionizableGroupMap.put(ionizableGroup, 1 + existingCount); 360 } 361 } 362 } 363 } 364 } 365 366 return ionizableGroupMap; 367 } 368 369 //-------------------------------------------------------------------------- 370 /** 371 Estimates the protein's net charge at the specified pH. 372 */ 373 private double getNetCharge(double pH, Map<IonizableGroup, Integer> inIonizableGroupMap) 374 { 375 double netCharge = 0; 376 377 double concOfHIions = Math.pow(10, -pH); 378 379 if (inIonizableGroupMap != null) 380 { 381 for (IonizableGroup group : inIonizableGroupMap.keySet()) 382 { 383 netCharge += group.getCharge(inIonizableGroupMap.get(group), concOfHIions); 384 } 385 } 386 387 return netCharge; 388 } 389}