001package com.hfg.bio.proteinproperty;
002
003
004import java.util.Collection;
005import java.util.HashMap;
006import java.util.List;
007import java.util.Map;
008
009import com.hfg.bio.*;
010import com.hfg.bio.glyco.Glycan;
011import com.hfg.bio.seq.AminoAcidComposition;
012import com.hfg.bio.seq.Protein;
013import com.hfg.chem.IonizableGroup;
014import com.hfg.util.collection.CollectionUtil;
015import com.hfg.util.collection.OrderedMap;
016
017//------------------------------------------------------------------------------
018/**
019 Isoelectric point calculation packaged as a protein property for ease of integration
020 with other protein properties.
021 <div>
022  @author J. Alex Taylor, hairyfatguy.com
023 </div>
024 */
025//------------------------------------------------------------------------------
026// com.hfg Library
027//
028// This library is free software; you can redistribute it and/or
029// modify it under the terms of the GNU Lesser General Public
030// License as published by the Free Software Foundation; either
031// version 2.1 of the License, or (at your option) any later version.
032//
033// This library is distributed in the hope that it will be useful,
034// but WITHOUT ANY WARRANTY; without even the implied warranty of
035// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
036// Lesser General Public License for more details.
037//
038// You should have received a copy of the GNU Lesser General Public
039// License along with this library; if not, write to the Free Software
040// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
041//
042// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
043// jataylor@hairyfatguy.com
044//------------------------------------------------------------------------------
045
046public class IsoelectricPoint<S extends SimpleProteinPropertyCalcSettings> extends SimpleProteinProperty<S, Float>
047{
048    private KaSet mKaSet;
049
050    private static Map<String, IsoelectricPoint> sUniqueMap = new OrderedMap<>();
051
052    public static final IsoelectricPoint BJELLQVIST       = new IsoelectricPoint(KaSet.BJELLQVIST,
053          "Bjellqvist B, Hughes GJ, Pasquali C, et al. (1993). \"The focusing positions of polypeptides in immobilized pH gradients can be predicted from their amino acid sequences\". Electrophoresis. 14(10):1023-31.");
054
055    public static final IsoelectricPoint EXPASY           = new IsoelectricPoint(KaSet.EXPASY);
056
057    public static final IsoelectricPoint EMBOSS           = new IsoelectricPoint(KaSet.EMBOSS);
058
059    public static final IsoelectricPoint SILLERO          = new IsoelectricPoint(KaSet.SILLERO,
060          "Sillero A, Ribeiro JM (1989). \"Isoelectric points of proteins:  theoretical determination\". Analytical biochemistry. 179(2):319-325.");
061
062    public static final IsoelectricPoint SILLERO_ABRIDGED = new IsoelectricPoint(KaSet.SILLERO_ABRIDGED,
063          "Sillero A, Ribeiro JM (1989). \"Isoelectric points of proteins:  theoretical determination\". Analytical biochemistry. 179(2):319-325.");
064
065    public static final IsoelectricPoint PATRICKIOS_SIMPLE = new IsoelectricPoint(KaSet.PATRICKIOS_SIMPLE,
066          "Patrickios CS, Yamasaki EN (1995). \"Polypeptide amino acid composition and isoelectric point. II. Comparison between experiment and theory\". Analytical biochemistry. 231(1):82-91.");
067
068    public static final IsoelectricPoint STRYER_1995      = new IsoelectricPoint(KaSet.STRYER_1995,
069          "Stryer L (1995) \"Biochemistry\"");
070
071    public static final IsoelectricPoint GRIMSLEY         = new IsoelectricPoint(KaSet.GRIMSLEY,
072          "Grimsley GR, Scholtz JM, Pace CN (2009). \"A summary of the measured pK values of the ionizable groups in folded proteins\". Protein Science. 18(1), 247-251.");
073
074    public static final IsoelectricPoint TAYLOR_NATIVE    = new IsoelectricPoint(KaSet.TAYLOR_NATIVE);
075
076   //###########################################################################
077   // CONSTRUCTORS
078   //###########################################################################
079
080    //--------------------------------------------------------------------------
081    public IsoelectricPoint(String inName, KaSet inKaSet)
082    {
083        super(inName);
084        mKaSet = inKaSet;
085
086        sUniqueMap.put(inName, this);
087    }
088
089    //--------------------------------------------------------------------------
090    public IsoelectricPoint(KaSet inKaSet)
091    {
092        this(inKaSet.name(), inKaSet);
093    }
094
095    //--------------------------------------------------------------------------
096    public IsoelectricPoint(KaSet inKaSet, String inReference)
097    {
098        this(inKaSet.name(), inKaSet);
099        setReference(inReference);
100    }
101
102   //###########################################################################
103   // PUBLIC METHODS
104   //###########################################################################
105
106    //---------------------------------------------------------------------------
107    public static IsoelectricPoint valueOf(KaSet inKaSet)
108    {
109        IsoelectricPoint value = null;
110
111        for (IsoelectricPoint isoelectricPoint : sUniqueMap.values())
112        {
113            if (isoelectricPoint.getKaSet().equals(inKaSet))
114            {
115                value = isoelectricPoint;
116                break;
117            }
118        }
119
120        if (null == value)
121        {
122            value = new IsoelectricPoint(inKaSet);
123        }
124
125        return value;
126    }
127
128    //---------------------------------------------------------------------------
129    public static Collection<IsoelectricPoint> values()
130    {
131        return sUniqueMap != null ? sUniqueMap.values() : null;
132    }
133
134    //--------------------------------------------------------------------------
135    public String getType()
136    {
137        return "pI";
138    }
139
140    //--------------------------------------------------------------------------
141    /**
142     Determines the isoelectric point (the pH at which the net charge is zero) for the protein.
143     @param inProtein the protein on which the calculation should be performed
144     @param inSettings settings for the calculation
145     @return the calculated isoelectric point
146     */
147    public Float calculate(Protein inProtein, S inSettings)
148    {
149        double incrementSize = 1.0;     // The size of the pH increment to be taken. Starts @ 1 then decreases at ea. sign change.
150        double pH = 7.0;
151
152        Float isoelectricPoint = null;
153
154        Map<IonizableGroup, Integer> ionizableGroupMap = constructIonizableGroupMap(inProtein, inSettings);
155        if (CollectionUtil.hasValues(ionizableGroupMap))
156        {
157            double netCharge = getNetCharge(pH, ionizableGroupMap);
158
159            double lastNetCharge = netCharge;
160            pH -= incrementSize;
161
162
163            while (true)
164            {
165                netCharge = getNetCharge(pH, ionizableGroupMap);
166
167                if (Math.abs(netCharge) < 0.0001
168                      && incrementSize < 0.1)
169                {
170                    break;
171                }
172                else if (netCharge > 0 && lastNetCharge > 0)
173                {
174                    pH += incrementSize;
175                }
176                else if (netCharge < 0 && lastNetCharge < 0)
177                {
178                    pH -= incrementSize;
179                }
180                else // The net charge must have changed signs
181                {
182                    incrementSize = 0.1 * incrementSize;
183                    if (netCharge > 0) pH += incrementSize;
184                    else pH -= incrementSize;
185                }
186
187                lastNetCharge = netCharge;
188            }
189
190            isoelectricPoint = (float) (Math.round(pH * 100) / 100.0);
191        }
192
193        return isoelectricPoint;
194    }
195
196    //--------------------------------------------------------------------------
197    /**
198     Estimates the protein's net charge at the specified pH.
199     @param pH the pH at which the calculation should be performed
200     @param inProtein the protein on which the calculation should be performed
201     @param inSettings settings for the calculation
202     @return the calculated net charge
203     */
204    public double getNetCharge(double pH, Protein inProtein , S inSettings)
205    {
206        return getNetCharge(pH, constructIonizableGroupMap(inProtein, inSettings));
207    }
208
209    //--------------------------------------------------------------------------
210    protected void setKaSet(KaSet inValue)
211    {
212        mKaSet = inValue;
213    }
214
215    //--------------------------------------------------------------------------
216    protected KaSet getKaSet()
217    {
218        return mKaSet;
219    }
220
221    //--------------------------------------------------------------------------
222    protected Map<IonizableGroup, Integer> constructIonizableGroupMap(Protein inProtein, S inSettings)
223    {
224        Map<IonizableGroup, Integer> ionizableGroupMap = new HashMap<>();
225
226        boolean nativeMode = (inSettings.getProteinAnalysisMode() instanceof NativeAnalysisMode);
227
228        if (CollectionUtil.hasValues(inProtein.getChains()))
229        {
230            for (Protein chain : inProtein.getChains())
231            {                                                                             // TODO: Force reducing?
232                Map<IonizableGroup, Integer> chainMap = constructIonizableGroupMap(chain, inSettings);
233                for (IonizableGroup group : chainMap.keySet())
234                {
235                    Integer oldValue = ionizableGroupMap.get(group);
236                    int newValue = (oldValue != null ? oldValue : 0) + chainMap.get(group);
237                    ionizableGroupMap.put(group, newValue);
238                }
239            }
240
241            if (nativeMode)
242            {
243                // Exclude disulfide-linked cysteines
244                List<IonizableGroup> cysGroups = mKaSet.getIonizableGroups(AminoAcid.CYSTEINE);
245                if (cysGroups != null)
246                {
247                    ionizableGroupMap.put(cysGroups.get(0), inProtein.getTotalNumFreeCysteines());
248                }
249            }
250
251        }
252        else if (inProtein.length() > 0)
253        {
254            AminoAcid cTerminalResidue = inProtein.aminoAcidAt(inProtein.length());
255            AminoAcidComposition aaComposition = inProtein.getAminoAcidComposition();
256            for (AminoAcid aa : aaComposition.keySet())
257            {
258                Integer aaCount = aaComposition.get(aa);
259                if (aaCount != null && aaCount > 0)
260                {
261                    if (aa == cTerminalResidue
262                            && mKaSet.getCTerminalSidechainKa(cTerminalResidue) != null
263                            && inProtein.getAminoAcidSet().getCTerminalGroup().equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
264                    {
265                        IonizableGroup group = mKaSet.getCTerminalSidechainKa(cTerminalResidue);
266                        if (group != null)
267                        {
268                            ionizableGroupMap.put(group, 1);
269                            aaCount--;
270                        }
271                    }
272
273                    List<IonizableGroup> groups = mKaSet.getIonizableGroups(aa);
274                    if (groups != null)
275                    {
276                        if (aa.equals(AminoAcid.CYSTEINE))
277                        {
278                            if (nativeMode)
279                            {
280                                // Exclude disulfide-linked cysteines
281                                aaCount = inProtein.getTotalNumFreeCysteines();
282                            }
283                            else
284                            {
285                                ReducedAnalysisMode analysisMode = (ReducedAnalysisMode) inSettings.getProteinAnalysisMode();
286                                if (analysisMode.getAlkylatedCysteine() != null)
287                                {
288                                    List<IonizableGroup> alkCysGroups = mKaSet.getIonizableGroups(analysisMode.getAlkylatedCysteine());
289                                    if (CollectionUtil.hasValues(alkCysGroups))
290                                    {
291                                        for (IonizableGroup group : alkCysGroups)
292                                        {
293                                            ionizableGroupMap.put(group, aaCount);
294                                        }
295                                    }
296
297                                    aaCount = 0;
298                                }
299                            }
300                        }
301
302                        for (IonizableGroup group : groups)
303                        {
304                            Integer existingCount = ionizableGroupMap.get(group);
305                            if (null == existingCount)
306                            {
307                                existingCount = 0;
308                            }
309
310                            ionizableGroupMap.put(group, aaCount + existingCount);
311                        }
312                    }
313                }
314            }
315
316            IonizableGroup group = mKaSet.getNTerminalKa(inProtein.getAminoAcidSet().getNTerminalGroup(), inProtein.aminoAcidAt(1));
317
318            if (group != null)
319            {
320                Integer existingCount = ionizableGroupMap.get(group);
321                if (null == existingCount)
322                {
323                    existingCount = 0;
324                }
325                ionizableGroupMap.put(group, 1 + existingCount);
326            }
327
328            group = mKaSet.getCTerminalKa(inProtein.getAminoAcidSet().getCTerminalGroup(), inProtein.aminoAcidAt(inProtein.length()));
329            if (group != null)
330            {
331                Integer existingCount = ionizableGroupMap.get(group);
332                if (null == existingCount)
333                {
334                    existingCount = 0;
335                }
336                ionizableGroupMap.put(group, 1 + existingCount);
337            }
338
339            if (CollectionUtil.hasValues(inProtein.getGlycans()))
340            {
341                for (Glycan glycan : inProtein.getGlycans())
342                {
343                    List<IonizableGroup> ionizableGroups = mKaSet.getIonizableGroups(glycan);
344                    if (null == ionizableGroups)
345                    {
346                        // Default Ka values?
347                        ionizableGroups = glycan.getKas();
348                    }
349
350                    if (CollectionUtil.hasValues(ionizableGroups))
351                    {
352                        for (IonizableGroup ionizableGroup : ionizableGroups)
353                        {
354                            Integer existingCount = ionizableGroupMap.get(ionizableGroup);
355                            if (null == existingCount)
356                            {
357                                existingCount = 0;
358                            }
359                            ionizableGroupMap.put(ionizableGroup, 1 + existingCount);
360                        }
361                    }
362                }
363            }
364        }
365
366        return ionizableGroupMap;
367    }
368
369    //--------------------------------------------------------------------------
370    /**
371     Estimates the protein's net charge at the specified pH.
372     */
373    private double getNetCharge(double pH, Map<IonizableGroup, Integer> inIonizableGroupMap)
374    {
375        double netCharge = 0;
376
377        double concOfHIions = Math.pow(10, -pH);
378
379        if (inIonizableGroupMap != null)
380        {
381            for (IonizableGroup group : inIonizableGroupMap.keySet())
382            {
383                netCharge += group.getCharge(inIonizableGroupMap.get(group), concOfHIions);
384            }
385        }
386
387        return netCharge;
388    }
389}