001package com.hfg.bio.proteinproperty;
002
003
004import java.util.Collection;
005import java.util.HashMap;
006import java.util.Map;
007
008import com.hfg.bio.AminoAcid;
009import com.hfg.bio.seq.AminoAcidComposition;
010import com.hfg.bio.seq.Protein;
011import com.hfg.exception.ProgrammingException;
012import com.hfg.util.StringUtil;
013import com.hfg.util.collection.OrderedMap;
014
015//------------------------------------------------------------------------------
016/**
 Amino acid hydrophobicity (and related per-residue) scales, each of which can
 compute the mean per-residue scale value for a protein.
018 <div>
019  See http://en.wikipedia.org/wiki/Hydrophobicity_scales
020 </div>
021 <div>
022  See http://web.expasy.org/protscale/
023 </div>
024 <div>
025  @author J. Alex Taylor, hairyfatguy.com
026 </div>
027 */
028//------------------------------------------------------------------------------
029// com.hfg XML/HTML Coding Library
030//
031// This library is free software; you can redistribute it and/or
032// modify it under the terms of the GNU Lesser General Public
033// License as published by the Free Software Foundation; either
034// version 2.1 of the License, or (at your option) any later version.
035//
036// This library is distributed in the hope that it will be useful,
037// but WITHOUT ANY WARRANTY; without even the implied warranty of
038// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
039// Lesser General Public License for more details.
040//
041// You should have received a copy of the GNU Lesser General Public
042// License along with this library; if not, write to the Free Software
043// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
044//
045// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
046// jataylor@hairyfatguy.com
047//------------------------------------------------------------------------------
048
049// TODO: Move data to gzipped XML resource files?
050
051public class Hydrophobicity extends SimpleProteinProperty<SimpleProteinPropertyCalcSettings, Float> implements WindowBasedProteinProperty
052{
053   private Map<AminoAcid, Float> mAAMap;
054
055   private static Map<String, Hydrophobicity> sUniqueMap = new OrderedMap<>();
056
057   private static String KYTE_DOOLITTLE = "Kyte Doolittle";
058   private static String BULL_BREESE    = "Bull & Breese";
059   private static String EISENBERG      = "Eisenberg";
060   private static String ENGLEMAN       = "Engleman";
061   private static String RAGONE         = "Ragone";
062   private static String HOPP_WOODS     = "Hopp Woods";
063   private static String CHOTHIA        = "Chothia";
064
065   public static final Hydrophobicity KyteDoolittle = new Hydrophobicity(KYTE_DOOLITTLE,
066         "Hydropathicity",
067         "Kyte J, Doolittle RF (May 1982). \"A simple method for displaying the hydropathic character of a protein\". J. Mol. Biol. 157 (1): 105-132");
068
069   public static final Hydrophobicity BullBreese     = new Hydrophobicity(BULL_BREESE,
070         "Hydrophobicity (free energy of transfer to surface in kcal/mole)",
071         "Bull H.B., Breese K (1974). Arch. Biochem. Biophys. 161: 665-670.");
072
073   public static final Hydrophobicity Eisenberg     = new Hydrophobicity(EISENBERG,
074         "Normalized consensus hydrophobicity scale",
075         "Eisenberg D (July 1984). \"Three-dimensional structure of membrane and surface proteins\". Ann. Rev. Biochem. 53: 595-623.");
076
077   public static final Hydrophobicity Engleman      = new Hydrophobicity(ENGLEMAN,
078         "Identifying nonpolar transbilayer helices",
079         "Engleman DM, Steitz TA, Goldman A (1986). \"Identifying nonpolar transbilayer helices in amino acid sequences of menbrane proteins\". Ann. Rev. Biophys. Chem. 15: 321-353");
080
081   public static final Hydrophobicity Ragone        = new Hydrophobicity(RAGONE,
082         "Flexibility",
083         "Ragone R, Facchiano F, Facchiano A, Facchiano AM, Colonna G (1989). \"Flexibility plot of proteins\". Prot. Engineering 2 (7): 497-504");
084
085   public static final Hydrophobicity HoppWoods     = new Hydrophobicity(HOPP_WOODS,
086         "Hydrophilicity",
087         "Hopp TP, Woods KR (1981). Proc. Natl. Acad. Sci. U.S.A. 78: 3824-3828.");
088
089   public static final Hydrophobicity Chothia       = new Hydrophobicity(CHOTHIA,
090         "Proportion of residues 95% buried (in 12 proteins).",
091         "Chothia C (1976). \"The nature of the accessible and buried surfaces in proteins\". J. Mol. Biol. 105: 1-14");
092
093
094   //###########################################################################
095   // CONSTRUCTORS
096   //###########################################################################
097
098   //---------------------------------------------------------------------------
099   protected Hydrophobicity(String inName, String inDescription, String inReference)
100   {
101      super(inName);
102      setDescription(inDescription);
103      setReference(inReference);
104
105      sUniqueMap.put(inName, this);
106   }
107
108   //###########################################################################
109   // PUBLIC METHODS
110   //###########################################################################
111
112   //---------------------------------------------------------------------------
113   public static Collection<Hydrophobicity> values()
114   {
115      return sUniqueMap != null ? sUniqueMap.values() : null;
116   }
117
118   //--------------------------------------------------------------------------
119   public String getType()
120   {
121      return "Hydrophobicity";
122   }
123
124   //---------------------------------------------------------------------------
125   public Float calculate(Protein inProtein)
126   {
127      return calculate(inProtein, null);
128   }
129
130   //---------------------------------------------------------------------------
131   public Float calculate(Protein inProtein, SimpleProteinPropertyCalcSettings inSettings)
132   {
133      Map<AminoAcid, Float> aaMap = getMap();
134
135      float total = 0.0f;
136      int   totalNumAAs = 0;
137      AminoAcidComposition aaComposition = inProtein.getAminoAcidComposition();
138      for (AminoAcid aa : aaComposition.keySet())
139      {
140         Float value = aaMap.get(aa);
141         if (value != null)
142         {
143            int aaCount = aaComposition.get(aa);
144            totalNumAAs += aaCount;
145            total += (value * aaCount);
146         }
147      }
148      // Avoid divide by zero
149      float value = (float) (totalNumAAs > 0 ? total / (float) totalNumAAs : 0.0);
150
151      // Round to three digits
152      value = (float) (Math.round(value * 1000)/1000.0);
153
154      return value;
155   }
156
157   //---------------------------------------------------------------------------
158   private Map<AminoAcid, Float> getMap()
159   {
160      if (null == mAAMap)
161      {
162         mAAMap = new HashMap<AminoAcid, Float>(26);
163
164         if (name().equalsIgnoreCase(KYTE_DOOLITTLE))
165         {
166            mAAMap.put(AminoAcid.ALANINE,        1.8f);
167            mAAMap.put(AminoAcid.CYSTEINE,       2.5f);
168            mAAMap.put(AminoAcid.ASPARTIC_ACID, -3.5f);
169            mAAMap.put(AminoAcid.GLUTAMIC_ACID, -3.5f);
170            mAAMap.put(AminoAcid.PHENYLALANINE,  2.7f);
171            mAAMap.put(AminoAcid.GLYCINE,       -0.4f);
172            mAAMap.put(AminoAcid.HISTIDINE,     -3.2f);
173            mAAMap.put(AminoAcid.ISOLEUCINE,     4.5f);
174            mAAMap.put(AminoAcid.LYSINE,        -3.9f);
175            mAAMap.put(AminoAcid.LEUCINE,        3.7f);
176            mAAMap.put(AminoAcid.METHIONINE,     1.9f);
177            mAAMap.put(AminoAcid.ASPARAGINE,    -3.5f);
178            mAAMap.put(AminoAcid.PROLINE,       -1.6f);
179            mAAMap.put(AminoAcid.GLUTAMINE,     -3.5f);
180            mAAMap.put(AminoAcid.ARGININE,      -4.5f);
181            mAAMap.put(AminoAcid.SERINE,        -0.9f);
182            mAAMap.put(AminoAcid.THREONIE,      -0.7f);
183            mAAMap.put(AminoAcid.VALINE,         4.2f);
184            mAAMap.put(AminoAcid.TRYPTOPHAN,    -0.9f);
185            mAAMap.put(AminoAcid.TYROSINE,      -1.3f);
186         }
187         else if (name().equalsIgnoreCase(CHOTHIA))
188         {
189            mAAMap.put(AminoAcid.ALANINE,        0.38f);
190            mAAMap.put(AminoAcid.CYSTEINE,       0.50f);
191            mAAMap.put(AminoAcid.ASPARTIC_ACID,  0.15f);
192            mAAMap.put(AminoAcid.GLUTAMIC_ACID,  0.18f);
193            mAAMap.put(AminoAcid.PHENYLALANINE,  0.50f);
194            mAAMap.put(AminoAcid.GLYCINE,        0.36f);
195            mAAMap.put(AminoAcid.HISTIDINE,      0.17f);
196            mAAMap.put(AminoAcid.ISOLEUCINE,     0.60f);
197            mAAMap.put(AminoAcid.LYSINE,         0.03f);
198            mAAMap.put(AminoAcid.LEUCINE,        0.45f);
199            mAAMap.put(AminoAcid.METHIONINE,     0.40f);
200            mAAMap.put(AminoAcid.ASPARAGINE,     0.12f);
201            mAAMap.put(AminoAcid.PROLINE,        0.18f);
202            mAAMap.put(AminoAcid.GLUTAMINE,      0.07f);
203            mAAMap.put(AminoAcid.ARGININE,       0.01f);
204            mAAMap.put(AminoAcid.SERINE,         0.22f);
205            mAAMap.put(AminoAcid.THREONIE,       0.23f);
206            mAAMap.put(AminoAcid.VALINE,         0.54f);
207            mAAMap.put(AminoAcid.TRYPTOPHAN,     0.27f);
208            mAAMap.put(AminoAcid.TYROSINE,       0.15f);
209         }
210         else if (name().equalsIgnoreCase(ENGLEMAN))
211         {
212            mAAMap.put(AminoAcid.ALANINE,        1.6f);
213            mAAMap.put(AminoAcid.CYSTEINE,       2.0f);
214            mAAMap.put(AminoAcid.ASPARTIC_ACID, -9.2f);
215            mAAMap.put(AminoAcid.GLUTAMIC_ACID, -8.2f);
216            mAAMap.put(AminoAcid.PHENYLALANINE,  3.7f);
217            mAAMap.put(AminoAcid.GLYCINE,        1.0f);
218            mAAMap.put(AminoAcid.HISTIDINE,     -3.0f);
219            mAAMap.put(AminoAcid.ISOLEUCINE,     3.1f);
220            mAAMap.put(AminoAcid.LYSINE,        -8.8f);
221            mAAMap.put(AminoAcid.LEUCINE,        2.8f);
222            mAAMap.put(AminoAcid.METHIONINE,     3.4f);
223            mAAMap.put(AminoAcid.ASPARAGINE,    -4.8f);
224            mAAMap.put(AminoAcid.PROLINE,       -0.2f);
225            mAAMap.put(AminoAcid.GLUTAMINE,     -4.1f);
226            mAAMap.put(AminoAcid.ARGININE,      12.3f);
227            mAAMap.put(AminoAcid.SERINE,         0.6f);
228            mAAMap.put(AminoAcid.THREONIE,       1.2f);
229            mAAMap.put(AminoAcid.VALINE,         2.6f);
230            mAAMap.put(AminoAcid.TRYPTOPHAN,     1.9f);
231            mAAMap.put(AminoAcid.TYROSINE,      -0.7f);
232         }
233         else if (name().equalsIgnoreCase(EISENBERG))
234         {
235            mAAMap.put(AminoAcid.ALANINE,        0.62f);
236            mAAMap.put(AminoAcid.CYSTEINE,       0.29f);
237            mAAMap.put(AminoAcid.ASPARTIC_ACID, -0.90f);
238            mAAMap.put(AminoAcid.GLUTAMIC_ACID, -0.74f);
239            mAAMap.put(AminoAcid.PHENYLALANINE,  1.19f);
240            mAAMap.put(AminoAcid.GLYCINE,        0.48f);
241            mAAMap.put(AminoAcid.HISTIDINE,     -0.40f);
242            mAAMap.put(AminoAcid.ISOLEUCINE,     1.38f);
243            mAAMap.put(AminoAcid.LYSINE,        -1.50f);
244            mAAMap.put(AminoAcid.LEUCINE,        1.06f);
245            mAAMap.put(AminoAcid.METHIONINE,     0.64f);
246            mAAMap.put(AminoAcid.ASPARAGINE,    -0.78f);
247            mAAMap.put(AminoAcid.PROLINE,        0.12f);
248            mAAMap.put(AminoAcid.GLUTAMINE,     -0.85f);
249            mAAMap.put(AminoAcid.ARGININE,      -2.53f);
250            mAAMap.put(AminoAcid.SERINE,        -0.18f);
251            mAAMap.put(AminoAcid.THREONIE,      -0.05f);
252            mAAMap.put(AminoAcid.VALINE,         1.08f);
253            mAAMap.put(AminoAcid.TRYPTOPHAN,     0.81f);
254            mAAMap.put(AminoAcid.TYROSINE,       0.26f);
255         }
256         else if (name().equalsIgnoreCase(RAGONE))
257         {
258            mAAMap.put(AminoAcid.ALANINE,       -0.91f);
259            mAAMap.put(AminoAcid.CYSTEINE,      -0.17f);
260            mAAMap.put(AminoAcid.ASPARTIC_ACID, -0.68f);
261            mAAMap.put(AminoAcid.GLUTAMIC_ACID, -0.68f);
262            mAAMap.put(AminoAcid.PHENYLALANINE,  1.37f);
263            mAAMap.put(AminoAcid.GLYCINE,       -1.40f);
264            mAAMap.put(AminoAcid.HISTIDINE,      0.25f);
265            mAAMap.put(AminoAcid.ISOLEUCINE,     1.09f);
266            mAAMap.put(AminoAcid.LYSINE,        -0.13f);
267            mAAMap.put(AminoAcid.LEUCINE,        0.89f);
268            mAAMap.put(AminoAcid.METHIONINE,     0.83f);
269            mAAMap.put(AminoAcid.ASPARAGINE,    -0.42f);
270            mAAMap.put(AminoAcid.PROLINE,       -0.52f);
271            mAAMap.put(AminoAcid.GLUTAMINE,      0.06f);
272            mAAMap.put(AminoAcid.ARGININE,       0.71f);
273            mAAMap.put(AminoAcid.SERINE,        -1.01f);
274            mAAMap.put(AminoAcid.THREONIE,      -0.58f);
275            mAAMap.put(AminoAcid.VALINE,         0.52f);
276            mAAMap.put(AminoAcid.TRYPTOPHAN,     2.00f);
277            mAAMap.put(AminoAcid.TYROSINE,       1.21f);
278         }
279         else if (name().equalsIgnoreCase(HOPP_WOODS))
280         {
281            mAAMap.put(AminoAcid.ALANINE,        0.5f);
282            mAAMap.put(AminoAcid.CYSTEINE,       1.0f);
283            mAAMap.put(AminoAcid.ASPARTIC_ACID, -3.0f);
284            mAAMap.put(AminoAcid.GLUTAMIC_ACID, -3.0f);
285            mAAMap.put(AminoAcid.PHENYLALANINE,  2.5f);
286            mAAMap.put(AminoAcid.GLYCINE,        0.0f);
287            mAAMap.put(AminoAcid.HISTIDINE,      0.5f);
288            mAAMap.put(AminoAcid.ISOLEUCINE,     1.8f);
289            mAAMap.put(AminoAcid.LYSINE,        -3.0f);
290            mAAMap.put(AminoAcid.LEUCINE,        1.8f);
291            mAAMap.put(AminoAcid.METHIONINE,     1.3f);
292            mAAMap.put(AminoAcid.ASPARAGINE,    -0.2f);
293            mAAMap.put(AminoAcid.PROLINE,        0.0f);
294            mAAMap.put(AminoAcid.GLUTAMINE,     -0.2f);
295            mAAMap.put(AminoAcid.ARGININE,      -3.0f);
296            mAAMap.put(AminoAcid.SERINE,        -0.3f);
297            mAAMap.put(AminoAcid.THREONIE,      -0.4f);
298            mAAMap.put(AminoAcid.VALINE,         1.5f);
299            mAAMap.put(AminoAcid.TRYPTOPHAN,     3.4f);
300            mAAMap.put(AminoAcid.TYROSINE,       2.3f);
301         }
302         else if (name().equalsIgnoreCase(BULL_BREESE))
303         {
304            mAAMap.put(AminoAcid.ALANINE,        0.61f);
305            mAAMap.put(AminoAcid.CYSTEINE,       0.36f);
306            mAAMap.put(AminoAcid.ASPARTIC_ACID,  0.61f);
307            mAAMap.put(AminoAcid.GLUTAMIC_ACID,  0.51f);
308            mAAMap.put(AminoAcid.PHENYLALANINE, -1.52f);
309            mAAMap.put(AminoAcid.GLYCINE,        0.81f);
310            mAAMap.put(AminoAcid.HISTIDINE,      0.69f);
311            mAAMap.put(AminoAcid.ISOLEUCINE,    -1.45f);
312            mAAMap.put(AminoAcid.LYSINE,         0.46f);
313            mAAMap.put(AminoAcid.LEUCINE,       -1.65f);
314            mAAMap.put(AminoAcid.METHIONINE,    -0.66f);
315            mAAMap.put(AminoAcid.ASPARAGINE,     0.89f);
316            mAAMap.put(AminoAcid.PROLINE,       -0.17f);
317            mAAMap.put(AminoAcid.GLUTAMINE,      0.97f);
318            mAAMap.put(AminoAcid.ARGININE,       0.69f);
319            mAAMap.put(AminoAcid.SERINE,         0.42f);
320            mAAMap.put(AminoAcid.THREONIE,       0.29f);
321            mAAMap.put(AminoAcid.VALINE,        -0.75f);
322            mAAMap.put(AminoAcid.TRYPTOPHAN,    -1.20f);
323            mAAMap.put(AminoAcid.TYROSINE,      -1.43f);
324         }
325         else
326         {
327            throw new ProgrammingException("Residue values have not been specified for " + StringUtil.singleQuote(name()) + "!");
328         }
329      }
330
331      return mAAMap;
332   }
333}