001package com.hfg.bio.proteinproperty; 002 003 004import java.util.Collection; 005import java.util.HashMap; 006import java.util.Map; 007 008import com.hfg.bio.AminoAcid; 009import com.hfg.bio.seq.AminoAcidComposition; 010import com.hfg.bio.seq.Protein; 011import com.hfg.exception.ProgrammingException; 012import com.hfg.util.StringUtil; 013import com.hfg.util.collection.OrderedMap; 014 015//------------------------------------------------------------------------------ 016/** 017 Simple container for sequence mapping stats. 018 <div> 019 See http://en.wikipedia.org/wiki/Hydrophobicity_scales 020 </div> 021 <div> 022 See http://web.expasy.org/protscale/ 023 </div> 024 <div> 025 @author J. Alex Taylor, hairyfatguy.com 026 </div> 027 */ 028//------------------------------------------------------------------------------ 029// com.hfg XML/HTML Coding Library 030// 031// This library is free software; you can redistribute it and/or 032// modify it under the terms of the GNU Lesser General Public 033// License as published by the Free Software Foundation; either 034// version 2.1 of the License, or (at your option) any later version. 035// 036// This library is distributed in the hope that it will be useful, 037// but WITHOUT ANY WARRANTY; without even the implied warranty of 038// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 039// Lesser General Public License for more details. 040// 041// You should have received a copy of the GNU Lesser General Public 042// License along with this library; if not, write to the Free Software 043// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 044// 045// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 046// jataylor@hairyfatguy.com 047//------------------------------------------------------------------------------ 048 049// TODO: Move data to gzipped XML resource files? 050 051public class Hydrophobicity extends SimpleProteinProperty<SimpleProteinPropertyCalcSettings, Float> implements WindowBasedProteinProperty 052{ 053 private Map<AminoAcid, Float> mAAMap; 054 055 private static Map<String, Hydrophobicity> sUniqueMap = new OrderedMap<>(); 056 057 private static String KYTE_DOOLITTLE = "Kyte Doolittle"; 058 private static String BULL_BREESE = "Bull & Breese"; 059 private static String EISENBERG = "Eisenberg"; 060 private static String ENGLEMAN = "Engleman"; 061 private static String RAGONE = "Ragone"; 062 private static String HOPP_WOODS = "Hopp Woods"; 063 private static String CHOTHIA = "Chothia"; 064 065 public static final Hydrophobicity KyteDoolittle = new Hydrophobicity(KYTE_DOOLITTLE, 066 "Hydropathicity", 067 "Kyte J, Doolittle RF (May 1982). \"A simple method for displaying the hydropathic character of a protein\". J. Mol. Biol. 157 (1): 105-132"); 068 069 public static final Hydrophobicity BullBreese = new Hydrophobicity(BULL_BREESE, 070 "Hydrophobicity (free energy of transfer to surface in kcal/mole)", 071 "Bull H.B., Breese K (1974). Arch. Biochem. Biophys. 161: 665-670."); 072 073 public static final Hydrophobicity Eisenberg = new Hydrophobicity(EISENBERG, 074 "Normalized consensus hydrophobicity scale", 075 "Eisenberg D (July 1984). \"Three-dimensional structure of membrane and surface proteins\". Ann. Rev. Biochem. 53: 595-623."); 076 077 public static final Hydrophobicity Engleman = new Hydrophobicity(ENGLEMAN, 078 "Identifying nonpolar transbilayer helices", 079 "Engleman DM, Steitz TA, Goldman A (1986). \"Identifying nonpolar transbilayer helices in amino acid sequences of menbrane proteins\". Ann. Rev. Biophys. Chem. 15: 321-353"); 080 081 public static final Hydrophobicity Ragone = new Hydrophobicity(RAGONE, 082 "Flexibility", 083 "Ragone R, Facchiano F, Facchiano A, Facchiano AM, Colonna G (1989). \"Flexibility plot of proteins\". Prot. Engineering 2 (7): 497-504"); 084 085 public static final Hydrophobicity HoppWoods = new Hydrophobicity(HOPP_WOODS, 086 "Hydrophilicity", 087 "Hopp TP, Woods KR (1981). Proc. Natl. Acad. Sci. U.S.A. 78: 3824-3828."); 088 089 public static final Hydrophobicity Chothia = new Hydrophobicity(CHOTHIA, 090 "Proportion of residues 95% buried (in 12 proteins).", 091 "Chothia C (1976). \"The nature of the accessible and buried surfaces in proteins\". J. Mol. Biol. 105: 1-14"); 092 093 094 //########################################################################### 095 // CONSTRUCTORS 096 //########################################################################### 097 098 //--------------------------------------------------------------------------- 099 protected Hydrophobicity(String inName, String inDescription, String inReference) 100 { 101 super(inName); 102 setDescription(inDescription); 103 setReference(inReference); 104 105 sUniqueMap.put(inName, this); 106 } 107 108 //########################################################################### 109 // PUBLIC METHODS 110 //########################################################################### 111 112 //--------------------------------------------------------------------------- 113 public static Collection<Hydrophobicity> values() 114 { 115 return sUniqueMap != null ? sUniqueMap.values() : null; 116 } 117 118 //-------------------------------------------------------------------------- 119 public String getType() 120 { 121 return "Hydrophobicity"; 122 } 123 124 //--------------------------------------------------------------------------- 125 public Float calculate(Protein inProtein) 126 { 127 return calculate(inProtein, null); 128 } 129 130 //--------------------------------------------------------------------------- 131 public Float calculate(Protein inProtein, SimpleProteinPropertyCalcSettings inSettings) 132 { 133 Map<AminoAcid, Float> aaMap = getMap(); 134 135 float total = 0.0f; 136 int totalNumAAs = 0; 137 AminoAcidComposition aaComposition = inProtein.getAminoAcidComposition(); 138 for (AminoAcid aa : aaComposition.keySet()) 139 { 140 Float value = aaMap.get(aa); 141 if (value != null) 142 { 143 int aaCount = aaComposition.get(aa); 144 totalNumAAs += aaCount; 145 total += (value * aaCount); 146 } 147 } 148 // Avoid divide by zero 149 float value = (float) (totalNumAAs > 0 ? total / (float) totalNumAAs : 0.0); 150 151 // Round to three digits 152 value = (float) (Math.round(value * 1000)/1000.0); 153 154 return value; 155 } 156 157 //--------------------------------------------------------------------------- 158 private Map<AminoAcid, Float> getMap() 159 { 160 if (null == mAAMap) 161 { 162 mAAMap = new HashMap<AminoAcid, Float>(26); 163 164 if (name().equalsIgnoreCase(KYTE_DOOLITTLE)) 165 { 166 mAAMap.put(AminoAcid.ALANINE, 1.8f); 167 mAAMap.put(AminoAcid.CYSTEINE, 2.5f); 168 mAAMap.put(AminoAcid.ASPARTIC_ACID, -3.5f); 169 mAAMap.put(AminoAcid.GLUTAMIC_ACID, -3.5f); 170 mAAMap.put(AminoAcid.PHENYLALANINE, 2.7f); 171 mAAMap.put(AminoAcid.GLYCINE, -0.4f); 172 mAAMap.put(AminoAcid.HISTIDINE, -3.2f); 173 mAAMap.put(AminoAcid.ISOLEUCINE, 4.5f); 174 mAAMap.put(AminoAcid.LYSINE, -3.9f); 175 mAAMap.put(AminoAcid.LEUCINE, 3.7f); 176 mAAMap.put(AminoAcid.METHIONINE, 1.9f); 177 mAAMap.put(AminoAcid.ASPARAGINE, -3.5f); 178 mAAMap.put(AminoAcid.PROLINE, -1.6f); 179 mAAMap.put(AminoAcid.GLUTAMINE, -3.5f); 180 mAAMap.put(AminoAcid.ARGININE, -4.5f); 181 mAAMap.put(AminoAcid.SERINE, -0.9f); 182 mAAMap.put(AminoAcid.THREONIE, -0.7f); 183 mAAMap.put(AminoAcid.VALINE, 4.2f); 184 mAAMap.put(AminoAcid.TRYPTOPHAN, -0.9f); 185 mAAMap.put(AminoAcid.TYROSINE, -1.3f); 186 } 187 else if (name().equalsIgnoreCase(CHOTHIA)) 188 { 189 mAAMap.put(AminoAcid.ALANINE, 0.38f); 190 mAAMap.put(AminoAcid.CYSTEINE, 0.50f); 191 mAAMap.put(AminoAcid.ASPARTIC_ACID, 0.15f); 192 mAAMap.put(AminoAcid.GLUTAMIC_ACID, 0.18f); 193 mAAMap.put(AminoAcid.PHENYLALANINE, 0.50f); 194 mAAMap.put(AminoAcid.GLYCINE, 0.36f); 195 mAAMap.put(AminoAcid.HISTIDINE, 0.17f); 196 mAAMap.put(AminoAcid.ISOLEUCINE, 0.60f); 197 mAAMap.put(AminoAcid.LYSINE, 0.03f); 198 mAAMap.put(AminoAcid.LEUCINE, 0.45f); 199 mAAMap.put(AminoAcid.METHIONINE, 0.40f); 200 mAAMap.put(AminoAcid.ASPARAGINE, 0.12f); 201 mAAMap.put(AminoAcid.PROLINE, 0.18f); 202 mAAMap.put(AminoAcid.GLUTAMINE, 0.07f); 203 mAAMap.put(AminoAcid.ARGININE, 0.01f); 204 mAAMap.put(AminoAcid.SERINE, 0.22f); 205 mAAMap.put(AminoAcid.THREONIE, 0.23f); 206 mAAMap.put(AminoAcid.VALINE, 0.54f); 207 mAAMap.put(AminoAcid.TRYPTOPHAN, 0.27f); 208 mAAMap.put(AminoAcid.TYROSINE, 0.15f); 209 } 210 else if (name().equalsIgnoreCase(ENGLEMAN)) 211 { 212 mAAMap.put(AminoAcid.ALANINE, 1.6f); 213 mAAMap.put(AminoAcid.CYSTEINE, 2.0f); 214 mAAMap.put(AminoAcid.ASPARTIC_ACID, -9.2f); 215 mAAMap.put(AminoAcid.GLUTAMIC_ACID, -8.2f); 216 mAAMap.put(AminoAcid.PHENYLALANINE, 3.7f); 217 mAAMap.put(AminoAcid.GLYCINE, 1.0f); 218 mAAMap.put(AminoAcid.HISTIDINE, -3.0f); 219 mAAMap.put(AminoAcid.ISOLEUCINE, 3.1f); 220 mAAMap.put(AminoAcid.LYSINE, -8.8f); 221 mAAMap.put(AminoAcid.LEUCINE, 2.8f); 222 mAAMap.put(AminoAcid.METHIONINE, 3.4f); 223 mAAMap.put(AminoAcid.ASPARAGINE, -4.8f); 224 mAAMap.put(AminoAcid.PROLINE, -0.2f); 225 mAAMap.put(AminoAcid.GLUTAMINE, -4.1f); 226 mAAMap.put(AminoAcid.ARGININE, 12.3f); 227 mAAMap.put(AminoAcid.SERINE, 0.6f); 228 mAAMap.put(AminoAcid.THREONIE, 1.2f); 229 mAAMap.put(AminoAcid.VALINE, 2.6f); 230 mAAMap.put(AminoAcid.TRYPTOPHAN, 1.9f); 231 mAAMap.put(AminoAcid.TYROSINE, -0.7f); 232 } 233 else if (name().equalsIgnoreCase(EISENBERG)) 234 { 235 mAAMap.put(AminoAcid.ALANINE, 0.62f); 236 mAAMap.put(AminoAcid.CYSTEINE, 0.29f); 237 mAAMap.put(AminoAcid.ASPARTIC_ACID, -0.90f); 238 mAAMap.put(AminoAcid.GLUTAMIC_ACID, -0.74f); 239 mAAMap.put(AminoAcid.PHENYLALANINE, 1.19f); 240 mAAMap.put(AminoAcid.GLYCINE, 0.48f); 241 mAAMap.put(AminoAcid.HISTIDINE, -0.40f); 242 mAAMap.put(AminoAcid.ISOLEUCINE, 1.38f); 243 mAAMap.put(AminoAcid.LYSINE, -1.50f); 244 mAAMap.put(AminoAcid.LEUCINE, 1.06f); 245 mAAMap.put(AminoAcid.METHIONINE, 0.64f); 246 mAAMap.put(AminoAcid.ASPARAGINE, -0.78f); 247 mAAMap.put(AminoAcid.PROLINE, 0.12f); 248 mAAMap.put(AminoAcid.GLUTAMINE, -0.85f); 249 mAAMap.put(AminoAcid.ARGININE, -2.53f); 250 mAAMap.put(AminoAcid.SERINE, -0.18f); 251 mAAMap.put(AminoAcid.THREONIE, -0.05f); 252 mAAMap.put(AminoAcid.VALINE, 1.08f); 253 mAAMap.put(AminoAcid.TRYPTOPHAN, 0.81f); 254 mAAMap.put(AminoAcid.TYROSINE, 0.26f); 255 } 256 else if (name().equalsIgnoreCase(RAGONE)) 257 { 258 mAAMap.put(AminoAcid.ALANINE, -0.91f); 259 mAAMap.put(AminoAcid.CYSTEINE, -0.17f); 260 mAAMap.put(AminoAcid.ASPARTIC_ACID, -0.68f); 261 mAAMap.put(AminoAcid.GLUTAMIC_ACID, -0.68f); 262 mAAMap.put(AminoAcid.PHENYLALANINE, 1.37f); 263 mAAMap.put(AminoAcid.GLYCINE, -1.40f); 264 mAAMap.put(AminoAcid.HISTIDINE, 0.25f); 265 mAAMap.put(AminoAcid.ISOLEUCINE, 1.09f); 266 mAAMap.put(AminoAcid.LYSINE, -0.13f); 267 mAAMap.put(AminoAcid.LEUCINE, 0.89f); 268 mAAMap.put(AminoAcid.METHIONINE, 0.83f); 269 mAAMap.put(AminoAcid.ASPARAGINE, -0.42f); 270 mAAMap.put(AminoAcid.PROLINE, -0.52f); 271 mAAMap.put(AminoAcid.GLUTAMINE, 0.06f); 272 mAAMap.put(AminoAcid.ARGININE, 0.71f); 273 mAAMap.put(AminoAcid.SERINE, -1.01f); 274 mAAMap.put(AminoAcid.THREONIE, -0.58f); 275 mAAMap.put(AminoAcid.VALINE, 0.52f); 276 mAAMap.put(AminoAcid.TRYPTOPHAN, 2.00f); 277 mAAMap.put(AminoAcid.TYROSINE, 1.21f); 278 } 279 else if (name().equalsIgnoreCase(HOPP_WOODS)) 280 { 281 mAAMap.put(AminoAcid.ALANINE, 0.5f); 282 mAAMap.put(AminoAcid.CYSTEINE, 1.0f); 283 mAAMap.put(AminoAcid.ASPARTIC_ACID, -3.0f); 284 mAAMap.put(AminoAcid.GLUTAMIC_ACID, -3.0f); 285 mAAMap.put(AminoAcid.PHENYLALANINE, 2.5f); 286 mAAMap.put(AminoAcid.GLYCINE, 0.0f); 287 mAAMap.put(AminoAcid.HISTIDINE, 0.5f); 288 mAAMap.put(AminoAcid.ISOLEUCINE, 1.8f); 289 mAAMap.put(AminoAcid.LYSINE, -3.0f); 290 mAAMap.put(AminoAcid.LEUCINE, 1.8f); 291 mAAMap.put(AminoAcid.METHIONINE, 1.3f); 292 mAAMap.put(AminoAcid.ASPARAGINE, -0.2f); 293 mAAMap.put(AminoAcid.PROLINE, 0.0f); 294 mAAMap.put(AminoAcid.GLUTAMINE, -0.2f); 295 mAAMap.put(AminoAcid.ARGININE, -3.0f); 296 mAAMap.put(AminoAcid.SERINE, -0.3f); 297 mAAMap.put(AminoAcid.THREONIE, -0.4f); 298 mAAMap.put(AminoAcid.VALINE, 1.5f); 299 mAAMap.put(AminoAcid.TRYPTOPHAN, 3.4f); 300 mAAMap.put(AminoAcid.TYROSINE, 2.3f); 301 } 302 else if (name().equalsIgnoreCase(BULL_BREESE)) 303 { 304 mAAMap.put(AminoAcid.ALANINE, 0.61f); 305 mAAMap.put(AminoAcid.CYSTEINE, 0.36f); 306 mAAMap.put(AminoAcid.ASPARTIC_ACID, 0.61f); 307 mAAMap.put(AminoAcid.GLUTAMIC_ACID, 0.51f); 308 mAAMap.put(AminoAcid.PHENYLALANINE, -1.52f); 309 mAAMap.put(AminoAcid.GLYCINE, 0.81f); 310 mAAMap.put(AminoAcid.HISTIDINE, 0.69f); 311 mAAMap.put(AminoAcid.ISOLEUCINE, -1.45f); 312 mAAMap.put(AminoAcid.LYSINE, 0.46f); 313 mAAMap.put(AminoAcid.LEUCINE, -1.65f); 314 mAAMap.put(AminoAcid.METHIONINE, -0.66f); 315 mAAMap.put(AminoAcid.ASPARAGINE, 0.89f); 316 mAAMap.put(AminoAcid.PROLINE, -0.17f); 317 mAAMap.put(AminoAcid.GLUTAMINE, 0.97f); 318 mAAMap.put(AminoAcid.ARGININE, 0.69f); 319 mAAMap.put(AminoAcid.SERINE, 0.42f); 320 mAAMap.put(AminoAcid.THREONIE, 0.29f); 321 mAAMap.put(AminoAcid.VALINE, -0.75f); 322 mAAMap.put(AminoAcid.TRYPTOPHAN, -1.20f); 323 mAAMap.put(AminoAcid.TYROSINE, -1.43f); 324 } 325 else 326 { 327 throw new ProgrammingException("Residue values have not been specified for " + StringUtil.singleQuote(name()) + "!"); 328 } 329 } 330 331 return mAAMap; 332 } 333}