package com.hfg.bio;

import java.util.*;

import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.CompareUtil;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;
import com.hfg.util.StringUtil;



//------------------------------------------------------------------------------
/**
 Mapping of sequence characters to AminoAcids.
 <p>
 Internally this is a map from residue character to AminoAcid. Several characters
 may map to the same AminoAcid, so {@link #size()} (the number of character mappings)
 can be larger than the number of elements produced by {@link #iterator()},
 {@link #toArray()}, and {@link #getAminoAcids()} (the unique amino acids).
 </p>
 <p>
 A set can be locked via {@link #lock()}, after which every mutating method
 throws an UnmodifyableObjectException. The predefined sets are locked.
 </p>
 <div>
 @author J. Alex Taylor, hairyfatguy.com
 </div>
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// jataylor@hairyfatguy.com
//------------------------------------------------------------------------------

public class AminoAcidSet implements Set<AminoAcid>, Cloneable
{
   //##########################################################################
   // PUBLIC FIELDS
   //##########################################################################

   /**
    Contains the lowercase standard mappings.
    */
   public static final AminoAcidSet STANDARD_LC = new AminoAcidSet();

   /**
    Contains the uppercase standard mappings.
    */
   public static final AminoAcidSet STANDARD_UC = new AminoAcidSet();

   /**
    Contains both upper and lowercase standard mappings.
    */
   public static final AminoAcidSet STANDARD = new AminoAcidSet();

   /**
    Contains both upper and lowercase standard mappings plus B, J, and Z ambiguity codes.
    */
   public static final AminoAcidSet EXTENDED = new AminoAcidSet();

   static
   {
      // Each predefined set is named and locked only after it is fully populated,
      // since the setters refuse to operate on a locked set.
      STANDARD_LC.setMapping('a', AminoAcid.ALANINE);
      STANDARD_LC.setMapping('c', AminoAcid.CYSTEINE);
      STANDARD_LC.setMapping('d', AminoAcid.ASPARTIC_ACID);
      STANDARD_LC.setMapping('e', AminoAcid.GLUTAMIC_ACID);
      STANDARD_LC.setMapping('f', AminoAcid.PHENYLALANINE);
      STANDARD_LC.setMapping('g', AminoAcid.GLYCINE);
      STANDARD_LC.setMapping('h', AminoAcid.HISTIDINE);
      STANDARD_LC.setMapping('i', AminoAcid.ISOLEUCINE);
      STANDARD_LC.setMapping('k', AminoAcid.LYSINE);
      STANDARD_LC.setMapping('l', AminoAcid.LEUCINE);
      STANDARD_LC.setMapping('m', AminoAcid.METHIONINE);
      STANDARD_LC.setMapping('n', AminoAcid.ASPARAGINE);
      STANDARD_LC.setMapping('p', AminoAcid.PROLINE);
      STANDARD_LC.setMapping('q', AminoAcid.GLUTAMINE);
      STANDARD_LC.setMapping('r', AminoAcid.ARGININE);
      STANDARD_LC.setMapping('s', AminoAcid.SERINE);
      STANDARD_LC.setMapping('t', AminoAcid.THREONIE);
      STANDARD_LC.setMapping('v', AminoAcid.VALINE);
      STANDARD_LC.setMapping('w', AminoAcid.TRYPTOPHAN);
      STANDARD_LC.setMapping('y', AminoAcid.TYROSINE);
      STANDARD_LC.setMapping('x', AminoAcid.UNDEFINED);
      STANDARD_LC.setMapping('*', AminoAcid.STOP);
      STANDARD_LC.setName("Standard (lower case)");
      STANDARD_LC.lock();

      STANDARD_UC.setMapping('A', AminoAcid.ALANINE);
      STANDARD_UC.setMapping('C', AminoAcid.CYSTEINE);
      STANDARD_UC.setMapping('D', AminoAcid.ASPARTIC_ACID);
      STANDARD_UC.setMapping('E', AminoAcid.GLUTAMIC_ACID);
      STANDARD_UC.setMapping('F', AminoAcid.PHENYLALANINE);
      STANDARD_UC.setMapping('G', AminoAcid.GLYCINE);
      STANDARD_UC.setMapping('H', AminoAcid.HISTIDINE);
      STANDARD_UC.setMapping('I', AminoAcid.ISOLEUCINE);
      STANDARD_UC.setMapping('K', AminoAcid.LYSINE);
      STANDARD_UC.setMapping('L', AminoAcid.LEUCINE);
      STANDARD_UC.setMapping('M', AminoAcid.METHIONINE);
      STANDARD_UC.setMapping('N', AminoAcid.ASPARAGINE);
      STANDARD_UC.setMapping('P', AminoAcid.PROLINE);
      STANDARD_UC.setMapping('Q', AminoAcid.GLUTAMINE);
      STANDARD_UC.setMapping('R', AminoAcid.ARGININE);
      STANDARD_UC.setMapping('S', AminoAcid.SERINE);
      STANDARD_UC.setMapping('T', AminoAcid.THREONIE);
      STANDARD_UC.setMapping('V', AminoAcid.VALINE);
      STANDARD_UC.setMapping('W', AminoAcid.TRYPTOPHAN);
      STANDARD_UC.setMapping('Y', AminoAcid.TYROSINE);
      STANDARD_UC.setMapping('X', AminoAcid.UNDEFINED);
      STANDARD_UC.setMapping('*', AminoAcid.STOP);
      STANDARD_UC.setName("Standard (upper case)");
      STANDARD_UC.lock();

      STANDARD.setMappings(STANDARD_LC);
      STANDARD.setMappings(STANDARD_UC);
      STANDARD.setName("Standard");
      STANDARD.lock();

      EXTENDED.setMappings(STANDARD);
      EXTENDED.setMapping('B', AminoAcid.ASP_ASN_AVG);
      EXTENDED.setMapping('b', AminoAcid.ASP_ASN_AVG);
      EXTENDED.setMapping('J', AminoAcid.ILE_LEU_AVG);
      EXTENDED.setMapping('j', AminoAcid.ILE_LEU_AVG);
      EXTENDED.setMapping('Z', AminoAcid.GLU_GLN_AVG);
      EXTENDED.setMapping('z', AminoAcid.GLU_GLN_AVG);
      EXTENDED.setName("Extended");
      EXTENDED.lock();
   }


   //##########################################################################
   // PRIVATE FIELDS
   //##########################################################################

   private String mName;
   private NTerminalGroup mNTerminalGroup = NTerminalGroup.UNMODIFIED_N_TERMINUS;
   private CTerminalGroup mCTerminalGroup = CTerminalGroup.UNMODIFIED_C_TERMINUS;
   private boolean mLocked;
   private Map<Character, AminoAcid> mMap = new HashMap<>();

   // Since the map values may have the same AA mapped to different characters,
   // we will cache the unique set of AA's for performance.
   private Collection<AminoAcid> mCachedSet;

   //##########################################################################
   // CONSTRUCTORS
   //##########################################################################

   //--------------------------------------------------------------------------
   /**
    Creates an empty, unlocked, unnamed set with the default terminal groups.
    */
   public AminoAcidSet()
   {

   }

   //--------------------------------------------------------------------------
   /**
    Creates an unlocked set holding the mappings and terminal groups of the specified set.
    The name of the specified set is not copied.
    @param inAASet the set to copy from
    @throws NullPointerException if inAASet is null
    */
   public AminoAcidSet(AminoAcidSet inAASet)
   {
      setMappings(inAASet);
      setNTerminalGroup(inAASet.getNTerminalGroup());
      setCTerminalGroup(inAASet.getCTerminalGroup());
   }

   //--------------------------------------------------------------------------
   /**
    Builds an AminoAcidSet from its XML representation.
    <p>
    If the tag contains an amino acids subtag, a new set is built from the mappings
    and the N- and C-terminal subtags. Otherwise the name attribute is matched
    against the names of the predefined sets.
    </p>
    @param inXMLNode the XML tag describing the set
    @return the new set, the matching predefined set, or null if the tag has no
            amino acids subtag and its name does not match a predefined set
    @throws RuntimeException if the tag is not an amino acid set tag
    */
   public static AminoAcidSet instantiate(XMLNode inXMLNode)
   {
      if (!inXMLNode.getTagName().equals(HfgBioXML.AASET_TAG))
      {
         throw new RuntimeException("Cannot construct an " + AminoAcidSet.class.getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!");
      }

      String name = inXMLNode.getAttributeValue(HfgBioXML.NAME_ATT);

      XMLNode aminoAcidsTag = inXMLNode.getOptionalSubtagByName(HfgBioXML.AMINO_ACIDS_TAG);
      if (aminoAcidsTag != null)
      {
         AminoAcidSet aaSet = new AminoAcidSet().setName(name);
         for (XMLNode subtag : aminoAcidsTag.getXMLNodeSubtags())
         {
            aaSet.setMapping(subtag.getAttributeValue(HfgBioXML.MAPPING_ATT).charAt(0), new AminoAcid(subtag));
         }

         aaSet.setNTerminalGroup(new NTerminalGroup((XMLNode) inXMLNode.getRequiredSubtagByName(HfgBioXML.NTERM_TAG)));
         aaSet.setCTerminalGroup(new CTerminalGroup((XMLNode) inXMLNode.getRequiredSubtagByName(HfgBioXML.CTERM_TAG)));

         return aaSet;
      }

      // Pre-defined amino acid sets can be specified with just a name.
      // The comparison is made from the predefined name so that a missing
      // name attribute results in "no match" rather than a NullPointerException.
      for (AminoAcidSet predefinedAASet : new AminoAcidSet[] { STANDARD, STANDARD_LC, STANDARD_UC, EXTENDED })
      {
         if (predefinedAASet.getName().equals(name))
         {
            return predefinedAASet;
         }
      }

      return null;
   }

   //##########################################################################
   // PUBLIC METHODS
   //##########################################################################

   //--------------------------------------------------------------------------
   /**
    Generates the XML representation of this set.
    <p>
    STANDARD, STANDARD_LC, and STANDARD_UC are written as a name-only tag. Any other
    set is written with every character mapping plus the N- and C-terminal groups.
    </p>
    @return the XML tag describing this set
    */
   public XMLNode toXMLNode()
   {
      XMLNode node = new XMLTag(HfgBioXML.AASET_TAG);
      if (StringUtil.isSet(getName()))
      {
         node.setAttribute(HfgBioXML.NAME_ATT, getName());
      }

      if (! isPredefinedSet())
      {
         XMLNode aminoAcidsTag = new XMLTag(HfgBioXML.AMINO_ACIDS_TAG);
         node.addSubtag(aminoAcidsTag);
         for (Map.Entry<Character, AminoAcid> mapping : mMap.entrySet())
         {
            XMLNode aaTag = mapping.getValue().toXMLNode();
            aaTag.setAttribute(HfgBioXML.MAPPING_ATT, mapping.getKey().toString());
            aminoAcidsTag.addSubtag(aaTag);
         }

         node.addSubtag(mNTerminalGroup.toXMLNode());
         node.addSubtag(mCTerminalGroup.toXMLNode());
      }

      return node;
   }

   //--------------------------------------------------------------------------
   /**
    Returns an unlocked copy of this set with its own character map.
    The AminoAcid values and terminal groups themselves are shared, not copied.
    @return an unlocked copy of this set
    */
   @Override
   public AminoAcidSet clone()
   {
      AminoAcidSet newObj;
      try
      {
         newObj = (AminoAcidSet) super.clone();
      }
      catch (CloneNotSupportedException e)
      {
         throw new RuntimeException(e);
      }

      // Give the copy its own map so that changes do not leak back into this set.
      newObj.mMap = new HashMap<>(mMap);
      newObj.mLocked = false;

      return newObj;
   }

   //--------------------------------------------------------------------------
   /**
    @return whether this set has been locked against modification
    */
   public boolean isLocked()
   {
      return mLocked;
   }

   //--------------------------------------------------------------------------
   /**
    Locks this set. Subsequent calls to mutating methods will throw an
    UnmodifyableObjectException. This class provides no way to unlock a set;
    use {@link #clone()} to obtain a modifiable copy.
    */
   public void lock()
   {
      mLocked = true;
   }

   //--------------------------------------------------------------------------
   /**
    Sets the name of this set.
    @param inValue the new name
    @return this set, to allow method chaining
    @throws UnmodifyableObjectException if this set is locked
    */
   public AminoAcidSet setName(String inValue)
   {
      assertNotLocked();
      mName = inValue;

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    @return the name of this set (may be null)
    */
   public String getName()
   {
      return mName;
   }

   //--------------------------------------------------------------------------
   /**
    Sets the N-terminal group.
    @param inValue the new N-terminal group (may be null)
    @return this set, to allow method chaining
    @throws UnmodifyableObjectException if this set is locked
    */
   public AminoAcidSet setNTerminalGroup(NTerminalGroup inValue)
   {
      assertNotLocked();
      mNTerminalGroup = inValue;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    @return the N-terminal group (may be null)
    */
   public NTerminalGroup getNTerminalGroup()
   {
      return mNTerminalGroup;
   }

   //--------------------------------------------------------------------------
   /**
    Sets the C-terminal group.
    @param inValue the new C-terminal group (may be null)
    @return this set, to allow method chaining
    @throws UnmodifyableObjectException if this set is locked
    */
   public AminoAcidSet setCTerminalGroup(CTerminalGroup inValue)
   {
      assertNotLocked();
      mCTerminalGroup = inValue;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    @return the C-terminal group (may be null)
    */
   public CTerminalGroup getCTerminalGroup()
   {
      return mCTerminalGroup;
   }

   //--------------------------------------------------------------------------
   /**
    Maps the specified character to the specified amino acid, replacing any existing mapping.
    @param inChar the residue character
    @param inAA   the amino acid the character should map to
    @return true if the character was already mapped before this call
    @throws UnmodifyableObjectException if this set is locked
    */
   public boolean setMapping(char inChar, AminoAcid inAA)
   {
      assertNotLocked();

      boolean alreadyMapped = mMap.containsKey(inChar);

      mMap.put(inChar, inAA);
      clearCachedValues();

      return alreadyMapped;
   }

   //--------------------------------------------------------------------------
   /**
    Copies every character mapping of the specified set into this set, replacing
    existing mappings for the same characters. A null argument is ignored.
    @param inAASet the set whose mappings should be copied (may be null)
    @throws UnmodifyableObjectException if this set is locked
    */
   public void setMappings(AminoAcidSet inAASet)
   {
      assertNotLocked();

      if (inAASet != null)
      {
         Iterator<Character> iter = inAASet.mapIterator();
         while (iter.hasNext())
         {
            Character letter = iter.next();
            mMap.put(letter, inAASet.getAA(letter));
         }

         clearCachedValues();
      }
   }

   //--------------------------------------------------------------------------
   /**
    Returns the unique amino acids of this set sorted with AminoAcid.AA_ORDINAL_COMPARATOR.
    The result is cached until the mappings change.
    @return unmodifiable collection of the unique amino acids
    */
   public Collection<AminoAcid> getAminoAcids()
   {
      if (null == mCachedSet)
      {
         // Passing the values through a HashSet collapses AAs mapped to several characters.
         List<AminoAcid> aaList = new ArrayList<>(new HashSet<>(mMap.values()));
         Collections.sort(aaList, AminoAcid.AA_ORDINAL_COMPARATOR);

         mCachedSet = Collections.unmodifiableCollection(aaList);
      }

      return mCachedSet;
   }

   //--------------------------------------------------------------------------
   /**
    Returns an iteration of the Characters mapped to amino acids
    @return iteration of the Characters mapped to amino acids
    */
   public Iterator<Character> mapIterator()
   {
      return mMap.keySet().iterator();
   }

   //--------------------------------------------------------------------------
   /**
    @return unmodifiable view of the characters mapped to amino acids
    */
   public Set<Character> getResidueChars()
   {
      return Collections.unmodifiableSet(mMap.keySet());
   }

   //--------------------------------------------------------------------------
   /**
    Returns the characters that map to the specified amino acid.
    @param inAA the amino acid to look up
    @return a new set of the characters mapped to the amino acid (empty if there are none)
    */
   public Set<Character> getMapping(AminoAcid inAA)
   {
      Set<Character> residues = new HashSet<>(20);
      for (Map.Entry<Character, AminoAcid> mapping : mMap.entrySet())
      {
         AminoAcid mappedAA = mapping.getValue();
         if (mappedAA != null
             && mappedAA.equals(inAA))
         {
            residues.add(mapping.getKey());
         }
      }

      return residues;
   }

   //--------------------------------------------------------------------------
   /**
    @param inResidue the residue character
    @return the amino acid mapped to the character or null if there is no mapping
    */
   public AminoAcid getAA(char inResidue)
   {
      return getAA(Character.valueOf(inResidue));
   }

   //--------------------------------------------------------------------------
   /**
    @param inResidue the residue character
    @return the amino acid mapped to the character or null if there is no mapping
    */
   public AminoAcid getAA(Character inResidue)
   {
      return mMap.get(inResidue);
   }


   //--------------------------------------------------------------------------
   /**
    Returns the number of character mappings. Note that this can exceed the number
    of unique amino acids returned by {@link #iterator()} and {@link #toArray()}.
    @return the number of character mappings
    */
   @Override
   public int size()
   {
      return mMap.size();
   }

   //--------------------------------------------------------------------------
   /**
    @return true if this set has no character mappings
    */
   @Override
   public boolean isEmpty()
   {
      return mMap.isEmpty();
   }

   //--------------------------------------------------------------------------
   /**
    @param inObject the object to look for
    @return true if the object is an AminoAcid that at least one character maps to
    */
   @Override
   public boolean contains(Object inObject)
   {
      return inObject instanceof AminoAcid && mMap.containsValue(inObject);
   }

   //--------------------------------------------------------------------------
   /**
    @return an iterator over the unique amino acids of this set
    */
   @Override
   public Iterator<AminoAcid> iterator()
   {
      return getAminoAcids().iterator(); // Want to reduce values to the unique set before iterating.
   }

   //--------------------------------------------------------------------------
   /**
    @return an array of the unique amino acids of this set
    */
   @Override
   public Object[] toArray()
   {
      return getAminoAcids().toArray();
   }

   //--------------------------------------------------------------------------
   /**
    Returns the unique amino acids of this set in an array of the runtime type of
    the specified array, following the contract of Collection.toArray(T[]).
    @param inArray the array to fill if it is large enough; otherwise a new array
                   of the same runtime type is allocated
    @return an array of the unique amino acids
    */
   @Override
   public <T> T[] toArray(T[] inArray)
   {
      // Delegating keeps the element count consistent with iterator(). Sizing the
      // copy with size() would overrun whenever several characters share an AA.
      return getAminoAcids().toArray(inArray);
   }

   //--------------------------------------------------------------------------
   /**
    Maps the one-letter code of the specified amino acid to the amino acid.
    @param inAminoAcid the amino acid to add
    @return true if this set changed as a result of the call (the one-letter code
            was previously unmapped or was mapped to a different amino acid)
    @throws UnmodifyableObjectException if this set is locked
    */
   @Override
   public boolean add(AminoAcid inAminoAcid)
   {
      char oneLetterCode = inAminoAcid.getOneLetterCode();
      AminoAcid previousAA = mMap.get(oneLetterCode);

      setMapping(oneLetterCode, inAminoAcid);

      return ! inAminoAcid.equals(previousAA);
   }

   //--------------------------------------------------------------------------
   /**
    Removes every character mapping whose amino acid equals the specified object.
    @param inObj the amino acid to remove
    @return true if at least one mapping was removed
    @throws UnmodifyableObjectException if this set is locked
    */
   @Override
   public boolean remove(Object inObj)
   {
      assertNotLocked();

      boolean result = false;
      if (inObj instanceof AminoAcid)
      {
         // Removal must go through the iterator; removing from the map directly
         // while iterating over it throws a ConcurrentModificationException.
         Iterator<AminoAcid> valueIter = mMap.values().iterator();
         while (valueIter.hasNext())
         {
            AminoAcid mappedAA = valueIter.next();
            if (mappedAA != null
                && mappedAA.equals(inObj))
            {
               valueIter.remove();
               result = true;
            }
         }

         if (result)
         {
            clearCachedValues();
         }
      }

      return result;
   }

   //--------------------------------------------------------------------------
   /**
    @param inCollection the objects to look for
    @return true if every object in the collection is contained in this set
    */
   @Override
   public boolean containsAll(Collection<?> inCollection)
   {
      for (Object obj : inCollection)
      {
         if (! contains(obj))
         {
            return false;
         }
      }

      return true;
   }

   //--------------------------------------------------------------------------
   /**
    Adds each amino acid of the collection via {@link #add(AminoAcid)}.
    @param inCollection the amino acids to add
    @return true if at least one call to add() returned true
    @throws UnmodifyableObjectException if this set is locked and the collection is not empty
    */
   @Override
   public boolean addAll(Collection<? extends AminoAcid> inCollection)
   {
      boolean result = false;
      for (AminoAcid aa : inCollection)
      {
         if (add(aa))
         {
            result = true;
         }
      }

      return result;
   }

   //--------------------------------------------------------------------------
   /**
    Removes every character mapping whose amino acid is not in the specified collection.
    @param inCollection the amino acids to keep
    @return true if at least one mapping was removed
    @throws UnmodifyableObjectException if this set is locked
    */
   @Override
   public boolean retainAll(Collection<?> inCollection)
   {
      assertNotLocked();

      boolean result = false;

      // Removal must go through the iterator; removing from the map directly
      // while iterating over it throws a ConcurrentModificationException.
      Iterator<AminoAcid> valueIter = mMap.values().iterator();
      while (valueIter.hasNext())
      {
         if (! inCollection.contains(valueIter.next()))
         {
            valueIter.remove();
            result = true;
         }
      }

      if (result)
      {
         clearCachedValues();
      }

      return result;
   }

   //--------------------------------------------------------------------------
   /**
    Removes each object of the collection via {@link #remove(Object)}.
    @param inCollection the amino acids to remove
    @return true if at least one mapping was removed
    @throws UnmodifyableObjectException if this set is locked and the collection is not empty
    */
   @Override
   public boolean removeAll(Collection<?> inCollection)
   {
      boolean result = false;
      for (Object obj : inCollection)
      {
         if (remove(obj))
         {
            result = true;
         }
      }

      return result;
   }

   //--------------------------------------------------------------------------
   /**
    Removes all character mappings. The name and terminal groups are left untouched.
    @throws UnmodifyableObjectException if this set is locked
    */
   @Override
   public void clear()
   {
      assertNotLocked();

      mMap.clear();

      clearCachedValues();
   }

   //--------------------------------------------------------------------------
   /**
    Two AminoAcidSets are equal if they have the same character mappings and equal
    N- and C-terminal groups. The name and the locked state are not compared.
    @param inObj2 the object to compare to
    @return true if the specified object is an equal AminoAcidSet
    */
   @Override
   public boolean equals(Object inObj2)
   {
      if (this == inObj2)
      {
         return true;
      }

      if (! (inObj2 instanceof AminoAcidSet))
      {
         return false;
      }

      AminoAcidSet aaSet2 = (AminoAcidSet) inObj2;

      // The terminal groups can be null (see subtract()), hence Objects.equals().
      // Both groups are compared to stay consistent with hashCode().
      return mMap.equals(aaSet2.mMap)
             && Objects.equals(getNTerminalGroup(), aaSet2.getNTerminalGroup())
             && Objects.equals(getCTerminalGroup(), aaSet2.getCTerminalGroup());
   }

   //--------------------------------------------------------------------------
   /**
    @return hash code derived from the unique amino acids and both terminal groups
    */
   @Override
   public int hashCode()
   {
      int hashCode = 0;
      for (AminoAcid aa : getAminoAcids())
      {
         hashCode += 31 * aa.hashCode();
      }

      // The terminal groups can be null (see subtract()); a null contributes nothing.
      NTerminalGroup nTerminalGroup = getNTerminalGroup();
      if (nTerminalGroup != null)
      {
         hashCode += 31 * nTerminalGroup.hashCode();
      }

      CTerminalGroup cTerminalGroup = getCTerminalGroup();
      if (cTerminalGroup != null)
      {
         hashCode += 31 * cTerminalGroup.hashCode();
      }

      return hashCode;
   }

   //--------------------------------------------------------------------------
   /**
    Removes the mapping for the specified character.
    @param inChar the residue character whose mapping should be removed
    @return the amino acid the character was mapped to or null if there was no mapping
    @throws UnmodifyableObjectException if this set is locked
    */
   public AminoAcid remove(Character inChar)
   {
      assertNotLocked();

      clearCachedValues();

      return mMap.remove(inChar);
   }

   //--------------------------------------------------------------------------
   /**
    Returns an unlocked copy of this set without the parts it shares with the specified set.
    <p>
    A character mapping is dropped from the copy if the specified set maps the same
    character to an amino acid whose elemental composition compares as equal and
    either amino acid has a chemical formula set or both amino acids have the same name.
    A terminal group is set to null in the copy if it equals the corresponding
    terminal group of the specified set. This set is not modified.
    </p>
    @param inAminoAcidSet2 the set to subtract (may be null, in which case a plain copy is returned)
    @return the unlocked copy with the shared parts removed
    */
   public AminoAcidSet subtract(AminoAcidSet inAminoAcidSet2)
   {
      AminoAcidSet subtractedSet = clone();

      if (inAminoAcidSet2 != null)
      {
         // Iterate over this set's map while removing from the clone's separate map.
         for (Map.Entry<Character, AminoAcid> mapping : mMap.entrySet())
         {
            Character aaChar = mapping.getKey();
            AminoAcid currentAA = mapping.getValue();
            AminoAcid oldAA = inAminoAcidSet2.getAA(aaChar);

            if (currentAA != null
                && oldAA != null)
            {
               int comparison = CompareUtil.compare(currentAA.getElementalComposition(), oldAA.getElementalComposition());
               if (0 == comparison)
               {
                  if (StringUtil.isSet(currentAA.getChemicalFormula())
                      || StringUtil.isSet(oldAA.getChemicalFormula())
                      // Compare the names if neither has a composition
                      || currentAA.name().equals(oldAA.name()))
                  {
                     subtractedSet.remove(aaChar);
                  }
               }
            }
         }

         if (getNTerminalGroup() != null
             && inAminoAcidSet2.getNTerminalGroup() != null
             && getNTerminalGroup().equals(inAminoAcidSet2.getNTerminalGroup()))
         {
            subtractedSet.setNTerminalGroup(null);
         }

         if (getCTerminalGroup() != null
             && inAminoAcidSet2.getCTerminalGroup() != null
             && getCTerminalGroup().equals(inAminoAcidSet2.getCTerminalGroup()))
         {
            subtractedSet.setCTerminalGroup(null);
         }
      }

      return subtractedSet;
   }

   //--------------------------------------------------------------------------
   // Throws if this set has been locked. Called first by every mutating method.
   private void assertNotLocked()
   {
      if (mLocked) throw new UnmodifyableObjectException(mName + " is locked and cannot be modified!");
   }

   //--------------------------------------------------------------------------
   // Discards the cached unique amino acid collection so that it gets rebuilt on next use.
   private void clearCachedValues()
   {
      mCachedSet = null;
   }

   //--------------------------------------------------------------------------
   // Whether this is one of the sets that toXMLNode() serializes by name only.
   private boolean isPredefinedSet()
   {
      return (this == STANDARD
              || this == STANDARD_LC
              || this == STANDARD_UC);
   }
}