001package com.hfg.bio.seq; 002 003import java.io.Reader; 004import java.math.BigDecimal; 005import java.math.MathContext; 006import java.util.*; 007import java.util.regex.Pattern; 008import java.util.regex.Matcher; 009 010import com.hfg.bio.*; 011import com.hfg.bio.glyco.Glycan; 012import com.hfg.bio.proteinproperty.ExtinctionCoeff; 013import com.hfg.bio.proteinproperty.IsoelectricPoint; 014import com.hfg.bio.proteinproperty.PctExtinctionCoeff; 015import com.hfg.bio.proteinproperty.ProteinAnalysisMode; 016import com.hfg.bio.proteinproperty.ReducedAnalysisMode; 017import com.hfg.bio.proteinproperty.SimpleProteinPropertyCalcSettings; 018import com.hfg.bio.taxonomy.ncbi.NCBITaxon; 019import com.hfg.chem.Element; 020import com.hfg.chem.IonizableGroup; 021import com.hfg.chem.Molecule; 022import com.hfg.util.ChecksumUtil; 023import com.hfg.util.CompareUtil; 024import com.hfg.util.StringBuilderPlus; 025import com.hfg.util.collection.CollectionUtil; 026import com.hfg.util.StringUtil; 027import com.hfg.util.collection.OrderedMap; 028import com.hfg.xml.XMLNode; 029import com.hfg.xml.XMLTag; 030 031//------------------------------------------------------------------------------ 032/** 033 Biological protein sequence. 034 <div> 035 @author J. Alex Taylor, hairyfatguy.com 036 </div> 037 */ 038//------------------------------------------------------------------------------ 039// com.hfg XML/HTML Coding Library 040// 041// This library is free software; you can redistribute it and/or 042// modify it under the terms of the GNU Lesser General Public 043// License as published by the Free Software Foundation; either 044// version 2.1 of the License, or (at your option) any later version. 045// 046// This library is distributed in the hope that it will be useful, 047// but WITHOUT ANY WARRANTY; without even the implied warranty of 048// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 049// Lesser General Public License for more details. 050// 051// You should have received a copy of the GNU Lesser General Public 052// License along with this library; if not, write to the Free Software 053// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 054// 055// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 056// jataylor@hairyfatguy.com 057//------------------------------------------------------------------------------ 058 059public class Protein extends BioSequencePlusImpl 060{ 061 062 //########################################################################## 063 // PRIVATE FIELDS 064 //########################################################################## 065 066 private AminoAcidSet mAASet; 067 068 // Properties 069 private AminoAcidComposition mAAComposition; 070 private Float mIsoelectricPoint; 071 private KaSet mIsoelectricPointKaSet; 072 private Integer mExtinctionCoeff; 073 private Float mPercentExtinctionCoeff; 074 075 private List<Protein> mChains; 076 private Integer mNumDisulfideBonds; 077 private Set<ProteinXLink> mXLinks; 078 private List<Glycan> mGlycans; 079 080 private Protein mParent; 081 private Map<String, Protein> mChainIdMap = new OrderedMap<>(4); 082 083 private static Pattern sChainIdPattern = Pattern.compile("_(\\w)$"); 084 085 //########################################################################## 086 // CONSTRUCTORS 087 //########################################################################## 088 089 //-------------------------------------------------------------------------- 090 public Protein() 091 { 092 093 } 094 095 //-------------------------------------------------------------------------- 096 public Protein(XMLNode inXML) 097 { 098 super(inXML); 099 100 if (! inXML.getTagName().equals(HfgBioXML.HFGBIOSEQ_TAG)) 101 { 102 throw new RuntimeException("Cannot construct an " + this.getClass().getSimpleName() + " from a " + inXML.getTagName() + " tag!"); 103 } 104 105 XMLNode aminoAcidSetTag = inXML.getOptionalSubtagByName(HfgBioXML.AASET_TAG); 106 if (aminoAcidSetTag != null) 107 { 108 mAASet = AminoAcidSet.instantiate(aminoAcidSetTag); 109 } 110 111 XMLNode chainsTag = inXML.getOptionalSubtagByName(HfgBioXML.CHAINS_TAG); 112 if (chainsTag != null) 113 { 114 for (XMLNode subtag : chainsTag.getXMLNodeSubtags()) 115 { 116 addChain(new Protein(subtag)); 117 } 118 } 119 120 // This needs to be set AFTER the chains have been added. 121 if (inXML.hasAttribute(HfgBioXML.DISULFIDE_CNT_ATT)) 122 { 123 setNumDisulfideBonds(Integer.parseInt(inXML.getAttributeValue(HfgBioXML.DISULFIDE_CNT_ATT))); 124 } 125 126 XMLNode xlinksTag = inXML.getOptionalSubtagByName(HfgBioXML.XLINKS_TAG); 127 if (xlinksTag != null) 128 { 129 for (XMLNode subtag : chainsTag.getXMLNodeSubtags()) 130 { 131 addXLink(new ProteinXLink(subtag)); 132 } 133 } 134 } 135 136 //########################################################################## 137 // PUBLIC METHODS 138 //########################################################################## 139 140 //-------------------------------------------------------------------------- 141 @Override 142 public BioSequenceType getType() 143 { 144 return BioSequenceType.PROTEIN; 145 } 146 147 //-------------------------------------------------------------------------- 148 @Override 149 public Protein clone() 150 { 151 clearCalculatedProperties(); 152 153 Protein theClone = (Protein) super.clone(); 154 155 if (mAAComposition != null) 156 { 157 theClone.mAAComposition = mAAComposition.clone(); 158 } 159 160 if (mChains != null) 161 { 162 theClone.mChains = new ArrayList<>(mChains.size()); 163 for (Protein chain : mChains) 164 { 165 Protein chainClone = chain.clone(); 166 // Can't call addChain() here because it will mess with chain naming 167 theClone.mChains.add(chainClone); 168 chainClone.mParent = theClone; 169 } 170 171 theClone.mChainIdMap = new HashMap<>(mChains.size()); 172 for (Protein chain : theClone.mChains) 173 { 174 theClone.mChainIdMap.put(chain.getID(), chain); 175 } 176 } 177 178 theClone.mParent = null; 179 180 if (mXLinks != null) 181 { 182 theClone.mXLinks = new HashSet<>(mXLinks.size()); 183 for (ProteinXLink xlink : mXLinks) 184 { 185 theClone.mXLinks.add(xlink.clone()); 186 } 187 } 188 189 return theClone; 190 } 191 192 //--------------------------------------------------------------------------- 193 // The hashcode is based on the sequence and not the id. 194 @Override 195 public int hashCode() 196 { 197 byte[] md5 = getMD5Checksum(); 198 199 return md5 != null ? new String(md5).hashCode() : 0; 200 } 201 202 //--------------------------------------------------------------------------- 203 @Override 204 public int compareTo(Object inObj2) 205 { 206 int result = -1; 207 208 if (this == inObj2) 209 { 210 result = 0; 211 } 212 else if (inObj2 != null 213 && inObj2 instanceof Protein) 214 { 215 Protein protein2 = (Protein) inObj2; 216 217 // First compare the lengths 218 result = CompareUtil.compare(length(), protein2.length()); 219 220 if (0 == result) 221 { 222 // Second compare the number of chains 223 result = CompareUtil.compare(getNumChains(), protein2.getNumChains()); 224 } 225 226 if (0 == result) 227 { 228 // Third compare the sequences themselves. (We could use a checksum first but that causes calculation of the seq data string anyway.) 229 result = CompareUtil.compare(getSeqDataString(), protein2.getSeqDataString()); 230 } 231 } 232 233 return result; 234 } 235 236 //-------------------------------------------------------------------------- 237 @Override 238 public byte[] getMD5Checksum() 239 { 240 byte[] checksum = null; 241 if (getSequence() != null) 242 { 243 checksum = super.getMD5Checksum(); 244 } 245 else if (CollectionUtil.hasValues(getChains())) 246 { 247 checksum = ChecksumUtil.calculateMD5(getSeqDataString()); 248 } 249 250 return checksum; 251 } 252 253 //-------------------------------------------------------------------------- 254 @Override 255 public byte[] getSHA1Checksum() 256 { 257 byte[] checksum = null; 258 if (getSequence() != null) 259 { 260 checksum = super.getSHA1Checksum(); 261 } 262 else if (CollectionUtil.hasValues(getChains())) 263 { 264 checksum = ChecksumUtil.calculateSHA1(getSeqDataString()); 265 } 266 267 return checksum; 268 } 269 270 //--------------------------------------------------------------------------- 271 /** 272 Recursively calculates the number of chains in the protein. 273 @return the number of chains in the protein. 274 */ 275 public int getNumChains() 276 { 277 int numChains = 0; 278 279 if (getSequence() != null) 280 { 281 numChains = 1; 282 } 283 else if (CollectionUtil.hasValues(getChains())) 284 { 285 for (Protein chain : getChains()) 286 { 287 numChains += chain.getNumChains(); 288 } 289 } 290 291 return numChains; 292 } 293 294 //-------------------------------------------------------------------------- 295 /** 296 Specifies the protein's name / identifier. 297 @param inValue the name / identifier for the protein 298 @return this Protein object to enable method chaining 299 */ 300 @Override 301 public Protein setID(String inValue) 302 { 303 String oldId = getID(); 304 305 super.setID(inValue); 306 307 propogateIdChange(oldId, inValue); 308 309 if (mParent != null) 310 { 311 Protein topProtein = getTopProtein(); 312 topProtein.mChainIdMap.remove(oldId); 313 String newId = topProtein.assignChainId(this); 314 315 if (getID() != null 316 && !getID().equals(newId)) 317 { 318 super.setID(newId); 319 propogateIdChange(oldId, newId); 320 } 321 } 322 323 324 return this; 325 } 326 327 //-------------------------------------------------------------------------- 328 /** 329 Specifies the protein's description. 330 @param inValue the description of the protein 331 @return this Protein object to enable method chaining 332 */ 333 @Override 334 public Protein setDescription(CharSequence inValue) 335 { 336 super.setDescription(inValue); 337 return this; 338 } 339 340 //-------------------------------------------------------------------------- 341 /** 342 Specifies the protein's sequence. A Protein can contain either a sequence or 343 other Protein objects as chains but not both. 344 @param inValue the sequence of the protein 345 @return this Protein object to enable method chaining 346 */ 347 @Override 348 public Protein setSequence(CharSequence inValue) 349 { 350 if (CollectionUtil.hasValues(mChains)) 351 { 352 throw new RuntimeException("A Protein cannot have both chains and a sequence!"); 353 } 354 355 return (Protein) super.setSequence(inValue); 356 } 357 358 359 //-------------------------------------------------------------------------- 360 @Override 361 public Protein setNCBITaxon(NCBITaxon inValue) 362 { 363 return (Protein) super.setNCBITaxon(inValue); 364 } 365 366 367 //-------------------------------------------------------------------------- 368 /** 369 Specifies the protein's sequence. A Protein can contain either a sequence or 370 other Protein objects as chains but not both. 371 @param inReader the sequence of the protein specified via a Reader 372 @return this Protein object to enable method chaining 373 */ 374 @Override 375 public Protein setSequence(Reader inReader) 376 { 377 if (CollectionUtil.hasValues(mChains)) 378 { 379 throw new RuntimeException("A Protein cannot have both chains and a sequence!"); 380 } 381 382 return (Protein) super.setSequence(inReader); 383 } 384 385 //-------------------------------------------------------------------------- 386 /** 387 Specifies the protein's chains. A Protein can contain either a sequence or 388 other Protein objects as chains but not both. 389 @param inChains Protein objects that are chains of this Protein object 390 */ 391 public void setChains(Collection<Protein> inChains) 392 { 393 if (super.length() > 0) 394 { 395 throw new RuntimeException("A Protein cannot have both chains and a sequence!"); 396 } 397 398 mChains = null; 399 400 if (CollectionUtil.hasValues(inChains)) 401 { 402 for (Protein chain : inChains) 403 { 404 addChain(chain); 405 } 406 } 407 } 408 409 //-------------------------------------------------------------------------- 410 /** 411 Adds a specified protein chain to this Protein object (with a stoichiometry of one). 412 A Protein can contain either a sequence or other Protein objects as chains but not both. 413 @param inChain Protein object that is a chain of this Protein object 414 */ 415 public void addChain(Protein inChain) 416 { 417 if (super.length() > 0) 418 { 419 throw new RuntimeException("A Protein cannot have both chains and a sequence!"); 420 } 421 422 if (inChain != null) 423 { 424 if (null == mChains) mChains = new ArrayList<>(5); 425 426 if (inChain.mParent != null) 427 { 428 inChain = inChain.clone(); 429 } 430 431 mChains.add(inChain); 432 inChain.mParent = this; 433 434 clearElementalCompositionAndCalculatedProperties(); 435 436 inChain.checkId(); 437 438 // If we're already using this chain, clone it before adding. 439 if (CollectionUtil.hasValues(inChain.getChains())) 440 { 441 List<Protein> newList = new ArrayList<>(inChain.getChains().size()); 442 boolean duplicatesDetected = false; 443 for (Protein chain : inChain.getChains()) 444 { 445 if (getTopProtein().mChainIdMap.values().contains(chain)) 446 { 447 chain = chain.clone(); 448 newList.add(chain); 449 duplicatesDetected = true; 450 } 451 else 452 { 453 newList.add(chain); 454 } 455 456 chain.checkId(); 457 } 458 459 if (duplicatesDetected) inChain.setChains(newList); 460 } 461 } 462 } 463 464 //-------------------------------------------------------------------------- 465 /** 466 Adds specified protein chains to this Protein object (with a stoichiometry of one). 467 A Protein can contain either a sequence or other Protein objects as chains but not both. 468 @param inChains Protein objects that are chains of this Protein object 469 */ 470 public void addChains(Collection<Protein> inChains) 471 { 472 // Not the most efficient way to do it, but it keeps things simple. 473 if (CollectionUtil.hasValues(inChains)) 474 { 475 for (Protein chain : inChains) 476 { 477 addChain(chain); 478 } 479 } 480 } 481 482 //-------------------------------------------------------------------------- 483 /** 484 Adds a specified number of copies of a specified protein chain to this Protein object. 485 (Ex. 2 heavy or light chains in an antibody.) 486 A Protein can contain either a sequence or other Protein objects as chains but not both. 487 @param inChain Protein object that is a chain of this Protein object 488 @param inNumCopies the number of copies of the specified chain that should be added 489 */ 490 public void addChains(Protein inChain, int inNumCopies) 491 { 492 String baseId = null; 493 for (int i = 0; i < inNumCopies; i++) 494 { 495 Protein chain = inChain.clone(); 496 if (baseId != null) 497 { 498 chain.setID(baseId + "_" + (i + 1)); 499 } 500 501 addChain(chain); 502 503 if (null == baseId) 504 { 505 baseId = chain.getID(); 506 chain.setID(baseId + "_" + (i + 1)); 507 } 508 } 509 } 510 511 //-------------------------------------------------------------------------- 512 public boolean hasChains() 513 { 514 return CollectionUtil.hasValues(mChains); 515 } 516 517 //-------------------------------------------------------------------------- 518 public Collection<Protein> getChains() 519 { 520 return mChains; 521 } 522 523 //-------------------------------------------------------------------------- 524 /** 525 Returns the chain with the specified id. 526 @param inChainId the id of the chain to return 527 @return the requested chain 528 */ 529 public Protein getChain(String inChainId) 530 { 531 return mChainIdMap.get(inChainId); 532 } 533 534 //-------------------------------------------------------------------------- 535 public Collection<Protein> getDistinctChains() 536 { 537 Set<Protein> distinctChains = null; 538 539 if (CollectionUtil.hasValues(mChains)) 540 { 541 distinctChains = new HashSet<>(3); 542 for (Protein chain : mChains) 543 { 544 if (chain.hasChains()) 545 { 546 distinctChains.addAll(chain.getDistinctChains()); 547 } 548 else 549 { 550 distinctChains.add(chain); 551 } 552 } 553 } 554 555 return distinctChains; 556 } 557 558 //-------------------------------------------------------------------------- 559 /** 560 Returns chains of this Protein object organized into groups that are identical sequences. 561 @return the protein's chains grouped by sequence 562 */ 563 public Collection<Collection<Protein>> getChainStoichiometryGroups() 564 { 565 Collection<Collection<Protein>> stoichiometryGroups = new ArrayList<>(); 566 567 if (CollectionUtil.hasValues(getChains())) 568 { 569 for (Protein chain : getChains()) 570 { 571 boolean added = false; 572 for (Collection<Protein> existingGroup : stoichiometryGroups) 573 { 574 Protein comparisonChain = existingGroup.iterator().next(); 575 if (chain.length() == comparisonChain.length() 576 && new String(chain.getMD5Checksum()).equals(new String(comparisonChain.getMD5Checksum())) 577 && new String(chain.getSHA1Checksum()).equals(new String(comparisonChain.getSHA1Checksum()))) 578 { 579 existingGroup.add(chain); 580 added = true; 581 break; 582 } 583 } 584 585 if (! added) 586 { 587 Collection<Protein> newGroup = new ArrayList<>(5); 588 newGroup.add(chain); 589 stoichiometryGroups.add(newGroup); 590 } 591 } 592 } 593 594 return stoichiometryGroups; 595 } 596 597 //-------------------------------------------------------------------------- 598 public Set<String> getChainIds() 599 { 600 return (mChainIdMap != null ? mChainIdMap.keySet() : null); 601 } 602 603 //-------------------------------------------------------------------------- 604 /** 605 If this Protein object contains a sequence, the length of that sequence is returned. 606 If this Protein object contains other Protein chains, the sum of their lengths is returned. 607 @return the total sequence length 608 */ 609 @Override 610 public int length() 611 { 612 int length = 0; 613 if (CollectionUtil.hasValues(mChains)) 614 { 615 for (Protein chain : mChains) 616 { 617 length += chain.length(); 618 } 619 } 620 else 621 { 622 length = super.length(); 623 } 624 625 return length; 626 } 627 628 //-------------------------------------------------------------------------- 629 // Setup this way to avoid a stackoverflow if clearElementalComposition() is called within clearCalculatedProperties(). 630 @Override 631 public void clearElementalCompositionAndCalculatedProperties() 632 { 633 super.clearElementalComposition(); 634 635 if (mParent != null) 636 { 637 mParent.clearElementalCompositionAndCalculatedProperties(); 638 } 639 } 640 641 //-------------------------------------------------------------------------- 642 public void setAminoAcidSet(AminoAcidSet inValue) 643 { 644 mAASet = inValue; 645 if (CollectionUtil.hasValues(mChains)) 646 { 647 for (Protein chain : mChains) 648 { 649 chain.setAminoAcidSet(inValue); 650 } 651 } 652 653 clearElementalCompositionAndCalculatedProperties(); 654 } 655 656 //-------------------------------------------------------------------------- 657 public AminoAcidSet getAminoAcidSet() 658 { 659 if (null == mAASet) 660 { 661 if (getParent() != null) 662 { 663 Protein topProtein = getTopProtein(); 664 if (topProtein != null) 665 { 666 mAASet = topProtein.getAminoAcidSet(); 667 } 668 } 669 670 if (null == mAASet) 671 { 672 mAASet = AminoAcidSet.STANDARD; 673 } 674 } 675 676 return mAASet; 677 } 678 679 //-------------------------------------------------------------------------- 680 /** 681 Protein objects can be recursively composed of other Protein objects that represent 682 chains or subunits and this method returns the Protein object that contains this Protein object 683 or null if this Protein object is the top object. 684 @return the Protein object that contains this Protein object as a chain / subunit 685 or null if this Protein object is the top object. 686 */ 687 public Protein getParent() 688 { 689 return mParent; 690 } 691 692 //-------------------------------------------------------------------------- 693 // Need to override in order to work with chains. 694 @Override 695 protected void countGaps() 696 { 697 int count = 0; 698 int totalGapLength = 0; 699 700 if (CollectionUtil.hasValues(mChains)) 701 { 702 for (Protein chain : mChains) 703 { 704 count += chain.getNumGaps(); 705 totalGapLength += chain.getTotalGapLength(); 706 } 707 } 708 else 709 { 710 Matcher m = GAP_PATTERN.matcher(getSequence()); 711 while (m.find()) 712 { 713 count++; 714 totalGapLength += m.group(0).length(); 715 } 716 } 717 718 setNumGaps(count); 719 setTotalGapLength(totalGapLength); 720 } 721 722 //-------------------------------------------------------------------------- 723 /** 724 Returns a Map containing the amino acid composition of the protein including any subchains. 725 @return a Map with AminoAcids as keys and Integers as the values 726 */ 727 public AminoAcidComposition getAminoAcidComposition() 728 { 729 if (null == mAAComposition) 730 { 731 AminoAcidComposition aaComposition = new AminoAcidComposition(); 732 733 if (CollectionUtil.hasValues(mChains)) 734 { 735 for (Protein chain : mChains) 736 { 737 aaComposition.addAll(chain.getAminoAcidComposition()); 738 } 739 } 740 else 741 { 742 // AA Composition 743 Map<String, Integer> composition = getComposition(); 744 for (String aaString : composition.keySet()) 745 { 746 if (! aaString.equals("-")) // Ignore gaps 747 { 748 AminoAcid aa = getAminoAcidSet().getAA(aaString.charAt(0)); 749 if (null == aa) 750 { 751 aa = AminoAcid.UNDEFINED; 752 } 753 754 Integer value = composition.get(aaString); 755 756 aaComposition.increment(aa, value); 757 } 758 } 759 } 760 761 mAAComposition = aaComposition; 762 } 763 764 return mAAComposition; 765 } 766 767 //-------------------------------------------------------------------------- 768 /** 769 Returns an unmodifiable copy of the elemental composition Map adjusted (if necessary) 770 for the specified protein analysis mode. The keys are 771 Element objects and the values are Floats. Why Floats instead of Integers you 772 ask? Because some amino acid codes such as B and Z are ambiguous averages. 773 @return the elemental composition map 774 */ 775 public Map<Element, Float> getElementalComposition(ProteinAnalysisMode inMode) 776 { 777 Map<Element, Float> elementalCompositionMap = super.getElementalComposition(); 778 779 if (inMode instanceof ReducedAnalysisMode) 780 { 781 // Create a reduced version 782 Molecule reducedMol = new Molecule(elementalCompositionMap); 783 784 // Was an alkylated cysteine form provided? 785 AminoAcid alkylatedCys = ((ReducedAnalysisMode)inMode).getAlkylatedCysteine(); 786 if (alkylatedCys != null) 787 { 788 AminoAcid currentCysForm = getAminoAcidSet().getAA('C'); 789 // Adjust the elemental composition 790 reducedMol.remove(currentCysForm, getAminoAcidComposition().get(currentCysForm)); 791 reducedMol.add(alkylatedCys, getAminoAcidComposition().get(currentCysForm)); 792 } 793 794 // Reduce any disulfide crosslinks. S-S goes to SH HS which adds 2 hydrogens 795 if (CollectionUtil.hasValues(mXLinks)) 796 { 797 for (ProteinXLink xlink : mXLinks) 798 { 799 if (xlink.getType() == ProteinXLinkType.DISULFIDE) 800 { 801 reducedMol.add(Element.HYDROGEN, 2); 802 } 803 } 804 } 805 else if (mNumDisulfideBonds != null) 806 { 807 reducedMol.add(Element.HYDROGEN, 2 * mNumDisulfideBonds); 808 } 809 810 elementalCompositionMap = reducedMol.getElementalComposition(); 811 } 812 813 return elementalCompositionMap; 814 } 815 816 //-------------------------------------------------------------------------- 817 @Override 818 public Double getMonoisotopicMass() 819 { 820 return getMonoisotopicMass(ProteinAnalysisMode.NATIVE); 821 } 822 823 //-------------------------------------------------------------------------- 824 public Double getMonoisotopicMass(ProteinAnalysisMode inMode) 825 { 826 Double mass = 0.0; 827 if (inMode.equals(ProteinAnalysisMode.NATIVE)) 828 { 829 mass = super.getMonoisotopicMass(); 830 } 831 else if (length() > 0) 832 { 833 Molecule organicMatter = new Molecule(getElementalComposition(inMode)); 834 mass = organicMatter.getMonoisotopicMass(); 835 } 836 837 return mass; 838 } 839 840 //-------------------------------------------------------------------------- 841 @Override 842 public Double getAverageMass() 843 { 844 return getAverageMass(ProteinAnalysisMode.NATIVE); 845 } 846 847 //-------------------------------------------------------------------------- 848 public Double getAverageMass(ProteinAnalysisMode inMode) 849 { 850 Double mass = 0.0; 851 if (inMode.equals(ProteinAnalysisMode.NATIVE)) 852 { 853 mass = super.getAverageMass(); 854 } 855 else if (length() > 0) 856 { 857 Molecule organicMatter = new Molecule(getElementalComposition(inMode)); 858 mass = organicMatter.getAverageMass(); 859 } 860 861 return mass; 862 } 863 864 //-------------------------------------------------------------------------- 865 @Override 866 public Double getOrganicAverageMass() 867 { 868 return getOrganicAverageMass(ProteinAnalysisMode.NATIVE); 869 } 870 871 //-------------------------------------------------------------------------- 872 public Double getOrganicAverageMass(ProteinAnalysisMode inMode) 873 { 874 Double mass = 0.0; 875 if (inMode.equals(ProteinAnalysisMode.NATIVE)) 876 { 877 mass = super.getOrganicAverageMass(); 878 } 879 else if (length() > 0) 880 { 881 Molecule organicMatter = new Molecule(getElementalComposition(inMode)); 882 mass = organicMatter.getOrganicAverageMass(); 883 } 884 885 return mass; 886 } 887 888 //-------------------------------------------------------------------------- 889 /** 890 Determines the isoelectric point (the pH at which the net charge is zero) for the protein. 891 Uses KaSet.BJELLQVIST by default. 892 @return the isoelectric point for the protein 893 */ 894 public Float getIsoelectricPoint() 895 { 896 return getIsoelectricPoint(KaSet.BJELLQVIST); 897 } 898 899 //-------------------------------------------------------------------------- 900 /** 901 Determines the isoelectric point (the pH at which the net charge is zero) 902 for the protein assuming reducing conditions. 903 @param inKaSet the specific set of pKa values to use in calculating the isoelectric point 904 @return the isoelectric point for the protein 905 */ 906 public Float getIsoelectricPoint(KaSet inKaSet) 907 { 908 return getIsoelectricPoint(inKaSet, inKaSet.getDefaultProteinAnalysisMode()); 909 } 910 911 //-------------------------------------------------------------------------- 912 /** 913 Determines the isoelectric point (the pH at which the net charge is zero) for the protein. 914 @param inKaSet the specific set of pKa values to use in calculating the isoelectric point 915 @param inMode the anlysis mode conditions to apply to the calculation 916 @return the isoelectric point for the protein 917 */ 918 public Float getIsoelectricPoint(KaSet inKaSet, ProteinAnalysisMode inMode) 919 { 920 return IsoelectricPoint.valueOf(inKaSet).calculate(this, new SimpleProteinPropertyCalcSettings().setProteinAnalysisMode(inMode)); 921 } 922 923 //-------------------------------------------------------------------------- 924 /** 925 Estimates the protein's net charge at the specified pH assuming reducing conditions. 926 @param pH the specific pH value at which to calculate the protein's net charge 927 @param inKaSet the specific set of pKa values to use in calculating the isoelectric point 928 @return the net charge of the protein at the specified pH 929 */ 930 public Double getNetCharge(double pH, KaSet inKaSet) 931 { 932 return getNetCharge(pH, inKaSet, inKaSet.getDefaultProteinAnalysisMode()); 933 } 934 935 //-------------------------------------------------------------------------- 936 /** 937 Estimates the protein's net charge at the specified pH. 938 @param pH the specific pH value at which to calculate the protein's net charge 939 @param inKaSet the specific set of pKa values to use in calculating the isoelectric point 940 @param inMode the anlysis mode conditions to apply to the calculation 941 @return the net charge of the protein at the specified pH 942 */ 943 public Double getNetCharge(double pH, KaSet inKaSet, ProteinAnalysisMode inMode) 944 { 945 return getNetCharge(pH, constructIonizableGroupMap(inKaSet, inMode)); 946 } 947 948 //-------------------------------------------------------------------------- 949 /** 950 Returns the total number of specified disulfide bonds or null if the number 951 of disulfides has not been set at any chain level. 952 @return the total number of specified disulfide bonds 953 */ 954 public Integer getTotalNumDisulfideBonds() 955 { 956 // Possibilities: - set for 'parent' protein, null in individual chains 957 // - set for 'parent' protein and individual chains 958 // - null for 'parent' protein and individual chains 959 // - wouldn't really make sense for it to be null for the 'parent' protein and set for individual chains 960 961 int count = 0; 962 boolean allNull = true; 963 964 if (CollectionUtil.hasValues(mChains)) 965 { 966 for (Protein chain : mChains) 967 { 968 Integer chainCount = chain.getTotalNumDisulfideBonds(); 969 if (chainCount != null) 970 { 971 count += chainCount; 972 allNull = false; 973 } 974 } 975 } 976 977 if (CollectionUtil.hasValues(mXLinks)) 978 { 979 for (ProteinXLink xlink : mXLinks) 980 { 981 if (xlink.getType() == ProteinXLinkType.DISULFIDE) 982 { 983 count++; 984 allNull = false; 985 } 986 } 987 } 988 else if (mNumDisulfideBonds != null) 989 { 990 count += mNumDisulfideBonds; 991 allNull = false; 992 } 993 994 return (allNull ? null : count); 995 } 996 997 //-------------------------------------------------------------------------- 998 public Protein setNumDisulfideBonds(int inValue) 999 { 1000 // Possibilities: - set for 'parent' protein, null in individual chains 1001 // - set for 'parent' protein and individual chains 1002 // - null for 'parent' protein and individual chains 1003 // - wouldn't really make sense for it to be null for the 'parent' protein and set for individual chains 1004 1005 // TODO: If it has already been set for chains below this protein what should I do? 1006 1007 int cysCount = getAminoAcidComposition().get(AminoAcid.CYSTEINE); 1008 if (inValue > cysCount / 2) 1009 { 1010 throw new RuntimeException("There are not enough cysteines for " + inValue + " disulfide bonds!"); 1011 } 1012 1013 mNumDisulfideBonds = inValue; 1014 1015 clearElementalCompositionAndCalculatedProperties(); 1016 1017 return this; 1018 } 1019 1020 //-------------------------------------------------------------------------- 1021 /** 1022 Returns the total number of free cysteines (or the total number of cysteines if the number 1023 of disulfides has not been set at any chain level. 1024 @return the total number of free cysteines 1025 */ 1026 public int getTotalNumFreeCysteines() 1027 { 1028 int numCys = getAminoAcidComposition().get(AminoAcid.CYSTEINE); 1029 Integer numDisulfides = getTotalNumDisulfideBonds(); 1030 if (numDisulfides != null) numCys -= (numDisulfides * 2); 1031 1032 return numCys; 1033 } 1034 1035 //-------------------------------------------------------------------------- 1036 /** 1037 Returns the estimated molar extinction coefficient at A<sub>280</sub>. If the number of 1038 disulfide bonds has not been specified, it assumes that all cysteines are disulfide-linked. 1039 <p> 1040 This method utilizes the coefficients derived by <i>Pace et al. (1995) Protein Science 4:2411-2423.</i> 1041 </p> 1042 @return the estimated molar extinction coefficient for the protein 1043 */ 1044 public int getExtinctionCoeff() 1045 { 1046 if (null == mExtinctionCoeff) 1047 { 1048 mExtinctionCoeff = ExtinctionCoeff.PROPERTY.calculate(this); 1049 } 1050 1051 return mExtinctionCoeff.intValue(); 1052 } 1053 1054 //-------------------------------------------------------------------------- 1055 /** 1056 Returns the estimated mass attenuation coefficient (ml mg<sup>-1</sup> cm<sup>-1</sup>) at A<sub>280</sub>. If the number of 1057 disulfide bonds has not been specified, it assumes that all cysteines are disulfide-linked. 1058 <p> 1059 This method utilizes the coefficients derived by <i>Pace et al. (1995) Protein Science 4:2411-2423.</i> 1060 </p> 1061 @return the estimated percent molar extinction coefficient for the protein 1062 */ 1063 public float getPercentExtinctionCoeff() 1064 { 1065 if (null == mPercentExtinctionCoeff) 1066 { 1067 mPercentExtinctionCoeff = PctExtinctionCoeff.PROPERTY.calculate(this); 1068 } 1069 1070 return mPercentExtinctionCoeff; 1071 } 1072 1073 1074 //-------------------------------------------------------------------------- 1075 /** 1076 Returns the concentration (mM) of the protein solution by using the Beer Lambert Law. 1077 <pre> 1078 Abs = PCE 1079 1080 Where: Abs = Absorbance at a specific wavelength 1081 P = path length of the cell (assumed to be 1 cm) 1082 C = concentration in moles / liter 1083 E = Molar extinction coeff at a specific wavelength 1084 1085 </pre> 1086 @param inAbsorbance the observed absorbance at 280nm 1087 @return the estimated protein concentration in mM 1088 */ 1089 public float getMillimolarConcFromAbsorbance280(float inAbsorbance) 1090 { 1091 // Add a tiny amount to avoid divide by zero errors 1092 return (float) (1000 * inAbsorbance / (getExtinctionCoeff() + 0.0000001)); 1093 } 1094 1095 1096 //-------------------------------------------------------------------------- 1097 public AminoAcid aminoAcidAt(int inPosition) 1098 { 1099 return getAminoAcidSet().getAA(residueAt(inPosition)); 1100 } 1101 1102 //-------------------------------------------------------------------------- 1103 /** 1104 Convenience method for setting the N-terminal group as pyro-glu based on 1105 whether the N-terminal residue is Glu or Gln. 1106 */ 1107 public void createNTerminalPyroGlu() 1108 { 1109 if (length() > 0) 1110 { 1111 AminoAcid nTerminalResidue = aminoAcidAt(1); 1112 NTerminalGroup nTerminalGroup; 1113 if (nTerminalResidue.equals(AminoAcid.GLUTAMIC_ACID)) 1114 { 1115 nTerminalGroup = NTerminalGroup.PYRO_GLU_N_TERM_GLU; 1116 } 1117 else if (nTerminalResidue.equals(AminoAcid.GLUTAMINE)) 1118 { 1119 nTerminalGroup = NTerminalGroup.PYRO_GLU_N_TERM_GLN; 1120 } 1121 else 1122 { 1123 throw new RuntimeException("The N-Terminal residue must be Glu or Gln in order to form pyro-glutamic acid!"); 1124 } 1125 1126 if (getAminoAcidSet().isLocked()) 1127 { 1128 setAminoAcidSet(getAminoAcidSet().clone()); 1129 } 1130 1131 getAminoAcidSet().setNTerminalGroup(nTerminalGroup); 1132 } 1133 } 1134 1135 //-------------------------------------------------------------------------- 1136 /** 1137 This method converts the asparagine residue of each putative N-link site 1138 into aspartic acid, mimicing enzymatic treatment with PNGase F to remove N-linked carbohydrates. 1139 This method is not reversible and assumes that all putative N-link sites have attached carbohydrate structures. 1140 */ 1141 public void treatWithPNGaseF() 1142 { 1143 List<SeqLocation> nLinkSites = findNLinkedSites(); 1144 if (CollectionUtil.hasValues(nLinkSites)) 1145 { 1146 for (SeqLocation seqLocation : nLinkSites) 1147 { 1148 Protein chain = (seqLocation.getChainId() != null && ! seqLocation.getChainId().equals(getID()) ? getChain(seqLocation.getChainId()) : this); 1149 if (! chain.aminoAcidAt(seqLocation.getStart()).equals(AminoAcid.ASPARAGINE)) 1150 { 1151 throw new RuntimeException("The residue at position " + seqLocation.getStart() + " isn't an asparagine as expected!"); 1152 } 1153 1154 chain.setResidueAt(seqLocation.getStart(), AminoAcid.ASPARTIC_ACID.getOneLetterCode()); 1155 } 1156 } 1157 } 1158 1159 //-------------------------------------------------------------------------- 1160 public List<SeqLocation> findNLinkedSites() 1161 { 1162 List<SeqLocation> sites = new ArrayList<>(); 1163 1164 if (CollectionUtil.hasValues(mChains)) 1165 { 1166 for (Protein chain : mChains) 1167 { 1168 sites.addAll(chain.findNLinkedSites()); 1169 } 1170 } 1171 else 1172 { 1173 Pattern pattern = Pattern.compile("N[^P][ST]([^P]|$)", Pattern.CASE_INSENSITIVE); 1174 Matcher m = pattern.matcher(this.getSequence()); 1175 int start = 0; 1176 while (m.find(start)) 1177 { 1178 sites.add(new SeqLocation(getID(), m.start() + 1, m.start() + 3)); 1179 start = m.start() + 1; 1180 } 1181 } 1182 1183 return sites; 1184 } 1185 1186 //-------------------------------------------------------------------------- 1187 public XMLNode toXMLNode() 1188 { 1189 XMLNode node = super.toXMLNode(); 1190 //node.setTagName(HfgBioXML.PROTEIN_TAG); 1191 if (mNumDisulfideBonds != null) 1192 { 1193 node.setAttribute(HfgBioXML.DISULFIDE_CNT_ATT, mNumDisulfideBonds); 1194 } 1195 1196 if (null == mParent 1197 || getAminoAcidSet() != getTopProtein().getAminoAcidSet()) 1198 { 1199 node.addSubtag(getAminoAcidSet().toXMLNode()); 1200 } 1201 1202 if (CollectionUtil.hasValues(mChains)) 1203 { 1204 XMLNode chainsTag = new XMLTag(HfgBioXML.CHAINS_TAG); 1205 node.addSubtag(chainsTag); 1206 for (Protein chain : mChains) 1207 { 1208 chainsTag.addSubtag(chain.toXMLNode()); 1209 } 1210 } 1211 1212 if (CollectionUtil.hasValues(mXLinks)) 1213 { 1214 XMLNode xlinksTag = new XMLTag(HfgBioXML.XLINKS_TAG); 1215 node.addSubtag(xlinksTag); 1216 for (ProteinXLink xlink : mXLinks) 1217 { 1218 xlinksTag.addSubtag(xlink.toXMLNode()); 1219 } 1220 } 1221 1222 return node; 1223 } 1224 1225 // TODO: Site-specific glycan attachment? 1226 1227 //-------------------------------------------------------------------------- 1228 public Protein addGlycans(Glycan inValue, int inCount) 1229 { 1230 if (inValue != null 1231 && inCount > 0) 1232 { 1233 for (int i = 0; i < inCount; i++) 1234 { 1235 addGlycan(inValue); 1236 } 1237 } 1238 1239 return this; 1240 } 1241 1242 //-------------------------------------------------------------------------- 1243 public Protein addGlycan(Glycan inValue) 1244 { 1245 if (inValue != null) 1246 { 1247 if (null == mGlycans) mGlycans = new ArrayList<>(5); 1248 1249 mGlycans.add(inValue); 1250 clearElementalCompositionAndCalculatedProperties(); 1251 } 1252 1253 return this; 1254 } 1255 1256 //-------------------------------------------------------------------------- 1257 public List<Glycan> getGlycans() 1258 { 1259 List<Glycan> glycans = null; 1260 if (CollectionUtil.hasValues(mGlycans)) 1261 { 1262 glycans = new ArrayList<>(mGlycans); 1263 } 1264 1265 if (CollectionUtil.hasValues(mChains)) 1266 { 1267 for (Protein chain : mChains) 1268 { 1269 List<Glycan> chainGlycans = chain.getGlycans(); 1270 if (CollectionUtil.hasValues(chainGlycans)) 1271 { 1272 if (null == glycans) 1273 { 1274 glycans = new ArrayList<>(chainGlycans); 1275 } 1276 else 1277 { 1278 glycans.addAll(chainGlycans); 1279 } 1280 } 1281 } 1282 } 1283 1284 return glycans; 1285 } 1286 1287 //-------------------------------------------------------------------------- 1288 public void addXLink(ProteinXLink inXLink) 1289 { 1290 if (inXLink != null) 1291 { 1292 if (null == mXLinks) mXLinks = new HashSet<>(); 1293 1294 inXLink.setParentProtein(this); 1295 mXLinks.add(inXLink); 1296 clearElementalCompositionAndCalculatedProperties(); 1297 } 1298 } 1299 1300 //-------------------------------------------------------------------------- 1301 public Set<ProteinXLink> getXLinks() 1302 { 1303 Set<ProteinXLink> xLinks = new HashSet<>(10); 1304 if (CollectionUtil.hasValues(mXLinks)) 1305 { 1306 xLinks.addAll(mXLinks); 1307 } 1308 1309 if (CollectionUtil.hasValues(mChains)) 1310 { 1311 for (Protein chain : mChains) 1312 { 1313 chain.getXLinks(xLinks); 1314 } 1315 } 1316 1317 return xLinks; 1318 } 1319 1320 //-------------------------------------------------------------------------- 1321 public void removeXLink(ProteinXLink inXLink) 1322 { 1323 if (CollectionUtil.hasValues(mXLinks)) 1324 { 1325 for (ProteinXLink xlink : mXLinks) 1326 { 1327 if (xlink.equals(inXLink)) 1328 { 1329 mXLinks.remove(xlink); 1330 break; 1331 } 1332 } 1333 } 1334 1335 if (CollectionUtil.hasValues(mChains)) 1336 { 1337 for (Protein chain : mChains) 1338 { 1339 chain.removeXLink(inXLink); 1340 } 1341 } 1342 1343 } 1344 1345 //-------------------------------------------------------------------------- 1346 public Set<ProteinXLink> removeXLinks() 1347 { 1348 return removeXLinks(null); 1349 } 1350 1351 //-------------------------------------------------------------------------- 1352 public Set<ProteinXLink> removeXLinks(ProteinXLinkType inXLinkType) 1353 { 1354 Set<ProteinXLink> removedXLinks = new HashSet<>(10); 1355 if (CollectionUtil.hasValues(mXLinks)) 1356 { 1357 for (ProteinXLink xlink : mXLinks) 1358 { 1359 if (null == inXLinkType 1360 || xlink.getType().equals(inXLinkType)) 1361 { 1362 removedXLinks.add(xlink); 1363 } 1364 } 1365 1366 for (ProteinXLink xlink : removedXLinks) 1367 { 1368 mXLinks.remove(xlink); 1369 } 1370 } 1371 1372 if (CollectionUtil.hasValues(mChains)) 1373 { 1374 for (Protein chain : mChains) 1375 { 1376 removedXLinks.addAll(chain.removeXLinks(inXLinkType)); 1377 } 1378 } 1379 1380 return removedXLinks; 1381 } 1382 1383 //-------------------------------------------------------------------------- 1384 @Override 1385 public void clearCalculatedProperties() 1386 { 1387 super.clearCalculatedProperties(); 1388 mAAComposition = null; 1389 mIsoelectricPoint = null; 1390 mIsoelectricPointKaSet = null; 1391 mExtinctionCoeff = null; 1392 mPercentExtinctionCoeff = null; 1393 } 1394 1395 //########################################################################## 1396 // PROTECTED METHODS 1397 //########################################################################## 1398 1399 1400 //-------------------------------------------------------------------------- 1401 protected void getXLinks(Set<ProteinXLink> inXLinkList) 1402 { 1403 if (CollectionUtil.hasValues(mXLinks)) 1404 { 1405 inXLinkList.addAll(mXLinks); 1406 } 1407 } 1408 1409 1410 //-------------------------------------------------------------------------- 1411 /** 1412 Returns a map with AminoAcids as keys and Integers as the values. 1413 */ 1414 @Override 1415 protected Map<String, Integer> getComposition() 1416 { 1417 Map<String, Integer> map; 1418 1419 if (CollectionUtil.hasValues(mChains)) 1420 { 1421 map = new HashMap<>(20); 1422 for (Protein chain : mChains) 1423 { 1424 Map<String, Integer> chainMap = chain.getComposition(); 1425 for (String key : chainMap.keySet()) 1426 { 1427 Integer oldValue = map.get(key); 1428 map.put(key, (oldValue != null ? oldValue : 0) + chainMap.get(key)); 1429 } 1430 } 1431 } 1432 else 1433 { 1434 map = super.getComposition(); 1435 } 1436 1437 return map; 1438 } 1439 1440 //-------------------------------------------------------------------------- 1441 @Override 1442 protected Map<Molecule, Integer> getResidueComposition() 1443 { 1444 Map<Molecule, Integer> residueComposition = new HashMap<>(25); 1445 1446 AminoAcidComposition aaComposition = getAminoAcidComposition(); 1447 for (AminoAcid aa : aaComposition.keySet()) 1448 { 1449 residueComposition.put(aa, aaComposition.get(aa)); 1450 } 1451 1452 return residueComposition; 1453 } 1454 1455 //-------------------------------------------------------------------------- 1456 @Override 1457 protected Map<Molecule, Integer> getTerminiComposition() 1458 { 1459 Map<Molecule, Integer> terminiComposition = new HashMap<>(5); 1460 1461 if (CollectionUtil.hasValues(mChains)) 1462 { 1463 for (Protein chain : mChains) 1464 { 1465 Molecule nTerminus = chain.getAminoAcidSet().getNTerminalGroup(); 1466 Integer oldCount = terminiComposition.get(nTerminus); 1467 int newCount = 1 + (oldCount != null ? oldCount : 0); 1468 terminiComposition.put(nTerminus, newCount); 1469 1470 Molecule cTerminus = chain.getAminoAcidSet().getCTerminalGroup(); 1471 oldCount = terminiComposition.get(cTerminus); 1472 newCount = 1 + (oldCount != null ? oldCount : 0); 1473 terminiComposition.put(cTerminus, newCount); 1474 } 1475 } 1476 else 1477 { 1478 terminiComposition.put(getAminoAcidSet().getNTerminalGroup(), 1); 1479 terminiComposition.put(getAminoAcidSet().getCTerminalGroup(), 1); 1480 } 1481 1482 return terminiComposition; 1483 } 1484 1485 //-------------------------------------------------------------------------- 1486 @Override 1487 protected Map<ProteinXLinkType, Integer> getXLinkComposition() 1488 { 1489 Map<ProteinXLinkType, Integer> xLinkComposition = new HashMap<>(5); 1490 1491 if (CollectionUtil.hasValues(mChains)) 1492 { 1493 for (Protein chain : mChains) 1494 { 1495 Map<ProteinXLinkType, Integer> chainXLinkComposition = chain.getXLinkComposition(); 1496 if (CollectionUtil.hasValues(chainXLinkComposition)) 1497 { 1498 for (ProteinXLinkType xlinkType : chainXLinkComposition.keySet()) 1499 { 1500 Integer oldCount = xLinkComposition.get(xlinkType); 1501 int newCount = chainXLinkComposition.get(xlinkType) + (oldCount != null ? oldCount : 0); 1502 xLinkComposition.put(xlinkType, newCount); 1503 } 1504 } 1505 } 1506 } 1507 1508 if (CollectionUtil.hasValues(mXLinks)) 1509 { 1510 for (ProteinXLink xlink : mXLinks) 1511 { 1512 Integer oldCount = xLinkComposition.get(xlink.getType()); 1513 int newCount = 1 + (oldCount != null ? oldCount : 0); 1514 xLinkComposition.put(xlink.getType(), newCount); 1515 } 1516 } 1517 else 1518 { 1519 Integer disulfideCount = getTotalNumDisulfideBonds(); 1520 if (disulfideCount != null) 1521 { 1522 Integer oldCount = xLinkComposition.get(ProteinXLinkType.DISULFIDE); 1523 int newCount = disulfideCount + (oldCount != null ? oldCount : 0); 1524 xLinkComposition.put(ProteinXLinkType.DISULFIDE, newCount); 1525 } 1526 } 1527 1528 return xLinkComposition; 1529 } 1530 1531 //-------------------------------------------------------------------------- 1532 @Override 1533 protected void recalculateElementalComposition() 1534 { 1535 super.recalculateElementalComposition(); 1536 1537 List<Glycan> glycans = getGlycans(); 1538 if (CollectionUtil.hasValues(glycans)) 1539 { 1540 for (Glycan glycan : glycans) 1541 { 1542 addElementalComposition(glycan.getElementalComposition()); 1543 remove(Molecule.H2O); // Subtract a water lost in the bonding 1544 } 1545 } 1546 } 1547 1548 //########################################################################## 1549 // PRIVATE METHODS 1550 //########################################################################## 1551 1552 //-------------------------------------------------------------------------- 1553 private Protein getTopProtein() 1554 { 1555 return mParent != null ? mParent.getTopProtein() : this; 1556 } 1557 1558 //-------------------------------------------------------------------------- 1559 private void checkId() 1560 { 1561 if (mParent != null) 1562 { 1563 Protein topProtein = getTopProtein(); 1564 1565// if (CollectionUtil.hasValues(mChains)) 1566// { 1567// for (Protein chain : mChains) 1568// { 1569// chain.checkId(); 1570// } 1571// } 1572// else 1573// { 1574 String newId = topProtein.assignChainId(this); 1575 1576 if (null == getID() 1577 || ! getID().equals(newId)) 1578 { 1579 String oldId = getID(); 1580 super.setID(newId); 1581 propogateIdChange(oldId, newId); 1582 } 1583// } 1584 } 1585 } 1586 1587 //-------------------------------------------------------------------------- 1588 private void propogateIdChange(String inOldId, String inNewId) 1589 { 1590 if (mXLinks != null) 1591 { 1592 for (ProteinXLink xlink : mXLinks) 1593 { 1594 if (xlink.getDonorChainId() != null 1595 && xlink.getDonorChainId().equals(inOldId)) 1596 { 1597 xlink.setDonorChainId(inNewId); 1598 } 1599 1600 if (xlink.getAcceptorChainId() != null 1601 && xlink.getAcceptorChainId().equals(inOldId)) 1602 { 1603 xlink.setAcceptorChainId(inNewId); 1604 } 1605 } 1606 } 1607 1608 Protein parent = mParent; 1609 Protein topParent = getTopProtein(); 1610 while (parent != null 1611 && parent != topParent) 1612 { 1613 if (parent.mChainIdMap != null) 1614 { 1615 for (String id : parent.mChainIdMap.keySet()) 1616 { 1617 if (parent.mChainIdMap.get(id).equals(this)) 1618 { 1619 parent.mChainIdMap.remove(id); 1620 parent.mChainIdMap.put(getID(), this); 1621 break; 1622 } 1623 } 1624 } 1625 1626 if (parent.mXLinks != null) 1627 { 1628 for (ProteinXLink xlink : parent.mXLinks) 1629 { 1630 if (xlink.getDonorChainId() != null 1631 && xlink.getDonorChainId().equals(inOldId)) 1632 { 1633 xlink.setDonorChainId(inNewId); 1634 } 1635 1636 if (xlink.getAcceptorChainId() != null 1637 && xlink.getAcceptorChainId().equals(inOldId)) 1638 { 1639 xlink.setAcceptorChainId(inNewId); 1640 } 1641 } 1642 } 1643 1644 parent = parent.mParent; 1645 } 1646 } 1647 1648 //-------------------------------------------------------------------------- 1649 private String assignChainId(Protein inChain) 1650 { 1651 String chainId = inChain.getID(); 1652 1653 if (inChain.mParent != null) 1654 { 1655 if (! StringUtil.isSet(chainId)) 1656 { 1657 chainId = "" + (char)('A' + mChainIdMap.size()); 1658 } 1659 1660 while (mChainIdMap.containsKey(chainId)) 1661 { 1662 if (chainId.length() == 1 1663 && Character.isLetter(chainId.charAt(0))) 1664 { 1665 if (StringUtil.isSet(inChain.mParent.getID()) 1666 && inChain.mParent != this) 1667 { 1668 chainId = inChain.mParent.getID() + " chain_" + chainId; 1669 } 1670 else 1671 { 1672 chainId = "" + (char)((int)chainId.charAt(0) + 1); 1673 } 1674 } 1675 else 1676 { 1677 Matcher matcher = sChainIdPattern.matcher(chainId); 1678 if (matcher.find()) 1679 { 1680 chainId = matcher.replaceFirst("_" + (char) ((int)matcher.group(1).charAt(0) + 1)); 1681 } 1682 else 1683 { 1684 chainId += "_B"; 1685 } 1686 } 1687 } 1688 1689 // We'll go into an infinite loop if we try inChain.setID() here. 1690 mChainIdMap.put(chainId, inChain); 1691 } 1692 1693 return chainId; 1694 } 1695 1696 //-------------------------------------------------------------------------- 1697 private Map<IonizableGroup, Integer> constructIonizableGroupMap(KaSet inKaSet, ProteinAnalysisMode inMode) 1698 { 1699 Map<IonizableGroup, Integer> ionizableGroupMap = new HashMap<>(); 1700 1701 if (CollectionUtil.hasValues(mChains)) 1702 { 1703 for (Protein chain : mChains) 1704 { 1705 Map<IonizableGroup, Integer> chainMap = chain.constructIonizableGroupMap(inKaSet, ProteinAnalysisMode.REDUCED); 1706 for (IonizableGroup group : chainMap.keySet()) 1707 { 1708 Integer oldValue = ionizableGroupMap.get(group); 1709 int newValue = (oldValue != null ? oldValue : 0) + chainMap.get(group); 1710 ionizableGroupMap.put(group, newValue); 1711 } 1712 } 1713 1714 if (inMode == ProteinAnalysisMode.NATIVE) 1715 { 1716 // Exclude disulfide-linked cysteines 1717 List<IonizableGroup> cysGroups = inKaSet.getIonizableGroups(AminoAcid.CYSTEINE); 1718 if (cysGroups != null) 1719 { 1720 ionizableGroupMap.put(cysGroups.get(0), getTotalNumFreeCysteines()); 1721 } 1722 } 1723 1724 } 1725 else if (length() > 0) 1726 { 1727 AminoAcid cTerminalResidue = aminoAcidAt(length()); 1728 AminoAcidComposition aaComposition = getAminoAcidComposition(); 1729 for (AminoAcid aa : aaComposition.keySet()) 1730 { 1731 Integer aaCount = aaComposition.get(aa); 1732 if (aaCount != null && aaCount > 0) 1733 { 1734 if (aa == cTerminalResidue 1735 && inKaSet.getCTerminalSidechainKa(cTerminalResidue) != null 1736 && getAminoAcidSet().getCTerminalGroup().equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) 1737 { 1738 IonizableGroup group = inKaSet.getCTerminalSidechainKa(cTerminalResidue); 1739 if (group != null) 1740 { 1741 ionizableGroupMap.put(group, 1); 1742 aaCount--; 1743 } 1744 } 1745 1746 List<IonizableGroup> groups = inKaSet.getIonizableGroups(aa); 1747 if (groups != null) 1748 { 1749 if (inMode == ProteinAnalysisMode.NATIVE) 1750 { 1751 // Exclude disulfide-linked cysteines 1752 if (aa.equals(AminoAcid.CYSTEINE)) 1753 { 1754 aaCount = getTotalNumFreeCysteines(); 1755 } 1756 } 1757 1758 for (IonizableGroup group : groups) 1759 { 1760 ionizableGroupMap.put(group, aaCount); 1761 } 1762 } 1763 } 1764 } 1765 1766 IonizableGroup group = inKaSet.getNTerminalKa(getAminoAcidSet().getNTerminalGroup(), aminoAcidAt(1)); 1767 if (group != null) ionizableGroupMap.put(group, 1); 1768 1769 group = inKaSet.getCTerminalKa(getAminoAcidSet().getCTerminalGroup(), aminoAcidAt(length())); 1770 if (group != null) ionizableGroupMap.put(group, 1); 1771 } 1772 1773 return ionizableGroupMap; 1774 } 1775 1776 //-------------------------------------------------------------------------- 1777 /** 1778 Estimates the protein's net charge at the specified pH. 1779 */ 1780 private double getNetCharge(double pH, Map<IonizableGroup, Integer> inIonizableGroupMap) 1781 { 1782 double netCharge = 0; 1783 1784 double concOfHIions = Math.pow(10, -pH); 1785 1786 if (inIonizableGroupMap != null) 1787 { 1788 for (IonizableGroup group : inIonizableGroupMap.keySet()) 1789 { 1790 netCharge += group.getCharge(inIonizableGroupMap.get(group), concOfHIions); 1791 } 1792 } 1793 1794 return netCharge; 1795 } 1796 1797 //--------------------------------------------------------------------------- 1798 private void recursivelyBuildSequenceInstanceMap(Map<String, Integer> inSequenceInstanceMap) 1799 { 1800 if (getSequence() != null) 1801 { 1802 String refinedChain = getSequence().toUpperCase(); 1803 // Remove any trailing stops 1804 if (refinedChain.endsWith("*")) 1805 { 1806 refinedChain = refinedChain.substring(0, refinedChain.length() - 1); 1807 } 1808 1809 if (inSequenceInstanceMap.containsKey(refinedChain)) 1810 { 1811 inSequenceInstanceMap.put(refinedChain, inSequenceInstanceMap.get(refinedChain) + 1); 1812 } 1813 else 1814 { 1815 inSequenceInstanceMap.put(refinedChain, 1); 1816 } 1817 } 1818 else if (CollectionUtil.hasValues(getChains())) 1819 { 1820 for (Protein chain : getChains()) 1821 { 1822 chain.recursivelyBuildSequenceInstanceMap(inSequenceInstanceMap); 1823 } 1824 } 1825 } 1826 1827 //--------------------------------------------------------------------------- 1828 private String getSeqDataString() 1829 { 1830 // Build a chain map 1831 Map<String, Integer> sequenceInstanceMap = new HashMap<>(5); 1832 recursivelyBuildSequenceInstanceMap(sequenceInstanceMap); 1833 1834 List<String> sortedChains = new ArrayList<>(sequenceInstanceMap.keySet()); 1835 Collections.sort(sortedChains); 1836 1837 StringBuilderPlus seqData = new StringBuilderPlus().setDelimiter("/"); 1838 for (String chain : sortedChains) 1839 { 1840 seqData.delimitedAppend(sequenceInstanceMap.get(chain)); 1841 seqData.append("_"); 1842 seqData.append(chain); 1843 } 1844 1845 return seqData.toString(); 1846 } 1847 1848}