001package com.hfg.bio.seq; 002 003import java.io.IOException; 004import java.io.Reader; 005import java.util.ArrayList; 006import java.util.Collection; 007import java.util.Collections; 008import java.util.HashSet; 009import java.util.List; 010import java.util.Set; 011 012import com.hfg.bio.AminoAcidSet; 013import com.hfg.bio.CTerminalGroup; 014import com.hfg.bio.DigestFragment; 015import com.hfg.bio.DigestSettings; 016import com.hfg.bio.NTerminalGroup; 017import com.hfg.bio.Protease; 018import com.hfg.util.collection.CollectionUtil; 019import com.hfg.util.collection.OrderedSet; 020 021//------------------------------------------------------------------------------ 022/** 023 Performs a digestion of a protein via protease(s). 024 <div> 025 @author J. Alex Taylor, hairyfatguy.com 026 </div> 027 */ 028//------------------------------------------------------------------------------ 029// com.hfg XML/HTML Coding Library 030// 031// This library is free software; you can redistribute it and/or 032// modify it under the terms of the GNU Lesser General Public 033// License as published by the Free Software Foundation; either 034// version 2.1 of the License, or (at your option) any later version. 035// 036// This library is distributed in the hope that it will be useful, 037// but WITHOUT ANY WARRANTY; without even the implied warranty of 038// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 039// Lesser General Public License for more details. 040// 041// You should have received a copy of the GNU Lesser General Public 042// License along with this library; if not, write to the Free Software 043// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 044// 045// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 046// jataylor@hairyfatguy.com 047//------------------------------------------------------------------------------ 048 049public class ProteinDigest 050{ 051 private DigestSettings mSettings; 052 private Protein mProtein; 053 private Set<Protease> mProteases; 054 055 //########################################################################### 056 // CONSTRUCTORS 057 //########################################################################### 058 059 //--------------------------------------------------------------------------- 060 public ProteinDigest(Protein inProtein, DigestSettings inSettings) 061 { 062 mProtein = inProtein; 063 mSettings = inSettings; 064 065 mProteases = inSettings.getProteases(); 066 if (! CollectionUtil.hasValues(mProteases)) 067 { 068 throw new RuntimeException("No protease(s) specified!"); 069 } 070 } 071 072 //########################################################################### 073 // PUBLIC METHODS 074 //########################################################################### 075 076 //--------------------------------------------------------------------------- 077 public DigestSettings getSettings() 078 { 079 return mSettings; 080 } 081 082 //--------------------------------------------------------------------------- 083 public Protein getProtein() 084 { 085 return mProtein; 086 } 087 088 //--------------------------------------------------------------------------- 089 public List<DigestFragment> getFragments() 090 { 091 List<DigestFragment> outFrags; 092 093 if (getSettings().getAlkylatedCys() != null) 094 { 095 Protein proteinCopy = getProtein().clone(); 096 proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE); 097 098 // Reflect that the cysteines in the protein are alkylated. 099 AminoAcidSet aaSet = new AminoAcidSet(getProtein().getAminoAcidSet()); 100 aaSet.setMapping('c', getSettings().getAlkylatedCys()); 101 aaSet.setMapping('C', getSettings().getAlkylatedCys()); 102 proteinCopy.setAminoAcidSet(aaSet); 103 104 // Are there other types of x-links? 105 if (CollectionUtil.hasValues(proteinCopy.getXLinks())) 106 { 107 outFrags = complexDigestion(proteinCopy, getSettings()); 108 } 109 else 110 { 111 outFrags = simpleDigestion(proteinCopy, getSettings()); 112 } 113 } 114 else 115 { 116 // Native digest. Deal with X-links 117 outFrags = complexDigestion(getProtein(), getSettings()); 118 } 119/* TODO: What was this for? 120 Set<DigestFragment> fragSet = new OrderedSet<>(outFrags); 121 outFrags.clear(); 122 outFrags.addAll(fragSet); 123*/ 124 return outFrags; 125 } 126 127 //########################################################################### 128 // PRIVATE METHODS 129 //########################################################################### 130 131 //-------------------------------------------------------------------------- 132 public boolean isCleavageSite(char inP1Residue, char inP1PrimeResidue) 133 { 134 char p1Residue = Character.toUpperCase(inP1Residue); 135 char p1PrimeResidue = Character.toUpperCase(inP1PrimeResidue); 136 137 boolean isCleavageSite = false; 138 for (Protease protease : mProteases) 139 { 140 isCleavageSite = ((protease.getP1Specificity().indexOf(p1Residue) >= 0 141 && protease.getExcludedP1PrimeResidues().indexOf(p1PrimeResidue) == -1) 142 || (protease.getP1PrimeSpecificity().indexOf(p1PrimeResidue) >= 0 143 && protease.getExcludedP1Residues().indexOf(p1Residue) == -1)); 144 145 if (isCleavageSite) break; 146 } 147 148 return isCleavageSite; 149 } 150 151 //-------------------------------------------------------------------------- 152 private void addPrecedingAndTrailingResidues(List<DigestFragment> inFragments, Protein inProtein) 153 { 154 for (DigestFragment fragment : inFragments) 155 { 156 int position = fragment.getBegin() - 1; 157 if (position > 0) 158 { 159 fragment.setPrecedingResidue(inProtein.residueAt(position)); 160 } 161 162 position = fragment.getEnd() + 1; 163 if (position <= inProtein.length()) 164 { 165 fragment.setTrailingResidue(inProtein.residueAt(position)); 166 } 167 } 168 } 169 170 //-------------------------------------------------------------------------- 171 private List<DigestFragment> simpleDigestion(Protein inProtein, DigestSettings inSettings) 172 { 173 List<DigestFragment> outFrags = new ArrayList<>(); 174 175 if (CollectionUtil.hasValues(inProtein.getChains())) 176 { 177 for (Protein chain : inProtein.getChains()) 178 { 179 outFrags.addAll(simpleDigestion(chain, inSettings)); 180 } 181 } 182 else 183 { 184 AminoAcidSet aaSet; 185 if (inSettings.getAlkylatedCys() != null) 186 { 187 aaSet = new AminoAcidSet(inProtein.getAminoAcidSet()); 188 aaSet.setMapping('c', inSettings.getAlkylatedCys()); 189 aaSet.setMapping('C', inSettings.getAlkylatedCys()); 190 } 191 else 192 { 193 aaSet = inProtein.getAminoAcidSet().clone(); 194 } 195 196 NTerminalGroup nTerminalGroup = inProtein.getAminoAcidSet().getNTerminalGroup(); 197 if (! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS)) 198 { 199 aaSet.setNTerminalGroup(NTerminalGroup.UNMODIFIED_N_TERMINUS); 200 } 201 202 CTerminalGroup cTerminalGroup = inProtein.getAminoAcidSet().getCTerminalGroup(); 203 if (! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) 204 { 205 aaSet.setCTerminalGroup(CTerminalGroup.UNMODIFIED_C_TERMINUS); 206 } 207 208 try 209 { 210 Reader seqReader = null; 211 try 212 { 213 seqReader = inProtein.getSequenceReader(); 214 215 SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), aaSet, inSettings); 216 217 int p1Residue = seqReader.read(); 218 if (p1Residue != -1) 219 { 220 StringBuilder frag = new StringBuilder((char)p1Residue + ""); 221 int p1PrimeResidue; 222 while ((p1PrimeResidue = seqReader.read()) != -1) 223 { 224 if (isCleavageSite((char)p1Residue, (char)p1PrimeResidue)) 225 { 226 List<DigestFragment> fragments = fragWindow.push(frag.toString()); 227 if (fragments != null) 228 { 229 for (DigestFragment fragment : fragments) 230 { 231 fragment.setSrcChainId(inProtein.getID()); 232 233 // Make sure any custom termini are passed along 234 235 if (1 == fragment.getBegin() 236 && ! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS)) 237 { 238 AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone(); 239 fragAASet.setNTerminalGroup(nTerminalGroup); 240 fragment.setAminoAcidSet(fragAASet); 241 } 242 243 if (inProtein.length() == fragment.getEnd() 244 && ! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) 245 { 246 AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone(); 247 fragAASet.setCTerminalGroup(cTerminalGroup); 248 fragment.setAminoAcidSet(fragAASet); 249 } 250 } 251 252 addPrecedingAndTrailingResidues(fragments, inProtein); 253 outFrags.addAll(fragments); 254 } 255 frag.setLength(0); 256 frag.append((char)p1PrimeResidue); 257 } 258 else 259 { 260 frag.append((char)p1PrimeResidue); 261 } 262 p1Residue = p1PrimeResidue; 263 } 264 265 List<DigestFragment> fragments = fragWindow.lastPush(frag.toString()); 266 if (fragments != null) 267 { 268 for (DigestFragment fragment : fragments) 269 { 270 fragment.setSrcChainId(inProtein.getID()); 271 272 // Make sure any custom termini are passed along 273 274 if (1 == fragment.getBegin() 275 && ! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS)) 276 { 277 AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone(); 278 fragAASet.setNTerminalGroup(nTerminalGroup); 279 fragment.setAminoAcidSet(fragAASet); 280 } 281 282 if (inProtein.length() == fragment.getEnd() 283 && ! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) 284 { 285 AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone(); 286 fragAASet.setCTerminalGroup(cTerminalGroup); 287 fragment.setAminoAcidSet(fragAASet); 288 } 289 } 290 291 addPrecedingAndTrailingResidues(fragments, inProtein); 292 outFrags.addAll(fragments); 293 } 294 } 295 } 296 finally 297 { 298 if (seqReader != null) seqReader.close(); 299 } 300 } 301 catch (IOException e) 302 { 303 throw new RuntimeException(e); 304 } 305 } 306 307 // Apply the DigestSettings limits. 308 for (int i = 0; i < outFrags.size(); i++) 309 { 310 if (! inSettings.meetsCriteria(outFrags.get(i))) 311 { 312 outFrags.remove(i--); 313 } 314 } 315 316 return outFrags; 317 } 318 319 //-------------------------------------------------------------------------- 320 private List<DigestFragment> complexDigestion(Protein inProtein, DigestSettings inSettings) 321 { 322 // Initially disable limits when finding fragments. 323 // We'll apply the desired settings after all the fragments have been linked up. 324 DigestSettings settingsWithoutLimits = inSettings.clone(); 325 settingsWithoutLimits.setMinFragmentLength(null); 326 settingsWithoutLimits.setMaxFragmentLength(null); 327 settingsWithoutLimits.setMinFragmentMass(null); 328 settingsWithoutLimits.setMaxFragmentMass(null); 329 330 List<DigestFragment> rawFrags = simpleDigestion(inProtein, settingsWithoutLimits); 331 332 if (CollectionUtil.hasValues(inProtein.getXLinks())) 333 { 334 // For ea. x-link, bind it to the combinations of raw fragments 335 List<ProteinXLink> orderedXLinks = new ArrayList<>(inProtein.getXLinks()); 336 Collections.sort(orderedXLinks); 337 338 for (ProteinXLink xlink : orderedXLinks) 339 { 340 List<Integer> donorFragIndexes = new ArrayList<>(5); 341 List<Integer> acceptorFragIndexes = new ArrayList<>(5); 342 Set<DigestFragment> fragsToAdd = new HashSet<>(10); 343 Set<Integer> fragsIndexesToRemove = new HashSet<>(10); 344 345 for (int fragIdx = 0; fragIdx < rawFrags.size(); fragIdx++) 346 { 347 DigestFragment frag = rawFrags.get(fragIdx); 348 349 boolean hasDonorSite = false; 350 if (frag.hasChains()) 351 { 352 for (DigestFragment fragChain : (List<DigestFragment>) (Object) frag.getChains()) 353 { 354 if (fragChain.getSrcChainId().equals(xlink.getDonorChainId()) 355 && xlink.getDonorPosition() >= fragChain.getBegin() 356 && xlink.getDonorPosition() <= fragChain.getEnd()) 357 { 358 // Donor site is within this frag. 359 hasDonorSite = true; 360 break; 361 } 362 } 363 } 364 else if (frag.getSrcChainId().equals(xlink.getDonorChainId()) 365 && xlink.getDonorPosition() >= frag.getBegin() 366 && xlink.getDonorPosition() <= frag.getEnd()) 367 { 368 hasDonorSite = true; 369 } 370 371 372 boolean hasAcceptorSite = false; 373 if (CollectionUtil.hasValues(frag.getChains())) 374 { 375 for (DigestFragment fragChain : (List<DigestFragment>) (Object) frag.getChains()) 376 { 377 if (fragChain.getSrcChainId().equals(xlink.getAcceptorChainId()) 378 && xlink.getAcceptorPosition() >= fragChain.getBegin() 379 && xlink.getAcceptorPosition() <= fragChain.getEnd()) 380 { 381 // Acceptor site is within this frag. 382 hasAcceptorSite = true; 383 break; 384 } 385 } 386 } 387 else if (frag.getSrcChainId().equals(xlink.getAcceptorChainId()) 388 && xlink.getAcceptorPosition() >= frag.getBegin() 389 && xlink.getAcceptorPosition() <= frag.getEnd()) 390 { 391 hasAcceptorSite = true; 392 } 393 394 395 if (hasDonorSite && hasAcceptorSite) 396 { 397 DigestFragment linkedFrag; 398 if (CollectionUtil.hasValues(frag.getChains())) 399 { 400 linkedFrag = frag; 401 } 402 else 403 { 404 linkedFrag = new DigestFragment(); 405 linkedFrag.addChain(frag.clone()); 406 // Add it back to the pool 407 fragsToAdd.add(linkedFrag); 408 409 // Flag the original unlinked frag for removal 410 fragsIndexesToRemove.add(fragIdx); 411 } 412 413 String donorFragId = null; 414 String acceptorFragId = null; 415 for (DigestFragment fragChain : (List<DigestFragment>) (Object) linkedFrag.getChains()) 416 { 417 if (fragChain.getSrcChainId().equals(xlink.getDonorChainId()) 418 && xlink.getDonorPosition() >= fragChain.getBegin() 419 && xlink.getDonorPosition() <= fragChain.getEnd()) 420 { 421 donorFragId = fragChain.getID(); 422 } 423 424 if (fragChain.getSrcChainId().equals(xlink.getAcceptorChainId()) 425 && xlink.getAcceptorPosition() >= fragChain.getBegin() 426 && xlink.getAcceptorPosition() <= fragChain.getEnd()) 427 { 428 acceptorFragId = fragChain.getID(); 429 } 430 } 431 432 linkedFrag.addXLink(xlink.clone() 433 .setDonorChainId(donorFragId) 434 .setAcceptorChainId(acceptorFragId)); 435 } 436 else 437 { 438 if (hasDonorSite) donorFragIndexes.add(fragIdx); 439 if (hasAcceptorSite) acceptorFragIndexes.add(fragIdx); 440 } 441 } 442 443 // Link the donors & acceptors in all possible combinations 444 for (int donorFragIndex : donorFragIndexes) 445 { 446 DigestFragment donorFrag = rawFrags.get(donorFragIndex); 447 String donorFragId = null; 448 if (CollectionUtil.hasValues(donorFrag.getChains())) 449 { 450 for (DigestFragment linkedChain : (Collection<DigestFragment>) (Object) donorFrag.getChains()) 451 { 452 if (linkedChain.getSrcChainId().equals(xlink.getDonorChainId()) 453 && xlink.getDonorPosition() >= linkedChain.getBegin() 454 && xlink.getDonorPosition() <= linkedChain.getEnd()) 455 { 456 donorFragId = linkedChain.getID(); 457 break; 458 } 459 } 460 } 461 else 462 { 463 donorFragId = donorFrag.getID(); 464 } 465 466 for (int acceptorFragIndex : acceptorFragIndexes) 467 { 468 DigestFragment acceptorFrag = rawFrags.get(acceptorFragIndex); 469 String acceptorFragId = null; 470 471 DigestFragment linkedFrag; 472 if (CollectionUtil.hasValues(donorFrag.getChains())) 473 { 474 linkedFrag = (DigestFragment) donorFrag.clone(); 475 } 476 else 477 { 478 linkedFrag = new DigestFragment(); 479 donorFrag = (DigestFragment) donorFrag.clone() 480 .setID(donorFrag.getID() + ":" + donorFrag.getBeginFragIndex() + (donorFrag.getEndFragIndex() == donorFrag.getBeginFragIndex() ? "" : ".." + donorFrag.getEndFragIndex())); 481 donorFragId = donorFrag.getID(); 482 linkedFrag.addChain(donorFrag); 483 } 484 485 // Add the acceptor chain (if it isn't already present) 486 boolean containsAcceptorFrag = false; 487 for (DigestFragment linkedChain : (Collection<DigestFragment>) (Object) linkedFrag.getChains()) 488 { 489 if (linkedChain.getSrcChainId().equals(xlink.getAcceptorChainId()) 490 && acceptorFrag.getBegin() == linkedChain.getBegin() 491 && acceptorFrag.getEnd() == linkedChain.getEnd()) 492 { 493 acceptorFragId = linkedChain.getID(); 494 containsAcceptorFrag = true; 495 break; 496 } 497 } 498 499 if (! containsAcceptorFrag) 500 { 501 acceptorFrag = (DigestFragment) acceptorFrag.clone() 502 .setID(acceptorFrag.getID() + ":" + acceptorFrag.getBeginFragIndex() + (acceptorFrag.getEndFragIndex() == acceptorFrag.getBeginFragIndex() ? "" : ".." + acceptorFrag.getEndFragIndex())); 503 acceptorFragId = acceptorFrag.getID(); 504 linkedFrag.addChain(acceptorFrag); 505 } 506 507 508 linkedFrag.addXLink(xlink.clone() 509 .setDonorChainId(donorFragId) 510 .setAcceptorChainId(acceptorFragId)); 511 512 // Add it back to the pool 513 rawFrags.add(linkedFrag); 514 } 515 } 516 517 // Now remove the raw frags that were linked. 518 519 // We have to remove them in reverse order for all the indices to be valid 520 fragsIndexesToRemove.addAll(donorFragIndexes); 521 fragsIndexesToRemove.addAll(acceptorFragIndexes); 522 523 if (CollectionUtil.hasValues(fragsIndexesToRemove)) 524 { 525 List<Integer> orderedFragIndices = new ArrayList<>(fragsIndexesToRemove); 526 Collections.sort(orderedFragIndices); 527 Collections.reverse(orderedFragIndices); 528 529 for (int fragIndex : orderedFragIndices) 530 { 531 rawFrags.remove(fragIndex); 532 } 533 } 534 535 rawFrags.addAll(fragsToAdd); 536 } 537 } 538 539 // Apply the DigestSettings limits. 540 for (int i = 0; i < rawFrags.size(); i++) 541 { 542 if (! inSettings.meetsCriteria(rawFrags.get(i))) 543 { 544 rawFrags.remove(i--); 545 } 546 } 547 548 return rawFrags; 549 } 550 551 //########################################################################## 552 // INNER CLASS 553 //########################################################################## 554 555 protected class SlidingWindow 556 { 557 private String mChainId; 558 private AminoAcidSet mAminoAcidSet; 559 private StringBuilder[] mFrags; 560 private DigestSettings mDigestSettings; 561 private int mIndex = 1; 562 private int mLength; 563 private int mCurrentFragIndex = 0; 564 565 //----------------------------------------------------------------------- 566 public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings) 567 { 568 mChainId = inChainId; 569 mAminoAcidSet = inAASet; 570 mDigestSettings = inSettings; 571 mFrags = new StringBuilder[inSettings.getMaxMissedCleavages() + 3]; 572 mFrags[0] = new StringBuilder(); 573 mFrags[1] = new StringBuilder(); 574 mLength = 2; 575 } 576 577 //----------------------------------------------------------------------- 578 public List<DigestFragment> push(String inFrag) 579 { 580 List<DigestFragment> outFrags = null; 581 582 if (mLength < mFrags.length) 583 { 584 // Still filling the window 585 mFrags[mLength++] = new StringBuilder(inFrag); 586 } 587 else 588 { 589 StringBuilder tmp = mFrags[0]; 590 591 for (int i = 1; i < mFrags.length; i++) 592 { 593 mFrags[i - 1] = mFrags[i]; 594 } 595 596 mFrags[mFrags.length - 1] = tmp; 597 mFrags[mFrags.length - 1].setLength(0); 598 mFrags[mFrags.length - 1].append(inFrag); 599 600 mIndex += mFrags[0].length(); 601 602 mCurrentFragIndex++; 603 604 List<DigestFragment> frags = evaluateCurrentFrag(); 605 if (frags != null) 606 { 607 outFrags = new ArrayList<>(frags); 608 } 609 } 610 611 return outFrags; 612 } 613 614 //----------------------------------------------------------------------- 615 public List<DigestFragment> lastPush(String inFrag) 616 { 617 List<DigestFragment> outFrags = null; 618 619 List<DigestFragment> frags = push(inFrag); 620 if (frags != null) 621 { 622 outFrags = new ArrayList<>(frags); 623 } 624 625 for (int i = 0; i <= mDigestSettings.getMaxMissedCleavages(); i++) 626 { 627 frags = push(""); 628 if (frags != null) 629 { 630 if (null == outFrags) 631 { 632 outFrags = new ArrayList<>(frags.size()); 633 } 634 outFrags.addAll(frags); 635 } 636 } 637 638 return outFrags; 639 } 640 641 //----------------------------------------------------------------------- 642 private List<DigestFragment> evaluateCurrentFrag() 643 { 644 List<DigestFragment> fragments = null; 645 646 int maxMissedCleavages = 0; 647 if (mDigestSettings != null 648 && mDigestSettings.getMaxMissedCleavages() != null) 649 { 650 maxMissedCleavages = mDigestSettings.getMaxMissedCleavages(); 651 } 652 653 if (null == mDigestSettings 654 || null == mDigestSettings.getMaxFragmentLength() 655 || mFrags[1].length() <= mDigestSettings.getMaxFragmentLength()) 656 { 657 StringBuilder frag = new StringBuilder(); 658 for (int i = 1; i < mFrags.length - 1; i++) 659 { 660 if (mFrags[i].length() == 0) break; 661 662 frag.append(mFrags[i]); 663 664 if (null == mDigestSettings 665 || null == mDigestSettings.getMinFragmentLength() 666 || frag.length() >= mDigestSettings.getMinFragmentLength()) 667 { 668 // Don't get too big 669 if (mDigestSettings != null 670 && mDigestSettings.getMaxFragmentLength() != null 671 && frag.length() > mDigestSettings.getMaxFragmentLength()) 672 { 673 break; 674 } 675 676 DigestFragment digestFrag = allocateNewDigestFragment(); 677 digestFrag.setSequence(frag.toString()); 678 digestFrag.setBegin(mIndex); 679 digestFrag.setEnd(mIndex + frag.length() - 1); 680 digestFrag.setNumUncleavedSites(i - 1); 681 digestFrag.setBeginFragIndex(mCurrentFragIndex); 682 digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1)); 683 684 if (null == fragments) 685 { 686 fragments = new ArrayList<>(maxMissedCleavages); 687 } 688 fragments.add(digestFrag); 689 } 690 } 691 } 692 693 return fragments; 694 } 695 696 //----------------------------------------------------------------------- 697 private DigestFragment allocateNewDigestFragment() 698 { 699 DigestFragment frag = new DigestFragment(); 700 frag.setID(mChainId); 701 frag.setAminoAcidSet(mAminoAcidSet); 702 703 return frag; 704 } 705 706 } 707}