001package com.hfg.bio; 002 003import java.util.*; 004import java.io.Reader; 005import java.io.IOException; 006 007import com.hfg.bio.seq.Protein; 008import com.hfg.bio.seq.ProteinXLink; 009import com.hfg.bio.seq.ProteinXLinkType; 010import com.hfg.exception.UnmodifyableObjectException; 011import com.hfg.util.CompareUtil; 012import com.hfg.util.StringUtil; 013import com.hfg.util.collection.CollectionUtil; 014import com.hfg.util.collection.OrderedSet; 015 016//------------------------------------------------------------------------------ 017/** 018 * Chemical or biological proteolytic agent which can be used to theoretically 019 * digest a Protein. 020 * 021 * @author J. Alex Taylor, hairyfatguy.com 022 */ 023//------------------------------------------------------------------------------ 024// com.hfg XML/HTML Coding Library 025// 026// This library is free software; you can redistribute it and/or 027// modify it under the terms of the GNU Lesser General Public 028// License as published by the Free Software Foundation; either 029// version 2.1 of the License, or (at your option) any later version. 030// 031// This library is distributed in the hope that it will be useful, 032// but WITHOUT ANY WARRANTY; without even the implied warranty of 033// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 034// Lesser General Public License for more details. 035// 036// You should have received a copy of the GNU Lesser General Public 037// License along with this library; if not, write to the Free Software 038// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 039// 040// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 041// jataylor@hairyfatguy.com 042//------------------------------------------------------------------------------ 043 044/* 045 046 What to do if... 047 048 - X-links are defined? 049 050 - Should the DigestSettings include an optional cys-alkylation form? This would 051 allow the user to specify whether the digest should be a 'native digest' or 052 whether the cysteines (including those specified as x-linked) should be considered 053 reduced and alkylated. 054 055 - Multiple identical chains exist? 056 057 - If the fragments aren't x-linked, concatenate the chain ids in the DigestFragment: 'H1/H2' 058 059*/ 060 061public class Protease implements Comparable 062{ 063 //########################################################################## 064 // PRIVATE FIELDS 065 //########################################################################## 066 067 private static final Set<Protease> sValues = new OrderedSet<>(12); 068 069 private final String mName; 070 071 private String mP6Specificity = ""; 072 private String mExcludedP6Residues = ""; 073 074 private String mP5Specificity = ""; 075 private String mExcludedP5Residues = ""; 076 077 private String mP4Specificity = ""; 078 private String mExcludedP4Residues = ""; 079 080 private String mP3Specificity = ""; 081 private String mExcludedP3Residues = ""; 082 083 private String mP2Specificity = ""; 084 private String mExcludedP2Residues = ""; 085 086 private String mP1Specificity = ""; 087 private String mExcludedP1Residues = ""; 088 089 090 private String mP1PrimeSpecificity = ""; 091 private String mExcludedP1PrimeResidues = ""; 092 093 private String mP2PrimeSpecificity = ""; 094 private String mExcludedP2PrimeResidues = ""; 095 096 private String mP3PrimeSpecificity = ""; 097 private String mExcludedP3PrimeResidues = ""; 098 099 private boolean mLocked; 100 101 private static final int NUM_N_TERMINAL_POSITIONS = 6; 102 private static final int NUM_C_TERMINAL_POSITIONS = 3; 103 104 //########################################################################## 105 // PUBLIC FIELDS 106 //########################################################################## 107 108 public static final Protease TRYPSIN = new Protease("Trypsin"); 109 public static final Protease LYS_C = new Protease("Lys-C"); 110 public static final Protease CHYMOTRYPSIN = new Protease("Chymotrypsin"); 111 public static final Protease GLU_C = new Protease("Glu-C"); 112 public static final Protease ASP_N = new Protease("Asp-N"); 113 public static final Protease ASP_N_DE = new Protease("Asp-N (DE)"); 114 115 /** 116 Human thrombin protease. 117 <div> 118 Uses data from Gallwitz, M. et. al. (2012). "The extended cleavage specificity of human thrombin." 119 <i>PLoS ONE,</i> 7 (2). 120 </div> 121 Note: Experiments suggest a preference for P in P2 and R in P3'. 122 */ 123 public static final Protease THROMBIN = new Protease("Thrombin"); 124 125 /** 126 Tobacco Etch virus protease. 127 <div> 128 Uses data from Kostallas, G. et. al. (2011). "Substrate profiling of tobacco Etch virus protease using a novel Fluorescence-Assisted whole-cell assay." 129 <i>PLoS ONE,</i> 6 (1). 130 </div> 131 */ 132 public static final Protease TEV = new Protease("TEV"); 133 134 /** 135 Human rhinovirus 3C protease. 136 <div> 137 Uses data from Cordingley, M.G. et. al. (1990). "Substrate requirements of human rhinovirus 3C protease for peptide cleavage in vitro." 138 <i>Journal of Biological Chemistry,</i> 265 (16), 9062-9065. 139 </div> 140 */ 141 public static final Protease HRV_3C = new Protease("HRV 3C"); 142 143 /** 144 Factor Xa protease. 145 <div> 146 Uses data from Harris, J. et. al. (2000). "Rapid and general profiling of protease specificity by using combinatorial fluorogenic substrate libraries." 147 <i>PNAS,</i> 97 (14), 7754-7759. 148 </div> 149 */ 150 public static final Protease FACTOR_Xa = new Protease("Factor Xa"); 151 152 public static final Protease ENTEROKINASE = new Protease("Enterokinase"); 153 154 static 155 { 156 TRYPSIN.setP1Specificity("KR") 157 .setExcludedP1PrimeResidues("P") 158 .lock(); 159 160 LYS_C.setP1Specificity("K") 161 .lock(); 162 163 CHYMOTRYPSIN.setP1Specificity("YFWL") 164 .lock(); 165 166 GLU_C.setP1Specificity("E") 167 .lock(); 168 169 ASP_N.setP1PrimeSpecificity("D") 170 .lock(); 171 172 ASP_N_DE.setP1PrimeSpecificity("DE") 173 .lock(); 174 175 THROMBIN.setP1Specificity("R") 176 .setP1PrimeSpecificity("SAGT") 177 .setExcludedP2PrimeResidues("DE") 178 .lock(); 179 180 TEV.setP6Specificity("E") 181 .setP5Specificity("RSGLVWCEAPQKN") 182 .setP4Specificity("VLGARESWDTIKMNY") 183 .setP3Specificity("Y") 184 .setP2Specificity("FGASVCREQTHLPWDIN") 185 .setP1Specificity("Q") 186 .setP1PrimeSpecificity("GAVSRDECKMLQIT") 187 .lock(); 188 189 HRV_3C.setP5Specificity("EQRIDHF") 190 .setP4Specificity("TAVDF") 191 .setP3Specificity("L") 192 .setP2Specificity("F") 193 .setP1Specificity("Q") 194 .setP1PrimeSpecificity("G") 195 .setP2PrimeSpecificity("P") 196 .lock(); 197 198 FACTOR_Xa.setP4Specificity("ILPV") 199 .setExcludedP3Residues("P") 200 .setP2Specificity("GAFPSWY") 201 .setP1Specificity("R") 202 .lock(); 203 204 ENTEROKINASE 205 .setP5Specificity("D") 206 .setP4Specificity("D") 207 .setP3Specificity("D") 208 .setP2Specificity("D") 209 .setP1Specificity("K") 210 .lock(); 211 212 } 213 214 215 //########################################################################## 216 // CONSTRUCTORS 217 //########################################################################## 218 219 //-------------------------------------------------------------------------- 220 public Protease(String inName) 221 { 222 mName = inName; 223 sValues.add(this); 224 } 225 226 //########################################################################## 227 // PUBLIC METHODS 228 //########################################################################## 229 230 //-------------------------------------------------------------------------- 231 public static Protease[] values() 232 { 233 return sValues.toArray(new Protease[0]); 234 } 235 236 //-------------------------------------------------------------------------- 237 public static Protease valueOf(String inName) 238 { 239 Protease requestedProtease = null; 240 if (StringUtil.isSet(inName)) 241 { 242 for (Protease protease : sValues) 243 { 244 if (protease.name().equalsIgnoreCase(inName.trim())) 245 { 246 requestedProtease = protease; 247 break; 248 } 249 } 250 } 251 252 return requestedProtease; 253 } 254 255 //-------------------------------------------------------------------------- 256 @Override 257 public int hashCode() 258 { 259 return name().hashCode(); 260 } 261 262 //-------------------------------------------------------------------------- 263 @Override 264 public boolean equals(Object inObj2) 265 { 266 return 0 == compareTo(inObj2); 267 } 268 269 //-------------------------------------------------------------------------- 270 @Override 271 public int compareTo(Object inObj2) 272 { 273 return inObj2 instanceof Protease ? CompareUtil.compare(name(), ((Protease)inObj2).name()) : -1; 274 } 275 276 //-------------------------------------------------------------------------- 277 public String name() 278 { 279 return mName; 280 } 281 282 //-------------------------------------------------------------------------- 283 @Override 284 public String toString() 285 { 286 return name(); 287 } 288 289 //-------------------------------------------------------------------------- 290 public Protease setP6Specificity(String inValue) 291 { 292 checkLock(); 293 mP6Specificity = inValue; 294 return this; 295 } 296 297 //-------------------------------------------------------------------------- 298 public String getP6Specificity() 299 { 300 return mP6Specificity; 301 } 302 303 //-------------------------------------------------------------------------- 304 public Protease setExcludedP6Residues(String inValue) 305 { 306 checkLock(); 307 mExcludedP6Residues = inValue; 308 return this; 309 } 310 311 //-------------------------------------------------------------------------- 312 public String getExcludedP6Residues() 313 { 314 return mExcludedP6Residues; 315 } 316 317 318 //-------------------------------------------------------------------------- 319 public Protease setP5Specificity(String inValue) 320 { 321 checkLock(); 322 mP5Specificity = inValue; 323 return this; 324 } 325 326 //-------------------------------------------------------------------------- 327 public String getP5Specificity() 328 { 329 return mP5Specificity; 330 } 331 332 //-------------------------------------------------------------------------- 333 public Protease setExcludedP5Residues(String inValue) 334 { 335 checkLock(); 336 mExcludedP5Residues = inValue; 337 return this; 338 } 339 340 //-------------------------------------------------------------------------- 341 public String getExcludedP5Residues() 342 { 343 return mExcludedP5Residues; 344 } 345 346 347 //-------------------------------------------------------------------------- 348 public Protease setP4Specificity(String inValue) 349 { 350 checkLock(); 351 mP4Specificity = inValue; 352 return this; 353 } 354 355 //-------------------------------------------------------------------------- 356 public String getP4Specificity() 357 { 358 return mP4Specificity; 359 } 360 361 //-------------------------------------------------------------------------- 362 public Protease setExcludedP4Residues(String inValue) 363 { 364 checkLock(); 365 mExcludedP4Residues = inValue; 366 return this; 367 } 368 369 //-------------------------------------------------------------------------- 370 public String getExcludedP4Residues() 371 { 372 return mExcludedP4Residues; 373 } 374 375 376 //-------------------------------------------------------------------------- 377 public Protease setP3Specificity(String inValue) 378 { 379 checkLock(); 380 mP3Specificity = inValue; 381 return this; 382 } 383 384 //-------------------------------------------------------------------------- 385 public String getP3Specificity() 386 { 387 return mP3Specificity; 388 } 389 390 //-------------------------------------------------------------------------- 391 public Protease setExcludedP3Residues(String inValue) 392 { 393 checkLock(); 394 mExcludedP3Residues = inValue; 395 return this; 396 } 397 398 //-------------------------------------------------------------------------- 399 public String getExcludedP3Residues() 400 { 401 return mExcludedP3Residues; 402 } 403 404 405 //-------------------------------------------------------------------------- 406 public Protease setP2Specificity(String inValue) 407 { 408 checkLock(); 409 mP2Specificity = inValue; 410 return this; 411 } 412 413 //-------------------------------------------------------------------------- 414 public String getP2Specificity() 415 { 416 return mP2Specificity; 417 } 418 419 420 //-------------------------------------------------------------------------- 421 public Protease setExcludedP2Residues(String inValue) 422 { 423 checkLock(); 424 mExcludedP2Residues = inValue; 425 return this; 426 } 427 428 //-------------------------------------------------------------------------- 429 public String getExcludedP2Residues() 430 { 431 return mExcludedP2Residues; 432 } 433 434 435 //-------------------------------------------------------------------------- 436 public Protease setP1Specificity(String inValue) 437 { 438 checkLock(); 439 mP1Specificity = inValue; 440 return this; 441 } 442 443 //-------------------------------------------------------------------------- 444 public String getP1Specificity() 445 { 446 return mP1Specificity; 447 } 448 449 450 //-------------------------------------------------------------------------- 451 public Protease setExcludedP1PrimeResidues(String inValue) 452 { 453 checkLock(); 454 mExcludedP1PrimeResidues = inValue; 455 return this; 456 } 457 458 //-------------------------------------------------------------------------- 459 public String getExcludedP1PrimeResidues() 460 { 461 return mExcludedP1PrimeResidues; 462 } 463 464 465 //-------------------------------------------------------------------------- 466 public Protease setP1PrimeSpecificity(String inValue) 467 { 468 checkLock(); 469 mP1PrimeSpecificity = inValue; 470 return this; 471 } 472 473 //-------------------------------------------------------------------------- 474 public String getP1PrimeSpecificity() 475 { 476 return mP1PrimeSpecificity; 477 } 478 479 480 //-------------------------------------------------------------------------- 481 public Protease setExcludedP1Residues(String inValue) 482 { 483 checkLock(); 484 mExcludedP1Residues = inValue; 485 return this; 486 } 487 488 //-------------------------------------------------------------------------- 489 public String getExcludedP1Residues() 490 { 491 return mExcludedP1Residues; 492 } 493 494 495 //-------------------------------------------------------------------------- 496 public Protease setExcludedP2PrimeResidues(String inValue) 497 { 498 checkLock(); 499 mExcludedP2PrimeResidues = inValue; 500 return this; 501 } 502 503 //-------------------------------------------------------------------------- 504 public String getExcludedP2PrimeResidues() 505 { 506 return mExcludedP2PrimeResidues; 507 } 508 509 //-------------------------------------------------------------------------- 510 public Protease setP2PrimeSpecificity(String inValue) 511 { 512 checkLock(); 513 mP2PrimeSpecificity = inValue; 514 return this; 515 } 516 517 //-------------------------------------------------------------------------- 518 public String getP2PrimeSpecificity() 519 { 520 return mP2PrimeSpecificity; 521 } 522 523 524 //-------------------------------------------------------------------------- 525 public Protease setP3PrimeSpecificity(String inValue) 526 { 527 checkLock(); 528 mP3PrimeSpecificity = inValue; 529 return this; 530 } 531 532 //-------------------------------------------------------------------------- 533 public String getP3PrimeSpecificity() 534 { 535 return mP3PrimeSpecificity; 536 } 537 538 539 //-------------------------------------------------------------------------- 540 public Protease lock() 541 { 542 mLocked = true; 543 return this; 544 } 545 546 //-------------------------------------------------------------------------- 547 public List<DigestFragment> digest(Protein inProtein, DigestSettings inSettings) 548 { 549 List<DigestFragment> outFrags; 550 551 if (inSettings.getAlkylatedCys() != null) 552 { 553 Protein proteinCopy = inProtein.clone(); 554 proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE); 555 556 // Reflect that the cysteines in the protein are alkylated. 557 AminoAcidSet aaSet = new AminoAcidSet(inProtein.getAminoAcidSet()); 558 aaSet.setMapping('c', inSettings.getAlkylatedCys()); 559 aaSet.setMapping('C', inSettings.getAlkylatedCys()); 560 proteinCopy.setAminoAcidSet(aaSet); 561 562 // Are there other types of x-links? 563 if (CollectionUtil.hasValues(proteinCopy.getXLinks())) 564 { 565 outFrags = complexDigestion(proteinCopy, inSettings); 566 } 567 else 568 { 569 outFrags = simpleDigestion(proteinCopy, inSettings); 570 } 571 572 for (DigestFragment frag : outFrags) 573 { 574 frag.setAminoAcidSet(aaSet); 575 } 576 } 577 else 578 { 579 // Native digest. Deal with X-links 580 outFrags = complexDigestion(inProtein, inSettings); 581 } 582 583 return outFrags; 584 } 585 586 //-------------------------------------------------------------------------- 587 public boolean isCleavageSite(char inP1Residue, char inP1PrimeResidue) 588 { 589 char p1Residue = Character.toUpperCase(inP1Residue); 590 char p1PrimeResidue = Character.toUpperCase(inP1PrimeResidue); 591 592 return ((mP1Specificity.indexOf(p1Residue) >= 0 593 && mExcludedP1PrimeResidues.indexOf(p1PrimeResidue) == -1) 594 || (mP1PrimeSpecificity.indexOf(p1PrimeResidue) >= 0 595 && mExcludedP1Residues.indexOf(p1Residue) == -1)); 596 597 } 598 599 //-------------------------------------------------------------------------- 600 public boolean isCleavageSite(CharSequence inNTerminalResidues, CharSequence inCTerminalResidues) 601 { 602 String nTerminalResidues = inNTerminalResidues.toString().toUpperCase(); 603 String cTerminalResidues = inCTerminalResidues.toString().toUpperCase(); 604 605 return ( 606 ((mP6Specificity.length() == 0 && mExcludedP6Residues.length() == 0 ) 607 || nTerminalResidues.length() < 6 608 || mP6Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 6)) >= 0 609 || (mExcludedP6Residues.length() > 0 && mExcludedP6Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 6)) == -1)) 610 && ((mP5Specificity.length() == 0 && mExcludedP5Residues.length() == 0 ) 611 || nTerminalResidues.length() < 5 612 || mP5Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 5)) >= 0 613 || (mExcludedP5Residues.length() > 0 && mExcludedP5Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 5)) == -1)) 614 && ((mP4Specificity.length() == 0 && mExcludedP4Residues.length() == 0 ) 615 || nTerminalResidues.length() < 4 616 || mP4Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 4)) >= 0 617 || (mExcludedP4Residues.length() > 0 && mExcludedP4Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 4)) == -1)) 618 && ((mP3Specificity.length() == 0 && mExcludedP3Residues.length() == 0 ) 619 || nTerminalResidues.length() < 3 620 || mP3Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 3)) >= 0 621 || (mExcludedP3Residues.length() > 0 && mExcludedP3Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 3)) == -1)) 622 && ((mP2Specificity.length() == 0 && mExcludedP2Residues.length() == 0 ) 623 || nTerminalResidues.length() < 2 624 || mP2Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 2)) >= 0 625 || (mExcludedP2Residues.length() > 0 && mExcludedP2Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 2)) == -1)) 626 && ((mP1Specificity.length() == 0 && mExcludedP1Residues.length() == 0 ) 627 || nTerminalResidues.length() < 1 628 || (mP1Specificity.length() > 0 && mP1Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 1)) >= 0) 629 || (mExcludedP1Residues.length() > 0 && mExcludedP1Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 1)) == -1)) 630 && ((mP1PrimeSpecificity.length() == 0 && mExcludedP1PrimeResidues.length() == 0 ) 631 || cTerminalResidues.length() < 1 632 || (mP1PrimeSpecificity.length() > 0 && mP1PrimeSpecificity.indexOf(cTerminalResidues.charAt(0)) >= 0) 633 || (mExcludedP1PrimeResidues.length() > 0 && mExcludedP1PrimeResidues.indexOf(cTerminalResidues.charAt(0)) == -1)) 634 && ((mP2PrimeSpecificity.length() == 0 && mExcludedP2PrimeResidues.length() == 0) 635 || cTerminalResidues.length() < 2 636 || (mP2PrimeSpecificity.length() > 0 && mP2PrimeSpecificity.indexOf(cTerminalResidues.charAt(1)) >= 0) 637 || (mExcludedP2PrimeResidues.length() > 0 && mExcludedP2PrimeResidues.indexOf(cTerminalResidues.charAt(1)) == -1)) 638 && ((mP3PrimeSpecificity.length() == 0 && mExcludedP3PrimeResidues.length() == 0) 639 || cTerminalResidues.length() < 3 640 || (mP3PrimeSpecificity.length() > 0 && mP3PrimeSpecificity.indexOf(cTerminalResidues.charAt(2)) >= 0) 641 || (mExcludedP3PrimeResidues.length() > 0 && mExcludedP3PrimeResidues.indexOf(cTerminalResidues.charAt(2)) == -1))); 642 643 } 644 645 //########################################################################## 646 // PRIVATE METHODS 647 //########################################################################## 648 649 //-------------------------------------------------------------------------- 650 private void checkLock() 651 { 652 if (mLocked) throw new UnmodifyableObjectException("This object is locked and cannot be modified."); 653 } 654 655 //-------------------------------------------------------------------------- 656 private List<DigestFragment> simpleDigestion(Protein inProtein, DigestSettings inSettings) 657 { 658 List<DigestFragment> outFrags = new ArrayList<>(); 659 660 if (CollectionUtil.hasValues(inProtein.getChains())) 661 { 662 for (Protein chain : inProtein.getChains()) 663 { 664 outFrags.addAll(simpleDigestion(chain, inSettings)); 665 } 666 } 667 else 668 { 669 try 670 { 671 Reader seqReader = null; 672 try 673 { 674 seqReader = inProtein.getSequenceReader(); 675 676 SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), inProtein.getAminoAcidSet(), inSettings); 677 678 int bufferSize = NUM_N_TERMINAL_POSITIONS + NUM_C_TERMINAL_POSITIONS; 679 int[] site = new int[bufferSize]; 680 for (int i = 0; i < bufferSize; i++) 681 { 682 site[i] = -1; 683 } 684 685 StringBuilder nTerminalResidues = new StringBuilder(); 686 StringBuilder cTerminalResidues = new StringBuilder(); 687 688 int residue = seqReader.read(); 689 if (residue != -1) 690 { 691 StringBuilder frag = new StringBuilder((char) residue + ""); 692 693 site[bufferSize - 1] = residue; // Drops in on the right side and slides left 694 695 while ((residue = seqReader.read()) != -1) 696 { 697 for (int i = 0; i < bufferSize - 1; i++) 698 { 699 site[i] = site[i + 1]; 700 } 701 702 site[bufferSize - 1] = residue; 703 704 if (-1 == site[NUM_N_TERMINAL_POSITIONS - 1]) 705 { 706 continue; 707 } 708 709 nTerminalResidues.setLength(0); 710 for (int i = 0; i < NUM_N_TERMINAL_POSITIONS; i++) 711 { 712 nTerminalResidues.append((char) site[i]); 713 } 714 715 cTerminalResidues.setLength(0); 716 for (int i = NUM_N_TERMINAL_POSITIONS; i < bufferSize; i++) 717 { 718 cTerminalResidues.append((char) site[i]); 719 } 720 721 if (isCleavageSite(nTerminalResidues, cTerminalResidues)) 722 { 723 List<DigestFragment> fragments = fragWindow.push(frag.toString()); 724 if (fragments != null) 725 { 726 outFrags.addAll(fragments); 727 } 728 frag.setLength(0); 729 frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); 730 } 731 else 732 { 733 frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); 734 } 735 } 736 737 // Process residues remaining in the site buffer 738 while (hasUnprocessedBufferContent(site)) 739 { 740 for (int i = 0; i < bufferSize - 1; i++) 741 { 742 site[i] = site[i + 1]; 743 } 744 745 site[bufferSize - 1] = -1; 746 747 if (-1 == site[NUM_N_TERMINAL_POSITIONS - 1] 748 || -1 == site[NUM_N_TERMINAL_POSITIONS]) 749 { 750 continue; 751 } 752 753 nTerminalResidues.setLength(0); 754 for (int i = 0; i < NUM_N_TERMINAL_POSITIONS; i++) 755 { 756 int aa = site[i]; 757 nTerminalResidues.append(aa != -1 ? (char) aa : ""); 758 } 759 760 cTerminalResidues.setLength(0); 761 for (int i = NUM_N_TERMINAL_POSITIONS; i < bufferSize; i++) 762 { 763 int aa = site[i]; 764 cTerminalResidues.append(aa != -1 ? (char) aa : ""); 765 } 766 767 if (isCleavageSite(nTerminalResidues, cTerminalResidues)) 768 { 769 List<DigestFragment> fragments = fragWindow.push(frag.toString()); 770 if (fragments != null) 771 { 772 outFrags.addAll(fragments); 773 } 774 frag.setLength(0); 775 frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); 776 } 777 else 778 { 779 frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); 780 } 781 } 782 783 List<DigestFragment> fragments = fragWindow.lastPush(frag.toString()); 784 if (fragments != null) 785 { 786 outFrags.addAll(fragments); 787 } 788 } 789 } 790 finally 791 { 792 if (seqReader != null) seqReader.close(); 793 } 794 } 795 catch (IOException e) 796 { 797 throw new RuntimeException(e); 798 } 799 } 800 801 // Apply the DigestSettings limits. 802 for (int i = 0; i < outFrags.size(); i++) 803 { 804 if (! inSettings.meetsCriteria(outFrags.get(i))) 805 { 806 outFrags.remove(i--); 807 } 808 } 809 810 return outFrags; 811 } 812 813 //-------------------------------------------------------------------------- 814 private boolean hasUnprocessedBufferContent(int[] inBuffer) 815 { 816 boolean hasUnprocessedContent = false; 817 818 for (int i = NUM_N_TERMINAL_POSITIONS - 1; i < NUM_N_TERMINAL_POSITIONS + NUM_C_TERMINAL_POSITIONS; i++) 819 { 820 if (inBuffer[i] != -1) 821 { 822 hasUnprocessedContent = true; 823 break; 824 } 825 } 826 827 return hasUnprocessedContent; 828 } 829 830 //-------------------------------------------------------------------------- 831 private List<DigestFragment> complexDigestion(Protein inProtein, 832 DigestSettings inSettings) 833 { 834 // Initially disable limits when finding fragments. 835 // We'll apply the desired settings after all the fragments have been linked up. 836 DigestSettings settingsWithoutLimits = inSettings.clone(); 837 settingsWithoutLimits.setMinFragmentLength(null); 838 settingsWithoutLimits.setMaxFragmentLength(null); 839 settingsWithoutLimits.setMinFragmentMass(null); 840 settingsWithoutLimits.setMaxFragmentMass(null); 841 842 List<DigestFragment> rawFrags = simpleDigestion(inProtein, settingsWithoutLimits); 843 844 // For ea. x-link, bind it to the combinations of raw fragments 845 for (ProteinXLink xlink : inProtein.getXLinks()) 846 { 847 List<DigestFragment> donorFrags = new ArrayList<>(); 848 List<DigestFragment> acceptorFrags = new ArrayList<>(); 849 Set<DigestFragment> fragsToAdd = new HashSet<>(); 850 Set<DigestFragment> fragsToRemove = new HashSet<>(); 851 852 for (DigestFragment frag : rawFrags) 853 { 854 DigestFragment donorChain = null; 855 if (CollectionUtil.hasValues(frag.getChains())) 856 { 857 donorChain = (DigestFragment) frag.getChain(xlink.getDonorChainId()); 858 } 859 else if (xlink.getDonorChainId().equals(frag.getID())) 860 { 861 donorChain = frag; 862 } 863 864 boolean hasDonorSite = false; 865 if (donorChain != null 866 && xlink.getDonorPosition() >= donorChain.getBegin() 867 && xlink.getDonorPosition() <= donorChain.getEnd()) 868 { 869 // Donor site is within this frag. 870 hasDonorSite = true; 871 } 872 873 874 DigestFragment acceptorChain = null; 875 if (CollectionUtil.hasValues(frag.getChains())) 876 { 877 acceptorChain = (DigestFragment) frag.getChain(xlink.getAcceptorChainId()); 878 } 879 else if (xlink.getAcceptorChainId().equals(frag.getID())) 880 { 881 acceptorChain = frag; 882 } 883 884 boolean hasAcceptorSite = false; 885 if (acceptorChain != null 886 && xlink.getAcceptorPosition() >= acceptorChain.getBegin() 887 && xlink.getAcceptorPosition() <= acceptorChain.getEnd()) 888 { 889 // Acceptor site is within this frag. 890 hasAcceptorSite = true; 891 } 892 893 if (hasDonorSite && hasAcceptorSite) 894 { 895 DigestFragment linkedFrag; 896 if (CollectionUtil.hasValues(frag.getChains())) 897 { 898 linkedFrag = frag; 899 } 900 else 901 { 902 linkedFrag = new DigestFragment(); 903 linkedFrag.addChain(frag.clone()); 904 // Add it back to the pool 905 fragsToAdd.add(linkedFrag); 906 907 fragsToRemove.add(frag); 908 } 909 910 linkedFrag.addXLink(xlink); 911 } 912 else 913 { 914 if (hasDonorSite) donorFrags.add(frag); 915 if (hasAcceptorSite) acceptorFrags.add(frag); 916 } 917 } 918 919 // Link the donors & acceptors in all possible combinations 920 for (DigestFragment donorFrag : donorFrags) 921 { 922 for (DigestFragment acceptorFrag : acceptorFrags) 923 { 924 DigestFragment linkedFrag; 925 if (CollectionUtil.hasValues(donorFrag.getChains())) 926 { 927 linkedFrag = (DigestFragment) donorFrag.clone(); 928 } 929 else 930 { 931 linkedFrag = new DigestFragment(); 932 linkedFrag.addChain(donorFrag.clone()); 933 } 934 935 // Add the acceptor chain (if it isn't already present) 936 Protein acceptorChain = linkedFrag.getChain(xlink.getAcceptorChainId()); 937 if (null == acceptorChain) 938 { 939 linkedFrag.addChain(acceptorFrag.clone()); 940 } 941 942 linkedFrag.addXLink(xlink); 943 // Add it back to the pool 944 rawFrags.add(linkedFrag); 945 } 946 } 947 948 // Now remove the raw frags that were linked. 949 for (DigestFragment donorFrag : donorFrags) 950 { 951 rawFrags.remove(donorFrag); 952 } 953 954 for (DigestFragment acceptorFrag : acceptorFrags) 955 { 956 rawFrags.remove(acceptorFrag); 957 } 958 959 for (DigestFragment frag : fragsToRemove) 960 { 961 rawFrags.remove(frag); 962 } 963 964 rawFrags.addAll(fragsToAdd); 965 } 966 967 // Apply the DigestSettings limits. 968 for (int i = 0; i < rawFrags.size(); i++) 969 { 970 if (! inSettings.meetsCriteria(rawFrags.get(i))) 971 { 972 rawFrags.remove(i--); 973 } 974 } 975 976 return rawFrags; 977 } 978 979 //########################################################################## 980 // INNER CLASS 981 //########################################################################## 982 983 protected class SlidingWindow 984 { 985 private String mChainId; 986 private AminoAcidSet mAminoAcidSet; 987 private StringBuilder[] mFrags; 988 private DigestSettings mDigestSettings; 989 private int mIndex = 1; 990 private int mLength; 991 private int mCurrentFragIndex = 0; 992 993 //----------------------------------------------------------------------- 994 public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings) 995 { 996 mChainId = inChainId; 997 mAminoAcidSet = inAASet; 998 mDigestSettings = inSettings; 999 mFrags = new StringBuilder[inSettings.getMaxMissedCleavages() + 3]; 1000 mFrags[0] = new StringBuilder(); 1001 mFrags[1] = new StringBuilder(); 1002 mLength = 2; 1003 } 1004 1005 //----------------------------------------------------------------------- 1006 public List<DigestFragment> push(String inFrag) 1007 { 1008 List<DigestFragment> outFrags = null; 1009 1010 if (mLength < mFrags.length) 1011 { 1012 // Still filling the window 1013 mFrags[mLength++] = new StringBuilder(inFrag); 1014 } 1015 else 1016 { 1017 StringBuilder tmp = mFrags[0]; 1018 1019 for (int i = 1; i < mFrags.length; i++) 1020 { 1021 mFrags[i - 1] = mFrags[i]; 1022 } 1023 1024 mFrags[mFrags.length - 1] = tmp; 1025 mFrags[mFrags.length - 1].setLength(0); 1026 mFrags[mFrags.length - 1].append(inFrag); 1027 1028 mIndex += mFrags[0].length(); 1029 1030 mCurrentFragIndex++; 1031 1032 List<DigestFragment> frags = evaluateCurrentFrag(); 1033 if (frags != null) 1034 { 1035 outFrags = new ArrayList<>(frags); 1036 } 1037 } 1038 1039 return outFrags; 1040 } 1041 1042 //----------------------------------------------------------------------- 1043 public List<DigestFragment> lastPush(String inFrag) 1044 { 1045 List<DigestFragment> outFrags = null; 1046 1047 List<DigestFragment> frags = push(inFrag); 1048 if (frags != null) 1049 { 1050 outFrags = new ArrayList<>(frags); 1051 } 1052 1053 for (int i = 0; i <= mDigestSettings.getMaxMissedCleavages(); i++) 1054 { 1055 frags = push(""); 1056 if (frags != null) 1057 { 1058 if (null == outFrags) 1059 { 1060 outFrags = new ArrayList<>(frags.size()); 1061 } 1062 outFrags.addAll(frags); 1063 } 1064 } 1065 1066 return outFrags; 1067 } 1068 1069 //----------------------------------------------------------------------- 1070 private List<DigestFragment> evaluateCurrentFrag() 1071 { 1072 List<DigestFragment> fragments = null; 1073 1074 int maxMissedCleavages = 0; 1075 if (mDigestSettings != null 1076 && mDigestSettings.getMaxMissedCleavages() != null) 1077 { 1078 maxMissedCleavages = mDigestSettings.getMaxMissedCleavages(); 1079 } 1080 1081 if (null == mDigestSettings 1082 || null == mDigestSettings.getMaxFragmentLength() 1083 || mFrags[1].length() <= mDigestSettings.getMaxFragmentLength()) 1084 { 1085 StringBuilder frag = new StringBuilder(); 1086 for (int i = 1; i < mFrags.length - 1; i++) 1087 { 1088 if (mFrags[i].length() == 0) break; 1089 1090 frag.append(mFrags[i]); 1091 1092 if (null == mDigestSettings 1093 || null == mDigestSettings.getMinFragmentLength() 1094 || frag.length() >= mDigestSettings.getMinFragmentLength()) 1095 { 1096 // Don't get too big 1097 if (mDigestSettings != null 1098 && mDigestSettings.getMaxFragmentLength() != null 1099 && frag.length() > mDigestSettings.getMaxFragmentLength()) 1100 { 1101 break; 1102 } 1103 1104 DigestFragment digestFrag = allocateNewDigestFragment(); 1105 digestFrag.setSequence(frag.toString()); 1106 digestFrag.setBegin(mIndex); 1107 digestFrag.setEnd(mIndex + frag.length() - 1); 1108 digestFrag.setNumUncleavedSites(i - 1); 1109 digestFrag.setBeginFragIndex(mCurrentFragIndex); 1110 digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1)); 1111 1112 if (null == fragments) 1113 { 1114 fragments = new ArrayList<>(maxMissedCleavages); 1115 } 1116 fragments.add(digestFrag); 1117 } 1118 } 1119 } 1120 1121 return fragments; 1122 } 1123 1124 //----------------------------------------------------------------------- 1125 private DigestFragment allocateNewDigestFragment() 1126 { 1127 DigestFragment frag = new DigestFragment(); 1128 frag.setID(mChainId); 1129 frag.setAminoAcidSet(mAminoAcidSet); 1130 1131 return frag; 1132 } 1133 1134 } 1135} 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195