001package com.hfg.bio;
002
003import java.util.*;
004import java.io.Reader;
005import java.io.IOException;
006
007import com.hfg.bio.seq.Protein;
008import com.hfg.bio.seq.ProteinXLink;
009import com.hfg.bio.seq.ProteinXLinkType;
010import com.hfg.exception.UnmodifyableObjectException;
011import com.hfg.util.CompareUtil;
012import com.hfg.util.StringUtil;
013import com.hfg.util.collection.CollectionUtil;
014import com.hfg.util.collection.OrderedSet;
015
016//------------------------------------------------------------------------------
017/**
018 * Chemical or biological proteolytic agent which can be used to theoretically
019 * digest a Protein.
020 *
021 * @author J. Alex Taylor, hairyfatguy.com
022 */
023//------------------------------------------------------------------------------
024// com.hfg XML/HTML Coding Library
025//
026// This library is free software; you can redistribute it and/or
027// modify it under the terms of the GNU Lesser General Public
028// License as published by the Free Software Foundation; either
029// version 2.1 of the License, or (at your option) any later version.
030//
031// This library is distributed in the hope that it will be useful,
032// but WITHOUT ANY WARRANTY; without even the implied warranty of
033// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
034// Lesser General Public License for more details.
035//
036// You should have received a copy of the GNU Lesser General Public
037// License along with this library; if not, write to the Free Software
038// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
039//
040// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
041// jataylor@hairyfatguy.com
042//------------------------------------------------------------------------------
043
044/*
045
046   What to do if...
047
048      - X-links are defined?
049
050          - Should the DigestSettings include an optional cys-alkylation form? This would
051            allow the user to specify whether the digest should be a 'native digest' or
052            whether the cysteines (including those specified as x-linked) should be considered
053            reduced and alkylated.
054
055      - Multiple identical chains exist?
056
057          - If the fragments aren't x-linked, concatenate the chain ids in the DigestFragment: 'H1/H2'
058
059*/
060
061public class Protease implements Comparable
062{
063   //##########################################################################
064   // PRIVATE FIELDS
065   //##########################################################################
066
067   private static final Set<Protease> sValues = new OrderedSet<>(12);
068
069   private final String mName;
070
071   private String mP6Specificity = "";
072   private String mExcludedP6Residues = "";
073
074   private String mP5Specificity = "";
075   private String mExcludedP5Residues = "";
076
077   private String mP4Specificity = "";
078   private String mExcludedP4Residues = "";
079
080   private String mP3Specificity = "";
081   private String mExcludedP3Residues = "";
082
083   private String mP2Specificity = "";
084   private String mExcludedP2Residues = "";
085   
086   private String mP1Specificity = "";
087   private String mExcludedP1Residues = "";
088   
089   
090   private String mP1PrimeSpecificity   = "";
091   private String mExcludedP1PrimeResidues = "";
092   
093   private String mP2PrimeSpecificity   = "";
094   private String mExcludedP2PrimeResidues = "";
095   
096   private String mP3PrimeSpecificity   = "";
097   private String mExcludedP3PrimeResidues = "";
098
099   private boolean mLocked;
100
101   private static final int NUM_N_TERMINAL_POSITIONS = 6;
102   private static final int NUM_C_TERMINAL_POSITIONS = 3;
103   
104   //##########################################################################
105   // PUBLIC FIELDS
106   //##########################################################################
107
108   public static final Protease TRYPSIN      = new Protease("Trypsin");
109   public static final Protease LYS_C        = new Protease("Lys-C");
110   public static final Protease CHYMOTRYPSIN = new Protease("Chymotrypsin");
111   public static final Protease GLU_C        = new Protease("Glu-C");
112   public static final Protease ASP_N        = new Protease("Asp-N");
113   public static final Protease ASP_N_DE     = new Protease("Asp-N (DE)");
114
115   /**
116    Human thrombin protease.
117    <div>
118    Uses data from Gallwitz, M. et. al. (2012). "The extended cleavage specificity of human thrombin."
119    <i>PLoS ONE,</i> 7 (2).
120    </div>
121    Note: Experiments suggest a preference for P in P2 and R in P3'.
122    */
123   public static final Protease THROMBIN     = new Protease("Thrombin");
124   
125   /**
126    Tobacco Etch virus protease.
127    <div>
128    Uses data from Kostallas, G. et. al. (2011). "Substrate profiling of tobacco Etch virus protease using a novel Fluorescence-Assisted whole-cell assay."
129    <i>PLoS ONE,</i> 6 (1).
130    </div>
131    */
132   public static final Protease TEV          = new Protease("TEV");
133   
134   /**
135    Human rhinovirus 3C protease.
136    <div>
137    Uses data from Cordingley, M.G. et. al. (1990). "Substrate requirements of human rhinovirus 3C protease for peptide cleavage in vitro."
138    <i>Journal of Biological Chemistry,</i> 265 (16), 9062-9065.
139    </div>
140    */
141   public static final Protease HRV_3C       = new Protease("HRV 3C");
142
143   /**
144    Factor Xa protease.
145    <div>
146    Uses data from Harris, J. et. al. (2000). "Rapid and general profiling of protease specificity by using combinatorial fluorogenic substrate libraries."
147    <i>PNAS,</i> 97 (14), 7754-7759.
148    </div>
149    */
150   public static final Protease FACTOR_Xa    = new Protease("Factor Xa");
151   
152   public static final Protease ENTEROKINASE = new Protease("Enterokinase");
153
154   static
155   {
156      TRYPSIN.setP1Specificity("KR")
157            .setExcludedP1PrimeResidues("P")
158            .lock();
159
160      LYS_C.setP1Specificity("K")
161            .lock();
162
163      CHYMOTRYPSIN.setP1Specificity("YFWL")
164            .lock();
165
166      GLU_C.setP1Specificity("E")
167            .lock();
168
169      ASP_N.setP1PrimeSpecificity("D")
170            .lock();
171
172      ASP_N_DE.setP1PrimeSpecificity("DE")
173            .lock();
174      
175      THROMBIN.setP1Specificity("R")
176            .setP1PrimeSpecificity("SAGT")
177            .setExcludedP2PrimeResidues("DE")
178            .lock();
179     
180      TEV.setP6Specificity("E")
181            .setP5Specificity("RSGLVWCEAPQKN")
182            .setP4Specificity("VLGARESWDTIKMNY")
183            .setP3Specificity("Y")
184            .setP2Specificity("FGASVCREQTHLPWDIN")
185            .setP1Specificity("Q")
186            .setP1PrimeSpecificity("GAVSRDECKMLQIT")
187            .lock();
188
189      HRV_3C.setP5Specificity("EQRIDHF")
190            .setP4Specificity("TAVDF")
191            .setP3Specificity("L")
192            .setP2Specificity("F")
193            .setP1Specificity("Q")
194            .setP1PrimeSpecificity("G")
195            .setP2PrimeSpecificity("P")
196            .lock();
197
198      FACTOR_Xa.setP4Specificity("ILPV")
199            .setExcludedP3Residues("P")
200            .setP2Specificity("GAFPSWY")
201            .setP1Specificity("R")
202            .lock();
203
204      ENTEROKINASE
205            .setP5Specificity("D")
206            .setP4Specificity("D")
207            .setP3Specificity("D")
208            .setP2Specificity("D")
209            .setP1Specificity("K")
210            .lock();
211
212   }
213
214
215   //##########################################################################
216   // CONSTRUCTORS
217   //##########################################################################
218
219   //--------------------------------------------------------------------------
220   public Protease(String inName)
221   {
222      mName = inName;
223      sValues.add(this);
224   }
225
226   //##########################################################################
227   // PUBLIC METHODS
228   //##########################################################################
229
230   //--------------------------------------------------------------------------
231   public static Protease[] values()
232   {
233      return sValues.toArray(new Protease[0]);
234   }
235
236   //--------------------------------------------------------------------------
237   public static Protease valueOf(String inName)
238   {
239      Protease requestedProtease = null;
240      if (StringUtil.isSet(inName))
241      {
242         for (Protease protease : sValues)
243         {
244            if (protease.name().equalsIgnoreCase(inName.trim()))
245            {
246               requestedProtease = protease;
247               break;
248            }
249         }
250      }
251
252      return requestedProtease;
253   }
254
255   //--------------------------------------------------------------------------
256   @Override
257   public int hashCode()
258   {
259      return name().hashCode();
260   }
261
262   //--------------------------------------------------------------------------
263   @Override
264   public boolean equals(Object inObj2)
265   {
266      return 0 == compareTo(inObj2);
267   }
268
269   //--------------------------------------------------------------------------
270   @Override
271   public int compareTo(Object inObj2)
272   {
273      return inObj2 instanceof Protease ? CompareUtil.compare(name(), ((Protease)inObj2).name()) : -1;
274   }
275
276   //--------------------------------------------------------------------------
277   public String name()
278   {
279      return mName;
280   }
281
282   //--------------------------------------------------------------------------
283   @Override
284   public String toString()
285   {
286      return name();
287   }
288
289   //--------------------------------------------------------------------------
290   public Protease setP6Specificity(String inValue)
291   {
292      checkLock();
293      mP6Specificity = inValue;
294      return this;
295   }
296
297   //--------------------------------------------------------------------------
298   public String getP6Specificity()
299   {
300      return mP6Specificity;
301   }
302
303   //--------------------------------------------------------------------------
304   public Protease setExcludedP6Residues(String inValue)
305   {
306      checkLock();
307      mExcludedP6Residues = inValue;
308      return this;
309   }
310
311   //--------------------------------------------------------------------------
312   public String getExcludedP6Residues()
313   {
314      return mExcludedP6Residues;
315   }
316
317
318   //--------------------------------------------------------------------------
319   public Protease setP5Specificity(String inValue)
320   {
321      checkLock();
322      mP5Specificity = inValue;
323      return this;
324   }
325
326   //--------------------------------------------------------------------------
327   public String getP5Specificity()
328   {
329      return mP5Specificity;
330   }
331
332   //--------------------------------------------------------------------------
333   public Protease setExcludedP5Residues(String inValue)
334   {
335      checkLock();
336      mExcludedP5Residues = inValue;
337      return this;
338   }
339
340   //--------------------------------------------------------------------------
341   public String getExcludedP5Residues()
342   {
343      return mExcludedP5Residues;
344   }
345
346   
347   //--------------------------------------------------------------------------
348   public Protease setP4Specificity(String inValue)
349   {
350      checkLock();
351      mP4Specificity = inValue;
352      return this;
353   }
354
355   //--------------------------------------------------------------------------
356   public String getP4Specificity()
357   {
358      return mP4Specificity;
359   }
360
361   //--------------------------------------------------------------------------
362   public Protease setExcludedP4Residues(String inValue)
363   {
364      checkLock();
365      mExcludedP4Residues = inValue;
366      return this;
367   }
368
369   //--------------------------------------------------------------------------
370   public String getExcludedP4Residues()
371   {
372      return mExcludedP4Residues;
373   }
374
375   
376   //--------------------------------------------------------------------------
377   public Protease setP3Specificity(String inValue)
378   {
379      checkLock();
380      mP3Specificity = inValue;
381      return this;
382   }
383
384   //--------------------------------------------------------------------------
385   public String getP3Specificity()
386   {
387      return mP3Specificity;
388   }
389
390   //--------------------------------------------------------------------------
391   public Protease setExcludedP3Residues(String inValue)
392   {
393      checkLock();
394      mExcludedP3Residues = inValue;
395      return this;
396   }
397
398   //--------------------------------------------------------------------------
399   public String getExcludedP3Residues()
400   {
401      return mExcludedP3Residues;
402   }
403
404   
405   //--------------------------------------------------------------------------
406   public Protease setP2Specificity(String inValue)
407   {
408      checkLock();
409      mP2Specificity = inValue;
410      return this;
411   }
412
413   //--------------------------------------------------------------------------
414   public String getP2Specificity()
415   {
416      return mP2Specificity;
417   }
418
419
420   //--------------------------------------------------------------------------
421   public Protease setExcludedP2Residues(String inValue)
422   {
423      checkLock();
424      mExcludedP2Residues = inValue;
425      return this;
426   }
427
428   //--------------------------------------------------------------------------
429   public String getExcludedP2Residues()
430   {
431      return mExcludedP2Residues;
432   }
433
434   
435   //--------------------------------------------------------------------------
436   public Protease setP1Specificity(String inValue)
437   {
438      checkLock();
439      mP1Specificity = inValue;
440      return this;
441   }
442
443   //--------------------------------------------------------------------------
444   public String getP1Specificity()
445   {
446      return mP1Specificity;
447   }
448
449
450   //--------------------------------------------------------------------------
451   public Protease setExcludedP1PrimeResidues(String inValue)
452   {
453      checkLock();
454      mExcludedP1PrimeResidues = inValue;
455      return this;
456   }
457
458   //--------------------------------------------------------------------------
459   public String getExcludedP1PrimeResidues()
460   {
461      return mExcludedP1PrimeResidues;
462   }
463
464
465   //--------------------------------------------------------------------------
466   public Protease setP1PrimeSpecificity(String inValue)
467   {
468      checkLock();
469      mP1PrimeSpecificity = inValue;
470      return this;
471   }
472
473   //--------------------------------------------------------------------------
474   public String getP1PrimeSpecificity()
475   {
476      return mP1PrimeSpecificity;
477   }
478
479
480   //--------------------------------------------------------------------------
481   public Protease setExcludedP1Residues(String inValue)
482   {
483      checkLock();
484      mExcludedP1Residues = inValue;
485      return this;
486   }
487
488   //--------------------------------------------------------------------------
489   public String getExcludedP1Residues()
490   {
491      return mExcludedP1Residues;
492   }
493
494
495   //--------------------------------------------------------------------------
496   public Protease setExcludedP2PrimeResidues(String inValue)
497   {
498      checkLock();
499      mExcludedP2PrimeResidues = inValue;
500      return this;
501   }
502
503   //--------------------------------------------------------------------------
504   public String getExcludedP2PrimeResidues()
505   {
506      return mExcludedP2PrimeResidues;
507   }
508   
509   //--------------------------------------------------------------------------
510   public Protease setP2PrimeSpecificity(String inValue)
511   {
512      checkLock();
513      mP2PrimeSpecificity = inValue;
514      return this;
515   }
516
517   //--------------------------------------------------------------------------
518   public String getP2PrimeSpecificity()
519   {
520      return mP2PrimeSpecificity;
521   }
522
523
524   //--------------------------------------------------------------------------
525   public Protease setP3PrimeSpecificity(String inValue)
526   {
527      checkLock();
528      mP3PrimeSpecificity = inValue;
529      return this;
530   }
531
532   //--------------------------------------------------------------------------
533   public String getP3PrimeSpecificity()
534   {
535      return mP3PrimeSpecificity;
536   }
537
538
539   //--------------------------------------------------------------------------
540   public Protease lock()
541   {
542      mLocked = true;
543      return this;
544   }
545
546   //--------------------------------------------------------------------------
547   public List<DigestFragment> digest(Protein inProtein, DigestSettings inSettings)
548   {
549      List<DigestFragment> outFrags;
550
551      if (inSettings.getAlkylatedCys() != null)
552      {
553         Protein proteinCopy = inProtein.clone();
554         proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE);
555
556         // Reflect that the cysteines in the protein are alkylated.
557         AminoAcidSet aaSet = new AminoAcidSet(inProtein.getAminoAcidSet());
558         aaSet.setMapping('c', inSettings.getAlkylatedCys());
559         aaSet.setMapping('C', inSettings.getAlkylatedCys());
560         proteinCopy.setAminoAcidSet(aaSet);
561
562         // Are there other types of x-links?
563         if (CollectionUtil.hasValues(proteinCopy.getXLinks()))
564         {
565            outFrags = complexDigestion(proteinCopy, inSettings);
566         }
567         else
568         {
569            outFrags = simpleDigestion(proteinCopy, inSettings);
570         }
571
572         for (DigestFragment frag : outFrags)
573         {
574            frag.setAminoAcidSet(aaSet);
575         }
576      }
577      else
578      {
579         // Native digest. Deal with X-links
580         outFrags = complexDigestion(inProtein, inSettings);
581      }
582
583      return outFrags;
584   }
585
586   //--------------------------------------------------------------------------
587   public boolean isCleavageSite(char inP1Residue, char inP1PrimeResidue)
588   {
589      char p1Residue      = Character.toUpperCase(inP1Residue);
590      char p1PrimeResidue = Character.toUpperCase(inP1PrimeResidue);
591
592      return ((mP1Specificity.indexOf(p1Residue) >= 0
593               && mExcludedP1PrimeResidues.indexOf(p1PrimeResidue) == -1)
594              || (mP1PrimeSpecificity.indexOf(p1PrimeResidue) >= 0
595                  && mExcludedP1Residues.indexOf(p1Residue) == -1));
596
597   }
598
599   //--------------------------------------------------------------------------
600   public boolean isCleavageSite(CharSequence inNTerminalResidues, CharSequence inCTerminalResidues)
601   {
602      String nTerminalResidues = inNTerminalResidues.toString().toUpperCase();
603      String cTerminalResidues = inCTerminalResidues.toString().toUpperCase();
604
605      return (
606              ((mP6Specificity.length() == 0  && mExcludedP6Residues.length() == 0 )
607                  || nTerminalResidues.length() < 6
608                  || mP6Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 6)) >= 0
609                  || (mExcludedP6Residues.length() > 0 && mExcludedP6Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 6)) == -1))
610              && ((mP5Specificity.length() == 0  && mExcludedP5Residues.length() == 0 )
611                  || nTerminalResidues.length() < 5
612                  || mP5Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 5)) >= 0
613                  || (mExcludedP5Residues.length() > 0 && mExcludedP5Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 5)) == -1))
614              && ((mP4Specificity.length() == 0  && mExcludedP4Residues.length() == 0 )
615                  || nTerminalResidues.length() < 4
616                  || mP4Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 4)) >= 0
617                  || (mExcludedP4Residues.length() > 0 && mExcludedP4Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 4)) == -1))
618              && ((mP3Specificity.length() == 0  && mExcludedP3Residues.length() == 0 )
619                  || nTerminalResidues.length() < 3
620                  || mP3Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 3)) >= 0
621                  || (mExcludedP3Residues.length() > 0 && mExcludedP3Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 3)) == -1))
622              && ((mP2Specificity.length() == 0  && mExcludedP2Residues.length() == 0 )
623                  || nTerminalResidues.length() < 2
624                  || mP2Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 2)) >= 0
625                  || (mExcludedP2Residues.length() > 0 && mExcludedP2Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 2)) == -1))
626              && ((mP1Specificity.length() == 0  && mExcludedP1Residues.length() == 0 )
627                  || nTerminalResidues.length() < 1
628                  || (mP1Specificity.length() > 0 && mP1Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 1)) >= 0)
629                  || (mExcludedP1Residues.length() > 0 && mExcludedP1Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 1)) == -1))
630              && ((mP1PrimeSpecificity.length() == 0  && mExcludedP1PrimeResidues.length() == 0 )
631                  || cTerminalResidues.length() < 1
632                  || (mP1PrimeSpecificity.length() > 0 && mP1PrimeSpecificity.indexOf(cTerminalResidues.charAt(0)) >= 0)
633                  || (mExcludedP1PrimeResidues.length() > 0 && mExcludedP1PrimeResidues.indexOf(cTerminalResidues.charAt(0)) == -1))
634              && ((mP2PrimeSpecificity.length() == 0 && mExcludedP2PrimeResidues.length() == 0)
635                  || cTerminalResidues.length() < 2
636                  || (mP2PrimeSpecificity.length() > 0 && mP2PrimeSpecificity.indexOf(cTerminalResidues.charAt(1)) >= 0)
637                  || (mExcludedP2PrimeResidues.length() > 0 && mExcludedP2PrimeResidues.indexOf(cTerminalResidues.charAt(1)) == -1))
638              && ((mP3PrimeSpecificity.length() == 0  && mExcludedP3PrimeResidues.length() == 0)
639                  || cTerminalResidues.length() < 3
640                  || (mP3PrimeSpecificity.length() > 0 && mP3PrimeSpecificity.indexOf(cTerminalResidues.charAt(2)) >= 0)
641                  || (mExcludedP3PrimeResidues.length() > 0 && mExcludedP3PrimeResidues.indexOf(cTerminalResidues.charAt(2)) == -1)));
642
643   }
644
645   //##########################################################################
646   // PRIVATE METHODS
647   //##########################################################################
648
649   //--------------------------------------------------------------------------
650   private void checkLock()
651   {
652      if (mLocked) throw new UnmodifyableObjectException("This object is locked and cannot be modified.");
653   }
654
655   //--------------------------------------------------------------------------
656   private List<DigestFragment> simpleDigestion(Protein inProtein, DigestSettings inSettings)
657   {
658      List<DigestFragment> outFrags = new ArrayList<>();
659      
660      if (CollectionUtil.hasValues(inProtein.getChains()))
661      {
662         for (Protein chain : inProtein.getChains())
663         {
664            outFrags.addAll(simpleDigestion(chain, inSettings));
665         }
666      }
667      else
668      {
669         try
670         {
671            Reader seqReader = null;
672            try
673            {
674               seqReader = inProtein.getSequenceReader();
675
676               SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), inProtein.getAminoAcidSet(), inSettings);
677
678               int bufferSize = NUM_N_TERMINAL_POSITIONS + NUM_C_TERMINAL_POSITIONS;
679               int[] site = new int[bufferSize];
680               for (int i = 0; i < bufferSize; i++)
681               {
682                  site[i] = -1;
683               }
684
685               StringBuilder nTerminalResidues = new StringBuilder();
686               StringBuilder cTerminalResidues = new StringBuilder();
687
688               int residue = seqReader.read();
689               if (residue != -1)
690               {
691                  StringBuilder frag = new StringBuilder((char) residue + "");
692
693                  site[bufferSize - 1] = residue; // Drops in on the right side and slides left
694                  
695                  while ((residue = seqReader.read()) != -1)
696                  {
697                     for (int i = 0; i < bufferSize - 1; i++)
698                     {
699                        site[i] = site[i + 1];
700                     }
701                     
702                     site[bufferSize - 1] = residue;
703
704                     if (-1 == site[NUM_N_TERMINAL_POSITIONS - 1])
705                     {
706                        continue;
707                     }
708
709                     nTerminalResidues.setLength(0);
710                     for (int i = 0; i < NUM_N_TERMINAL_POSITIONS; i++)
711                     {
712                        nTerminalResidues.append((char) site[i]);   
713                     }
714                     
715                     cTerminalResidues.setLength(0);
716                     for (int i = NUM_N_TERMINAL_POSITIONS; i < bufferSize; i++)
717                     {
718                        cTerminalResidues.append((char) site[i]);   
719                     }
720                      
721                     if (isCleavageSite(nTerminalResidues, cTerminalResidues))
722                     {
723                        List<DigestFragment> fragments = fragWindow.push(frag.toString());
724                        if (fragments != null)
725                        {
726                           outFrags.addAll(fragments);
727                        }
728                        frag.setLength(0);
729                        frag.append((char)site[NUM_N_TERMINAL_POSITIONS]);
730                     }
731                     else
732                     {
733                        frag.append((char)site[NUM_N_TERMINAL_POSITIONS]);
734                     }
735                  }
736
737                  // Process residues remaining in the site buffer
738                  while (hasUnprocessedBufferContent(site))
739                  {
740                     for (int i = 0; i < bufferSize - 1; i++)
741                     {
742                        site[i] = site[i + 1];
743                     }
744
745                     site[bufferSize - 1] = -1;
746
747                     if (-1 == site[NUM_N_TERMINAL_POSITIONS - 1] 
748                         || -1 == site[NUM_N_TERMINAL_POSITIONS])
749                     {
750                        continue;
751                     }
752
753                     nTerminalResidues.setLength(0);
754                     for (int i = 0; i < NUM_N_TERMINAL_POSITIONS; i++)
755                     {
756                        int aa = site[i];
757                        nTerminalResidues.append(aa != -1 ? (char) aa : "");
758                     }
759
760                     cTerminalResidues.setLength(0);
761                     for (int i = NUM_N_TERMINAL_POSITIONS; i < bufferSize; i++)
762                     {
763                        int aa = site[i];
764                        cTerminalResidues.append(aa != -1 ? (char) aa : "");
765                     }
766                     
767                     if (isCleavageSite(nTerminalResidues, cTerminalResidues))
768                     {
769                        List<DigestFragment> fragments = fragWindow.push(frag.toString());
770                        if (fragments != null)
771                        {
772                           outFrags.addAll(fragments);
773                        }
774                        frag.setLength(0);
775                        frag.append((char)site[NUM_N_TERMINAL_POSITIONS]);
776                     }
777                     else
778                     {
779                        frag.append((char)site[NUM_N_TERMINAL_POSITIONS]);
780                     }
781                  }
782                  
783                  List<DigestFragment> fragments = fragWindow.lastPush(frag.toString());
784                  if (fragments != null)
785                  {
786                     outFrags.addAll(fragments);
787                  }
788               }
789            }
790            finally
791            {
792               if (seqReader != null) seqReader.close();
793            }
794         }
795         catch (IOException e)
796         {
797            throw new RuntimeException(e);
798         }
799      }
800      
801      // Apply the DigestSettings limits.
802      for (int i = 0; i < outFrags.size(); i++)
803      {
804         if (! inSettings.meetsCriteria(outFrags.get(i)))
805         {
806            outFrags.remove(i--);
807         }
808      }
809
810      return outFrags;
811   }
812
813   //--------------------------------------------------------------------------
814   private boolean hasUnprocessedBufferContent(int[] inBuffer)
815   {
816      boolean hasUnprocessedContent = false;
817
818      for (int i = NUM_N_TERMINAL_POSITIONS - 1; i < NUM_N_TERMINAL_POSITIONS + NUM_C_TERMINAL_POSITIONS; i++)
819      {
820         if (inBuffer[i] != -1)
821         {
822            hasUnprocessedContent = true;
823            break;
824         }
825      }
826      
827      return hasUnprocessedContent;
828   }
829   
830   //--------------------------------------------------------------------------
831   private List<DigestFragment> complexDigestion(Protein inProtein,
832                                               DigestSettings inSettings)
833   {
834      // Initially disable limits when finding fragments.
835      // We'll apply the desired settings after all the fragments have been linked up.
836      DigestSettings settingsWithoutLimits = inSettings.clone();
837      settingsWithoutLimits.setMinFragmentLength(null);
838      settingsWithoutLimits.setMaxFragmentLength(null);
839      settingsWithoutLimits.setMinFragmentMass(null);
840      settingsWithoutLimits.setMaxFragmentMass(null);
841
842      List<DigestFragment> rawFrags = simpleDigestion(inProtein, settingsWithoutLimits);
843
844      // For ea. x-link, bind it to the combinations of raw fragments
845      for (ProteinXLink xlink : inProtein.getXLinks())
846      {
847         List<DigestFragment> donorFrags    = new ArrayList<>();
848         List<DigestFragment> acceptorFrags = new ArrayList<>();
849         Set<DigestFragment>  fragsToAdd    = new HashSet<>();
850         Set<DigestFragment>  fragsToRemove = new HashSet<>();
851
852         for (DigestFragment frag : rawFrags)
853         {
854            DigestFragment donorChain = null;
855            if (CollectionUtil.hasValues(frag.getChains()))
856            {
857               donorChain = (DigestFragment) frag.getChain(xlink.getDonorChainId());
858            }
859            else if (xlink.getDonorChainId().equals(frag.getID()))
860            {
861               donorChain = frag;
862            }
863
864            boolean hasDonorSite = false;
865            if (donorChain != null
866                && xlink.getDonorPosition() >= donorChain.getBegin()
867                && xlink.getDonorPosition() <= donorChain.getEnd())
868            {
869               // Donor site is within this frag.
870               hasDonorSite = true;
871            }
872
873
874            DigestFragment acceptorChain = null;
875            if (CollectionUtil.hasValues(frag.getChains()))
876            {
877               acceptorChain = (DigestFragment) frag.getChain(xlink.getAcceptorChainId());
878            }
879            else if (xlink.getAcceptorChainId().equals(frag.getID()))
880            {
881               acceptorChain = frag;
882            }
883
884            boolean hasAcceptorSite = false;
885            if (acceptorChain != null
886                && xlink.getAcceptorPosition() >= acceptorChain.getBegin()
887                && xlink.getAcceptorPosition() <= acceptorChain.getEnd())
888            {
889               // Acceptor site is within this frag.
890               hasAcceptorSite = true;
891            }
892
893            if (hasDonorSite && hasAcceptorSite)
894            {
895               DigestFragment linkedFrag;
896               if (CollectionUtil.hasValues(frag.getChains()))
897               {
898                  linkedFrag = frag;
899               }
900               else
901               {
902                  linkedFrag = new DigestFragment();
903                  linkedFrag.addChain(frag.clone());
904                  // Add it back to the pool
905                  fragsToAdd.add(linkedFrag);
906
907                  fragsToRemove.add(frag);
908               }
909
910               linkedFrag.addXLink(xlink);
911            }
912            else
913            {
914               if (hasDonorSite)    donorFrags.add(frag);
915               if (hasAcceptorSite) acceptorFrags.add(frag);
916            }
917         }
918
919         // Link the donors & acceptors in all possible combinations
920         for (DigestFragment donorFrag : donorFrags)
921         {
922            for (DigestFragment acceptorFrag : acceptorFrags)
923            {
924               DigestFragment linkedFrag;
925               if (CollectionUtil.hasValues(donorFrag.getChains()))
926               {
927                  linkedFrag = (DigestFragment) donorFrag.clone();
928               }
929               else
930               {
931                  linkedFrag = new DigestFragment();
932                  linkedFrag.addChain(donorFrag.clone());
933               }
934
935               // Add the acceptor chain (if it isn't already present)
936               Protein acceptorChain = linkedFrag.getChain(xlink.getAcceptorChainId());
937               if (null == acceptorChain)
938               {
939                  linkedFrag.addChain(acceptorFrag.clone());
940               }
941
942               linkedFrag.addXLink(xlink);
943               // Add it back to the pool
944               rawFrags.add(linkedFrag);
945            }
946         }
947
948         // Now remove the raw frags that were linked.
949         for (DigestFragment donorFrag : donorFrags)
950         {
951            rawFrags.remove(donorFrag);
952         }
953
954         for (DigestFragment acceptorFrag : acceptorFrags)
955         {
956            rawFrags.remove(acceptorFrag);
957         }
958
959         for (DigestFragment frag : fragsToRemove)
960         {
961            rawFrags.remove(frag);
962         }
963
964         rawFrags.addAll(fragsToAdd);
965      }
966
967      // Apply the DigestSettings limits.
968      for (int i = 0; i < rawFrags.size(); i++)
969      {
970         if (! inSettings.meetsCriteria(rawFrags.get(i)))
971         {
972            rawFrags.remove(i--);
973         }
974      }
975
976      return rawFrags;
977   }
978
979   //##########################################################################
980   // INNER CLASS
981   //##########################################################################
982
983   protected class SlidingWindow
984   {
985      private String          mChainId;
986      private AminoAcidSet    mAminoAcidSet;
987      private StringBuilder[] mFrags;
988      private DigestSettings  mDigestSettings;
989      private int             mIndex = 1;
990      private int             mLength;
991      private int             mCurrentFragIndex = 0;
992
993      //-----------------------------------------------------------------------
994      public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings)
995      {
996         mChainId        = inChainId;
997         mAminoAcidSet   = inAASet;
998         mDigestSettings = inSettings;
999         mFrags    = new StringBuilder[inSettings.getMaxMissedCleavages() + 3];
1000         mFrags[0] = new StringBuilder();
1001         mFrags[1] = new StringBuilder();
1002         mLength   = 2;
1003      }
1004
1005      //-----------------------------------------------------------------------
1006      public List<DigestFragment> push(String inFrag)
1007      {
1008         List<DigestFragment> outFrags = null;
1009
1010         if (mLength < mFrags.length)
1011         {
1012            // Still filling the window
1013            mFrags[mLength++] = new StringBuilder(inFrag);
1014         }
1015         else
1016         {
1017            StringBuilder tmp = mFrags[0];
1018
1019            for (int i = 1; i < mFrags.length; i++)
1020            {
1021               mFrags[i - 1] = mFrags[i];
1022            }
1023
1024            mFrags[mFrags.length - 1] = tmp;
1025            mFrags[mFrags.length - 1].setLength(0);
1026            mFrags[mFrags.length - 1].append(inFrag);
1027
1028            mIndex += mFrags[0].length();
1029
1030            mCurrentFragIndex++;
1031
1032            List<DigestFragment> frags = evaluateCurrentFrag();
1033            if (frags != null)
1034            {
1035               outFrags = new ArrayList<>(frags);
1036            }
1037         }
1038
1039         return outFrags;
1040      }
1041
1042      //-----------------------------------------------------------------------
1043      public List<DigestFragment> lastPush(String inFrag)
1044      {
1045         List<DigestFragment> outFrags = null;
1046
1047         List<DigestFragment> frags = push(inFrag);
1048         if (frags != null)
1049         {
1050            outFrags = new ArrayList<>(frags);
1051         }
1052
1053         for (int i = 0; i <= mDigestSettings.getMaxMissedCleavages(); i++)
1054         {
1055            frags = push("");
1056            if (frags != null)
1057            {
1058               if (null == outFrags)
1059               {
1060                  outFrags = new ArrayList<>(frags.size());
1061               }
1062               outFrags.addAll(frags);
1063            }
1064         }
1065
1066         return outFrags;
1067      }
1068
1069      //-----------------------------------------------------------------------
1070      private List<DigestFragment> evaluateCurrentFrag()
1071      {
1072         List<DigestFragment> fragments = null;
1073
1074         int maxMissedCleavages = 0;
1075         if (mDigestSettings != null
1076             && mDigestSettings.getMaxMissedCleavages() != null)
1077         {
1078            maxMissedCleavages = mDigestSettings.getMaxMissedCleavages();
1079         }
1080
1081         if (null == mDigestSettings
1082             || null == mDigestSettings.getMaxFragmentLength()
1083             || mFrags[1].length() <= mDigestSettings.getMaxFragmentLength())
1084         {
1085            StringBuilder frag = new StringBuilder();
1086            for (int i = 1; i < mFrags.length - 1; i++)
1087            {
1088               if (mFrags[i].length() == 0) break;
1089
1090               frag.append(mFrags[i]);
1091
1092               if (null == mDigestSettings
1093                   || null == mDigestSettings.getMinFragmentLength()
1094                   || frag.length() >= mDigestSettings.getMinFragmentLength())
1095               {
1096                  // Don't get too big
1097                  if (mDigestSettings != null
1098                      && mDigestSettings.getMaxFragmentLength() != null
1099                      && frag.length() > mDigestSettings.getMaxFragmentLength())
1100                  {
1101                     break;
1102                  }
1103
1104                  DigestFragment digestFrag = allocateNewDigestFragment();
1105                  digestFrag.setSequence(frag.toString());
1106                  digestFrag.setBegin(mIndex);
1107                  digestFrag.setEnd(mIndex + frag.length() - 1);
1108                  digestFrag.setNumUncleavedSites(i - 1);
1109                  digestFrag.setBeginFragIndex(mCurrentFragIndex);
1110                  digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1));
1111
1112                  if (null == fragments)
1113                  {
1114                     fragments = new ArrayList<>(maxMissedCleavages);
1115                  }
1116                  fragments.add(digestFrag);
1117               }
1118            }
1119         }
1120
1121         return fragments;
1122      }
1123
1124      //-----------------------------------------------------------------------
1125      private DigestFragment allocateNewDigestFragment()
1126      {
1127         DigestFragment frag = new DigestFragment();
1128         frag.setID(mChainId);
1129         frag.setAminoAcidSet(mAminoAcidSet);
1130
1131         return frag;
1132      }
1133
1134   }
1135}
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195