001package com.hfg.bio.seq.pattern;
002
003import java.util.ArrayList;
004import java.util.List;
005import java.util.regex.Matcher;
006
007import com.hfg.bio.seq.BioSequenceType;
008import com.hfg.bio.seq.Protein;
009import com.hfg.bio.seq.SeqLocation;
010
011//------------------------------------------------------------------------------
012/**
013 Container for a protein pattern (motif).
014 <p>
015 From the <a href='http://prosite.expasy.org/prosuser.html#conv_pa'>PROSITE user manual</a>:
016 </p>
017 <p>
018 The patterns are described using the following conventions:
019 <ul>
020   <li>The standard IUPAC one-letter codes for the amino acids are used.</li>
021   <li>The symbol 'x' is used for a position where any amino acid is accepted.</li>
022   <li>Ambiguities are indicated by listing the acceptable amino acids for a given position, between square parentheses '[ ]'.
023       For example: [ALT] stands for Ala or Leu or Thr.</li>
024   <li>Ambiguities are also indicated by listing between a pair of curly brackets '{ }' the amino acids that are not accepted
025       at a given position. For example: {AM} stands for any amino acid except Ala and Met.</li>
026   <li>Each element in a pattern is separated from its neighbor by a '-'.</li>
027   <li>Repetition of an element of the pattern can be indicated by following that element with a numerical value or a numerical
028       range between parenthesis. Examples: x(3) corresponds to x-x-x, x(2,4) corresponds to x-x or x-x-x or x-x-x-x.</li>
029   <li>When a pattern is restricted to either the N- or C-terminal of a sequence, that pattern either starts with a '&lt;' symbol
030       or respectively ends with a '&gt;' symbol. In some rare cases (e.g. PS00267 or PS00539), '&gt;' can also occur inside square
031       brackets for the C-terminal element. 'F-[GSTV]-P-R-L-[G&gt;]' means that either 'F-[GSTV]-P-R-L-G' or 'F-[GSTV]-P-R-L&gt;' are considered.</li>
032   <li>A period ends the pattern.</li>
033 </ul>
034 <div>
035   Examples:
036   <pre>
037    PA   [AC]-x-V-x(4)-{ED}.
038   </pre>
039   This pattern is translated as: [Ala or Cys]-any-Val-any-any-any-any-{any but Glu or Asp}
040   <pre>
041    PA   &lt;A-x-[ST](2)-x(0,1)-V.
042   </pre>
043   This pattern, which must be in the N-terminal of the sequence ('&lt;'), is translated as: Ala-any-[Ser or Thr]-[Ser or Thr]-(any or none)-Val
044 </div>
045 @author J. Alex Taylor, hairyfatguy.com
046 */
047//------------------------------------------------------------------------------
048// com.hfg Library
049//
050// This library is free software; you can redistribute it and/or
051// modify it under the terms of the GNU Lesser General Public
052// License as published by the Free Software Foundation; either
053// version 2.1 of the License, or (at your option) any later version.
054//
055// This library is distributed in the hope that it will be useful,
056// but WITHOUT ANY WARRANTY; without even the implied warranty of
057// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
058// Lesser General Public License for more details.
059//
060// You should have received a copy of the GNU Lesser General Public
061// License along with this library; if not, write to the Free Software
062// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
063//
064// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
065// jataylor@hairyfatguy.com
066//------------------------------------------------------------------------------
067
068public class ProteinPattern extends SeqPattern<Protein, ProteinPatternMatch>
069{
070   //###########################################################################
071   // CONSTRUCTORS
072   //###########################################################################
073
074   //--------------------------------------------------------------------------
075   public ProteinPattern(String inPrositePattern)
076   {
077      super(inPrositePattern);
078   }
079
080   //###########################################################################
081   // PUBLIC METHODS
082   //###########################################################################
083
084   //--------------------------------------------------------------------------
085   public BioSequenceType getBioSequenceType()
086   {
087      return BioSequenceType.PROTEIN;
088   }
089
090   //--------------------------------------------------------------------------
091   public ProteinPattern setIgnoreGaps(boolean inValue)
092   {
093      return (ProteinPattern) super.setIgnoreGaps(inValue);
094   }
095
096   //--------------------------------------------------------------------------
097   @Override
098   public ProteinPattern setMaxMismatches(int inValue)
099   {
100      return (ProteinPattern) super.setMaxMismatches(inValue);
101   }
102
103   //--------------------------------------------------------------------------
104   protected ProteinPatternMatch createMatch(String inSeq, SeqLocation inLocation)
105   {
106      return new ProteinPatternMatch(this, inSeq, inLocation);
107   }
108
109}