001package com.hfg.util;
002
003import java.util.ArrayList;
004import java.util.List;
005
006//------------------------------------------------------------------------------
007/**
008 Baeza-Yates, Perleberg string matcher.
009 <div>
010 @author J. Alex Taylor, hairyfatguy.com
011 </div>
012 */
013//------------------------------------------------------------------------------
014// com.hfg XML/HTML Coding Library
015//
016// This library is free software; you can redistribute it and/or
017// modify it under the terms of the GNU Lesser General Public
018// License as published by the Free Software Foundation; either
019// version 2.1 of the License, or (at your option) any later version.
020//
021// This library is distributed in the hope that it will be useful,
022// but WITHOUT ANY WARRANTY; without even the implied warranty of
023// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
024// Lesser General Public License for more details.
025//
026// You should have received a copy of the GNU Lesser General Public
027// License along with this library; if not, write to the Free Software
028// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
029//
030// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
031// jataylor@hairyfatguy.com
032//------------------------------------------------------------------------------
033
034public class BYPStringMatcher
035{
036   public BYPStringPattern mPattern;
037   public String mTargetString;
038
039   private List<BYPStringMatch> mMatches;
040
041   private int[] mMismatchCount = new int[ALPHABET_SIZE];
042   private List<IndexNode> mOffsetList = new ArrayList<>(ALPHABET_SIZE);
043
044   private static final int ALPHABET_SIZE = 256;
045   private static final int MOD256 = 0xff;
046
047   private static final boolean STOP_AFTER_FIRST_MATCH = true;
048   private static final boolean FIND_ALL_MATCHES = false;
049
050   //###########################################################################
051   // CONSTRUCTORS
052   //###########################################################################
053
054   //---------------------------------------------------------------------------
055   private BYPStringMatcher()
056   {
057
058   }
059
060   //---------------------------------------------------------------------------
061   BYPStringMatcher(BYPStringPattern inPattern, String inTargetString)
062   {
063      mPattern = inPattern;
064      mTargetString = inTargetString;
065
066      if (null == mTargetString
067            || 0 == mTargetString.length())
068      {
069         throw new RuntimeException("No string was specified for searching!");
070      }
071      else if (mTargetString.length() < mPattern.length())
072      {
073         throw new RuntimeException("No target string cannot be shorter than the pattern!");
074      }
075
076      setup();
077   }
078
079   //###########################################################################
080   // PUBLIC METHODS
081   //###########################################################################
082
083   //---------------------------------------------------------------------------
084   public boolean matches()
085   {
086      innerFind(0, STOP_AFTER_FIRST_MATCH, mPattern.length());
087      return mMatches != null && start() == 0 && end() == mTargetString.length();
088   }
089
090   //---------------------------------------------------------------------------
091   public boolean find()
092   {
093      return find(0);
094   }
095
096   //---------------------------------------------------------------------------
097   public boolean find(int inStartIndex)
098   {
099      innerFind(inStartIndex, STOP_AFTER_FIRST_MATCH);
100      return mMatches != null;
101   }
102
103   //---------------------------------------------------------------------------
104   public List<BYPStringMatch> findAll()
105   {
106      innerFind(0, FIND_ALL_MATCHES);
107      return mMatches;
108   }
109
110   //---------------------------------------------------------------------------
111   public String group()
112   {
113      return mMatches.get(0).getString();
114   }
115
116   //---------------------------------------------------------------------------
117   public int start()
118   {
119      return mMatches.get(0).start();
120   }
121
122   //---------------------------------------------------------------------------
123   public int end()
124   {
125      return mMatches.get(0).end();
126   }
127
128   //---------------------------------------------------------------------------
129   public int mismatches()
130   {
131      return mMatches.get(0).getNumMismatches();
132   }
133
134   //###########################################################################
135   // PRIVATE METHODS
136   //###########################################################################
137
138   //---------------------------------------------------------------------------
139   private void setup()
140   {
141      if (mPattern.isCaseInsensitive())
142      {
143         mTargetString = mTargetString.toUpperCase();
144      }
145
146      int patternLength = mPattern.length();
147
148      String patternString = mPattern.getPatternString();
149      if (mPattern.isCaseInsensitive())
150      {
151         patternString = patternString.toUpperCase();
152      }
153
154      for (int i = 0; i < ALPHABET_SIZE; i++)
155      {
156         mOffsetList.add(new BYPStringMatcher(). new IndexNode());
157         mMismatchCount[i] = patternLength;
158      }
159
160      for (int i = 0, j = 128; i < patternLength; i++)
161      {
162         mMismatchCount[i] = ALPHABET_SIZE;
163
164         char patternChar = patternString.charAt(i);
165         IndexNode indexNode = mOffsetList.get((int)patternChar);
166         if (indexNode.getOffset() == -1)
167         {
168            indexNode.setOffset(patternLength - i - 1);
169         }
170         else
171         {
172            short nextIndex = indexNode.nextIndex();
173            indexNode.setNextIndex((short)j++);
174            indexNode = mOffsetList.get(indexNode.nextIndex());
175            indexNode.setOffset(patternLength - i - 1);
176            indexNode.setNextIndex(nextIndex);
177         }
178      }
179
180      mMismatchCount[patternLength - 1] = patternLength;
181   }
182
183   //---------------------------------------------------------------------------
184   private void innerFind(int inStartIndex, boolean inStopAfterFirstMatch)
185   {
186      innerFind(inStartIndex, inStopAfterFirstMatch, mTargetString.length());
187   }
188
189   //---------------------------------------------------------------------------
190   private void innerFind(int inStartIndex, boolean inStopAfterFirstMatch, int inSearchLength)
191   {
192      mMatches = null;
193
194      int patternLength = mPattern.length();
195      int maxMismatches = mPattern.getMaxMismatches();
196
197      for (int i = inStartIndex; i < inSearchLength; i++)
198      {
199         IndexNode indexNode = mOffsetList.get((int)mTargetString.charAt(i));
200         int offset;
201         if ((offset = indexNode.getOffset()) >= 0)
202         {
203            mMismatchCount[(i + offset)&MOD256]--;
204            if (indexNode.nextIndex() >= 0)
205            {
206               for (indexNode = mOffsetList.get(indexNode.nextIndex()); indexNode != null; indexNode = mOffsetList.get(indexNode.nextIndex()))
207               {
208                  mMismatchCount[(i + indexNode.getOffset()) & MOD256]--;
209                  if (indexNode.nextIndex() < 0)
210                  {
211                     break;
212                  }
213               }
214            }
215         }
216
217         if (mMismatchCount[i&MOD256] <= maxMismatches)
218         {
219            int start = i - patternLength + 1;
220            if (start >= inStartIndex)
221            {
222//            System.out.println(String.format("Match in position %d with %d mismatches", i-patternLength+1, mMismatchCount[i&MOD256]));
223               if (null == mMatches)
224               {
225                  mMatches = new ArrayList<>(1);
226               }
227
228               mMatches.add(new BYPStringMatch(mTargetString.substring(start, i + 1), start, i + 1).setNumMismatches(mMismatchCount[i & MOD256]));
229
230               if (inStopAfterFirstMatch)
231               {
232                  break;
233               }
234            }
235         }
236
237         mMismatchCount[i&MOD256] = patternLength;
238      }
239   }
240
241   //###########################################################################
242   // INNER CLASS
243   //###########################################################################
244
245   private class IndexNode
246   {
247      int mOffset = -1;
248      short mNext = -1;
249
250
251      public int getOffset()
252      {
253         return mOffset;
254      }
255      public void setOffset(int inValue)
256      {
257         mOffset = inValue;
258      }
259
260      public short nextIndex()
261      {
262         return mNext;
263      }
264
265      public void setNextIndex(short inValue)
266      {
267         mNext = inValue;
268      }
269
270      public String toString()
271      {
272         return "Offset: " + mOffset + "; Next: " + mNext;
273      }
274   }
275}