001package com.hfg.xml.parser;
002
003import java.util.*;
004import java.io.InputStream;
005import java.io.IOException;
006import java.io.InputStreamReader;
007import java.io.BufferedReader;
008import javax.xml.parsers.SAXParser;
009
010import org.xml.sax.*;
011import org.xml.sax.ext.LexicalHandler;
012import org.xml.sax.helpers.AttributesImpl;
013
014import com.hfg.util.BooleanUtil;
015import com.hfg.xml.Doctype;
016import com.hfg.xml.XMLNamespace;
017
018//------------------------------------------------------------------------------
019/**
020  SaxyParser is a lightweight SAX parser.
021  <div>
022  Important (for me) differences between SaxyParser and Xerces:
023   <ul>
024    <li>SaxyParser does NOT close InputSources upon the completion of parsing.</li>
025    <li>SaxyParser can handle the interleaving of content and subtags.</li>
026   </ul>
027  </div>
028  @author J. Alex Taylor, hairyfatguy.com
029 */
030//------------------------------------------------------------------------------
031// com.hfg XML/HTML Coding Library
032//
033// This library is free software; you can redistribute it and/or
034// modify it under the terms of the GNU Lesser General Public
035// License as published by the Free Software Foundation; either
036// version 2.1 of the License, or (at your option) any later version.
037//
038// This library is distributed in the hope that it will be useful,
039// but WITHOUT ANY WARRANTY; without even the implied warranty of
040// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
041// Lesser General Public License for more details.
042//
043// You should have received a copy of the GNU Lesser General Public
044// License along with this library; if not, write to the Free Software
045// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
046//
047// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
048// jataylor@hairyfatguy.com
049//------------------------------------------------------------------------------
050
051public class SaxyParser extends SAXParser implements XMLReader
052{
053   //###########################################################################
054   // PUBLIC FIELDS
055   //###########################################################################
056
057   /**
058    Parser property to enable entity expansion.
059    <pre>
060    http://hairyfatguy.com/sax/properties/entity-expansion
061    </pre>
062    False by default.
063    */
064   public static final String ENTITY_EXPANSION_PROPERTY = "http://hairyfatguy.com/sax/properties/entity-expansion";
065
066
067   /**
068    Parser property to enable strict parsing mode.
069    <pre>
070    http://hairyfatguy.com/sax/properties/strict
071    </pre>
072    False by default.
073    */
074   public static final String STRICT_PROPERTY = "http://hairyfatguy.com/sax/properties/strict";
075
076   //###########################################################################
077   // PRIVATE FIELDS
078   //###########################################################################
079
080   private Doctype         mDoctype;
081
082   private ContentHandler  mContentHandler;
083   private LexicalHandler  mLexicalHandler;
084   private DTDHandler      mDTDHandler;
085   private ErrorHandler    mErrHandler;
086   private EntityResolver  mEntityResolver;
087
088   private boolean         debug = false;
089   private boolean         mExpandEntities = false;
090   private boolean         mStrict = false;
091
092   private BufferedReader  mBufferedReader;
093   private int             mLineCount;
094   private int             mColumn;
095   private boolean         mEOL;
096
097   private char[]          mTempBuffer = new char[100];
098
099   private int             mState;
100   private Stack           mTagStack;
101   private Stack           mStateStack;
102   private int             mDepth;
103   private Map             mNamespaceMap;
104   private Stack<Namespace> mDefaultNamespaceStack;
105
106   private boolean mLenientHTMLParsing;
107   private Collection<String> mLenientHTMLEmptyTags = sLenientHTMLEmptyTags;
108
109   // Enumerated parser states
110   private static final int INITIAL                = 0;
111   private static final int IN_START_OF_TAG        = 1;
112   private static final int IN_XML_DECLARATION     = 2;
113   private static final int IN_DOCTYPE             = 3;
114   private static final int IN_START_TAG           = 4;
115   private static final int IN_END_TAG             = 5;
116   private static final int IN_CONTENT             = 6;
117   private static final int IN_ENTITY              = 7;
118   private static final int IN_CDATA               = 8;
119   private static final int IN_COMMENT             = 9;
120   private static final int IN_EMPTY_TAG           = 10;
121   private static final int IN_MIDDLE_OF_START_TAG = 11;
122   private static final int IN_ATT_NAME            = 12;
123   private static final int IN_ATT_VALUE           = 13;
124   private static final int DONE                   = 14;
125
126   private static final int TAG_START = '<';
127   private static final int TAG_END   = '>';
128   private static final int ENTITY_START = '&';
129   private static int       MAX_ENTITY_LENGTH = 12;
130
131   private static String XMLNS = "xmlns".intern();
132
133   private static final String LEXICAL_HANDLER_PROPERTY = "http://xml.org/sax/properties/lexical-handler";
134
135
136   private static Set<String> sLenientHTMLEmptyTags;
137   private static Set<String> sLenientHTMLTags;
138
139   static
140   {
141      sLenientHTMLEmptyTags = new HashSet<>(10);
142      sLenientHTMLEmptyTags.add("br");
143      sLenientHTMLEmptyTags.add("img");
144      sLenientHTMLEmptyTags.add("link");
145      sLenientHTMLEmptyTags.add("meta");
146
147      sLenientHTMLTags = new HashSet<>(10);
148      sLenientHTMLTags.add("p");
149   }
150
151   //###########################################################################
152   // CONSTRUCTORS
153   //###########################################################################
154
155   //---------------------------------------------------------------------------
156   public SaxyParser()
157   {
158      super();
159   }
160
161   //###########################################################################
162   // PUBLIC METHODS
163   //###########################################################################
164
165   //----------------------------------------------------------------------
166   public SaxyParser setLenientHTMLParsing(boolean inValue)
167   {
168      mLenientHTMLParsing = inValue;
169      return this;
170   }
171
172   //----------------------------------------------------------------------
173   public SaxyParser setLenientHTMLEmptyTags(Collection<String> inEmptyTags)
174   {
175      mLenientHTMLEmptyTags = inEmptyTags;
176      return this;
177   }
178
179   //---------------------------------------------------------------------------
180   /**
181    After parsing, the Doctype (if one was set) can be retrieved here.
182    */
183   public Doctype getDoctype()
184   {
185      return mDoctype;
186   }
187
   // METHODS REQUIRED TO EXTEND SAXParser
189
190   //---------------------------------------------------------------------------
191   @SuppressWarnings("deprecation")
192   public Parser getParser()
193   throws SAXException
194   {
195      throw new SAXException("getParser() not supported. User getXMLReader().");
196   }
197
198   //---------------------------------------------------------------------------
199   public XMLReader getXMLReader()
200   {
201      return this;
202   }
203
204   //---------------------------------------------------------------------------
205   public boolean isNamespaceAware()
206   {
207      return true;
208   }
209
210   //---------------------------------------------------------------------------
211   public boolean isValidating()
212   {
213      return false;
214   }
215
216   //---------------------------------------------------------------------------
217   public void setProperty(String inName, Object inValue)
218   throws SAXNotRecognizedException
219   {
220      if (inName.equals(LEXICAL_HANDLER_PROPERTY))
221      {
222         setLexicalHandler((LexicalHandler)inValue);
223      }
224      else if (inName.equals(ENTITY_EXPANSION_PROPERTY))
225      {
226         mExpandEntities = BooleanUtil.valueOf(inValue);
227      }
228      else if (inName.equals(STRICT_PROPERTY))
229      {
230         mStrict = BooleanUtil.valueOf(inValue);
231      }
232      else
233      {
234         throw new SAXNotRecognizedException("Property '" + inName + "' not supported.");
235      }
236   }
237
238   //---------------------------------------------------------------------------
239   public Object getProperty(String inName)
240   throws SAXNotRecognizedException
241   {
242      Object obj;
243
244      if (inName.equals(LEXICAL_HANDLER_PROPERTY))
245      {
246         obj = getLexicalHandler();
247      }
248      else if (inName.equals(ENTITY_EXPANSION_PROPERTY))
249      {
250         obj = new Boolean(mExpandEntities);
251      }
252      else if (inName.equals(STRICT_PROPERTY))
253      {
254         obj = new Boolean(mStrict);
255      }
256      else
257      {
258         throw new SAXNotRecognizedException("Property '" + inName + "' not supported.");
259      }
260
261      return obj;
262   }
263
264   // METHODS REQUIRED BY THE XMLReader interface
265
266   //---------------------------------------------------------------------------
267   public ContentHandler getContentHandler()
268   {
269      return mContentHandler;
270   }
271
272   //---------------------------------------------------------------------------
273   public void setContentHandler(ContentHandler handler)
274   {
275      mContentHandler = handler;
276   }
277
278   //---------------------------------------------------------------------------
279   public DTDHandler getDTDHandler()
280   {
281      return mDTDHandler;
282   }
283
284   //---------------------------------------------------------------------------
285   public void setDTDHandler(DTDHandler handler)
286   {
287      mDTDHandler = handler;
288   }
289
290   //---------------------------------------------------------------------------
291   public ErrorHandler getErrorHandler()
292   {
293      return mErrHandler;
294   }
295
296   //---------------------------------------------------------------------------
297   public void setErrorHandler(ErrorHandler handler)
298   {
299      mErrHandler = handler;
300   }
301
302   //---------------------------------------------------------------------------
303   public EntityResolver getEntityResolver()
304   {
305      return mEntityResolver;
306   }
307
308   //---------------------------------------------------------------------------
309   public void setEntityResolver(EntityResolver resolver)
310   {
311      mEntityResolver = resolver;
312   }
313
314   //---------------------------------------------------------------------------
315   public void setFeature(String inName, boolean inValue)
316   throws SAXNotRecognizedException
317   {
318      if (inName.equals("http://xml.org/sax/features/namespaces"))
319      {
320         // Namespaces is always on.
321      }
322      else if (inName.equals("http://xml.org/sax/features/namespace-prefixes"))
323      {
324         // Always set to false.
325      }
326      else
327      {
328         throw new SAXNotRecognizedException("Feature '" + inName + "' not supported.");
329      }
330   }
331
332   //---------------------------------------------------------------------------
333   public boolean getFeature(String inName)
334   throws SAXNotRecognizedException
335   {
336      boolean outValue = false;
337
338      if (inName.equals("http://xml.org/sax/features/namespaces"))
339      {
340         outValue = true;
341      }
342      else if (inName.equals("http://xml.org/sax/features/namespace-prefixes"))
343      {
344         outValue = false;
345      }
346      else
347      {
348         throw new SAXNotRecognizedException("Feature '" + inName + "' not supported.");
349      }
350
351      return outValue;
352   }
353/*
354   //---------------------------------------------------------------------------
355   public void setLocale(Locale locale)
356   throws SAXException
357   {
358      throw new SAXException("setLocale() is not supported by this parser.");
359   }
360*/
361
362
363   //---------------------------------------------------------------------------
364   public LexicalHandler getLexicalHandler()
365   {
366      return mLexicalHandler;
367   }
368
369   //---------------------------------------------------------------------------
370   public void setLexicalHandler(LexicalHandler handler)
371   {
372      mLexicalHandler = handler;
373   }
374
375
376   //---------------------------------------------------------------------------
377   public void parse(String inSystemId)
378   throws SAXException, IOException
379   {
380      parse(new InputSource(inSystemId));
381   }
382
383   //---------------------------------------------------------------------------
384   public XMLNamespace getCurrentDefaultNamespace()
385   {
386      return (mDefaultNamespaceStack.size() > 0 ? XMLNamespace.getNamespace(mDefaultNamespaceStack.peek().getURI()) : null);
387   }
388
389   //---------------------------------------------------------------------------
390   private int nextChar()
391   throws IOException
392   {
393      int c = mBufferedReader.read();
394      if (c == -1) throw new EOFException();
395
396      // Need to map \r, \r\n, and \n to \n
397      // See XML spec section 2.11
398      if (c <= '\n')
399      {
400         if (c == '\n' && mEOL)
401         {
402            mEOL = false;
403            c = nextChar();
404         }
405         else if (mEOL)
406         {
407            mEOL = false;
408         }
409         else if (c == '\n')
410         {
411            mLineCount++;
412            mColumn = 0;
413         }
414         else if (c == '\r')
415         {
416            mEOL = true;
417            c = '\n';
418            mLineCount++;
419            mColumn = 0;
420         }
421         else
422         {
423            mColumn++;
424         }
425      }
426      else
427      {
428         mColumn++;
429      }
430
431      return c;
432   }
433
434   //---------------------------------------------------------------------------
435   private void skipEndTag(int c)
436   throws SAXException, IOException
437   {
438      String openTag;
439
440      try
441      {
442         openTag = (String) mTagStack.peek();
443      }
444      catch (EmptyStackException e)
445      {
446         StringBuffer extraEndTag = new StringBuffer(c);
447         while ((c = nextChar()) != TAG_END)
448         {
449            extraEndTag.append(c);
450         }
451         throw new SAXException("End tag '" + extraEndTag + "' didn't have a matching start tag!");
452      }
453
454      mTempBuffer[0] = (char)c;
455      int bytesToRead = openTag.length();
456      int bytesRead = mBufferedReader.read(mTempBuffer, 1, bytesToRead);
457      // Calls to read() are not guaranteed to read the full requested length.
458      // Allow for partial reads.
459      while (bytesRead < bytesToRead)
460      {
461         if (bytesRead <= 0)
462         {
463            throw new SAXException("Expected end tag '" + openTag + "' but reached the end of the file.");
464         }
465
466         bytesRead += mBufferedReader.read(mTempBuffer, bytesRead + 1, bytesToRead - bytesRead);
467      }
468
469      mColumn += bytesToRead;
470
471      if (! openTag.equals(new String(mTempBuffer, 0, openTag.length())))
472      {
473         if (! mLenientHTMLParsing || ! sLenientHTMLTags.contains(openTag.toLowerCase()))
474         {
475            throw new SAXException("Expected end tag '" + openTag + "' but found '"
476                  + (new String(mTempBuffer, 0, openTag.length())) + "' instead!"
477                  + " Line: " + mLineCount + " Col: " + mColumn);
478         }
479      }
480
481      if (mTempBuffer[bytesToRead] != TAG_END)
482      {
483         throw new SAXException("Problem with end tag '" + mTempBuffer[bytesToRead]
484                                + "'! Line: " + mLineCount + " Col: " + mColumn);
485      }
486   }
487
488   //---------------------------------------------------------------------------
489   public void parse(InputSource source)
490   throws SAXException, IOException
491   {
492      initNamespaceStructures();
493
494      mStateStack = new Stack();
495      mTagStack   = new Stack();
496      mDepth = 0;
497      XMLStringBuffer buffer = new XMLStringBuffer();
498      XMLStringBuffer entity = new XMLStringBuffer();
499      String  tagName    = null;
500      String  tagURI     = null;
501      String  tagLocalName = null;
502      AttributesImpl attributes = new AttributesImpl();
503      String  attName    = null;
504      mLineCount = 1;
505      mColumn    = 0;
506      mEOL       = false;
507
508//      int quoteChar = '"';
509      int quoteChar = -1;
510
511      // I'm not sure of any other way that will work to even out the differences between character and binary sources.
512      try
513      {
514         mBufferedReader = new BufferedReader(source.getCharacterStream(), 8192);
515      }
516      catch (NullPointerException e)
517      {
518         mBufferedReader = new BufferedReader(new InputStreamReader(source.getByteStream()), 8192);
519      }
520
521      mContentHandler.startDocument();
522
523      mState = INITIAL;
524      pushState();
525
526      try
527      {
528         int c;
529         while (mState != DONE)
530         {
531            c = nextChar();
532
533            switch (mState)
534            {
535               case IN_CONTENT:
536                  while (true)
537                  {
538                     // We are in tag content
539                     if (c == TAG_START)
540                     {
541                        pushState();
542                        mState = IN_START_OF_TAG;
543                        if (buffer.length() > 0)
544                        {
545                           if (buffer.hasNonwhitespaceContent())
546                           {
547                              mContentHandler.characters(buffer.getCharArray(), 0, buffer.length());
548                           }
549                           buffer.clear();
550                        }
551                        break;
552                     }
553                     else if (c == ENTITY_START)
554                     {
555                        pushState();
556                        mState = IN_ENTITY;
557                        entity.clear();
558                        break;
559                     }
560                     else
561                     {
562                        buffer.append((char) c);
563                     }
564
565                     c = nextChar();
566                  }
567                  break;
568
569
570
571               case INITIAL:
572                  while (true)
573                  {
574                     if (c == -1)
575                     {
576                        mState = DONE;
577                        break;
578                     }
579                     else if (c == TAG_START)
580                     {
581                        pushState();
582                        mState = IN_START_OF_TAG;
583                        break;
584                     }
585                     else if (!Character.isWhitespace((char) c))
586                     {
587                        throw new SAXException("Unexpected text outside of tags (" + (char) c + ")!"
588                                               + " Line: " + mLineCount + " Col: " + mColumn);
589                     }
590
591                     c = nextChar();
592                  }
593                  break;
594
595
596
597               case IN_START_OF_TAG:
598                  // What type of tag does it look like we've wandered into?
599                  if (c == '/')
600                  {
601                     popState();
602                     mState = IN_END_TAG;
603                     tagName = null;
604                  }
605                  else if (c == '?')
606                  {
607                     mState = IN_XML_DECLARATION;
608                  }
609                  else
610                  {
611                     mState = IN_START_TAG;
612                     tagName = null;
613                     attributes.clear();
614                     buffer.append((char) c);
615                  }
616
617                  break;
618
619
620               case IN_START_TAG:
621                  while (true)
622                  {
623
624                     if (Character.isWhitespace((char) c))
625                     {
626                        tagName = buffer.toString();
627                        pushDepth(tagName);
628                        buffer.clear();
629                        mState = IN_MIDDLE_OF_START_TAG;
630                        break;
631                     }
632                     else if (c == TAG_END)
633                     {
634                        if (null == tagName) tagName = buffer.toString();
635                        pushDepth(tagName);
636                        tagURI = getNamespaceURI(getNamespacePrefix(tagName));
637                        tagLocalName = getLocalName(tagName);
638                        mContentHandler.startElement(tagURI,
639                                                     tagLocalName,
640                                                     tagName,
641                                                     attributes);
642
643                        if (mLenientHTMLEmptyTags != null
644                            && mLenientHTMLEmptyTags.contains(tagLocalName.toLowerCase()))
645                        {
646                           mContentHandler.endElement(tagURI,
647                                 tagLocalName,
648                                 tagName);
649                           mTagStack.pop();
650                           buffer.clear();
651                           popDepth();
652                           popState();
653
654                           mState = INITIAL;
655                        }
656                        else
657                        {
658                           mState = IN_CONTENT;
659                        }
660
661                        buffer.clear();
662                        break;
663                     }
664                     else if (c == '/')
665                     {
666                        if (null == tagName) tagName = buffer.toString();
667                        pushDepth(tagName);
668                        mState = IN_EMPTY_TAG;
669                        break;
670                     }
671                     else if (c == '-'
672                              && buffer.toString().equals("!-"))
673                     {
674                        mState = IN_COMMENT;
675                        break;
676                     }
677                     else if (c == '['
678                              && buffer.toString().equals("![CDATA"))
679                     {
680                        mState = IN_CDATA;
681                        if (mLexicalHandler != null) mLexicalHandler.startCDATA();
682                        buffer.clear();
683                        break;
684                     }
685                     else if (c == 'E'
686                              && buffer.toString().equals("!DOCTYP"))
687                     {
688                        mState = IN_DOCTYPE;
689//                        buffer.clear();
690                        buffer.prepend("<");
691                        buffer.append((char) c);
692                        break;
693                     }
694                     else
695                     {
696                        buffer.append((char) c);
697                     }
698
699                     c = nextChar();
700                  }
701                  break;
702
703
704
705               case IN_MIDDLE_OF_START_TAG:
706                  while (true)
707                  {
708                     if (c == TAG_END)
709                     {
710                        tagURI = getNamespaceURI(getNamespacePrefix(tagName));
711                        tagLocalName = getLocalName(tagName);
712                        mContentHandler.startElement(tagURI,
713                                                     tagLocalName,
714                                                     tagName,
715                                                     attributes);
716
717                        if (mLenientHTMLEmptyTags != null
718                              && mLenientHTMLEmptyTags.contains(tagLocalName.toLowerCase()))
719                        {
720                           mContentHandler.endElement(tagURI,
721                                 tagLocalName,
722                                 tagName);
723                           mTagStack.pop();
724                           popDepth();
725                           popState();
726
727                           mState = INITIAL;
728                        }
729                        else
730                        {
731                           mState = IN_CONTENT;
732                        }
733
734                        buffer.clear();
735                        break;
736                     }
737                     else if (c == '/')
738                     {
739                        mState = IN_EMPTY_TAG;
740                        break;
741                     }
742                     else if (!Character.isWhitespace((char) c))
743                     {
744                        mState = IN_ATT_NAME;
745                        attName = null;
746 //                       buffer.clear();
747                        buffer.append((char) c);
748                        break;
749                     }
750
751                     c = nextChar();
752                  }
753                  break;
754
755
756
757               case IN_ATT_NAME:
758                  while (true)
759                  {
760                     if (c == '=')
761                     {
762                        attName = buffer.toString();
763                        mState = IN_ATT_VALUE;
764                        buffer.clear();
765                        break;
766                     }
767                     else
768                     {
769                        buffer.append((char) c);
770                     }
771
772                     c = nextChar();
773                  }
774                  break;
775
776
777               case IN_ATT_VALUE:
778                  if (quoteChar == -1)
779                  {
780                     if (c == '"' || c == '\'')
781                     {
782                        quoteChar = c;
783                        c = nextChar();
784                     }
785                     else if (! mLenientHTMLParsing)
786                     {
787                        throw new SAXException("Improper attribute construction. Expected a quote character!" + mLineCount);
788                     }
789                  }
790
791                  while (true)
792                  {
793                     if (c == quoteChar
794                         || (quoteChar == -1 && "> \t\r\n".contains(((char)c) + "")))
795                     {
796                        String attValue = buffer.toString();
797                        //debugMsg("attName: '" + attName + "'");
798                        if (attName.startsWith("xmlns:")
799                            || attName.equals(XMLNS))
800                        {
801                           extractNamespaceDeclaration(attName, attValue);
802                        }
803                        attributes.addAttribute("", attName, attName, "CDATA", attValue);
804
805                        buffer.clear();
806                        quoteChar = -1;
807
808                        if (c == '>')
809                        {
810                           tagURI = getNamespaceURI(getNamespacePrefix(tagName));
811                           tagLocalName = getLocalName(tagName);
812                           mContentHandler.startElement(tagURI,
813                                 tagLocalName,
814                                 tagName,
815                                 attributes);
816
817                           mState = IN_CONTENT;
818                        }
819                        else
820                        {
821                           mState = IN_MIDDLE_OF_START_TAG;
822                        }
823
824                        break;
825                     }
826                     else if (c == ENTITY_START)
827                     {
828                        pushState();
829                        mState = IN_ENTITY;
830                        entity.clear();
831                        break;
832                     }
833                     else if (" \r\n\u0009".indexOf(c) >= 0)
834                     {
835                        buffer.append(' ');
836                     }
837                     else
838                     {
839                        buffer.append((char) c);
840                     }
841
842                     c = nextChar();
843                  }
844                  break;
845
846
847               case IN_EMPTY_TAG:
848                  if (c != TAG_END)
849                  {
850                     throw new SAXException("Expected '>' for tag <" + tagName + "/>"
851                                            + " Line: " + mLineCount + " Col: " + mColumn);
852                  }
853                  tagURI = getNamespaceURI(getNamespacePrefix(tagName));
854                  tagLocalName = getLocalName(tagName);
855                  mContentHandler.startElement(tagURI,
856                                               tagLocalName,
857                                               tagName,
858                                               attributes);
859                  mContentHandler.endElement(tagURI,
860                                             tagLocalName,
861                                             tagName);
862                  mTagStack.pop();
863                  buffer.clear();
864                  popDepth();
865                  popState();
866                  break;
867
868
869               case IN_END_TAG:
870                  // XXXXXXXXXXXXX The ending tag better be the one currently open.
871                  // (Otherwise it's an error.) Try to take advantage of this precognition.
872                  //String openTag = (String) mTagStack.pop();
873                  skipEndTag(c);
874                  tagName = (String) mTagStack.pop();
875                  mContentHandler.endElement(tagURI,
876                                             getLocalName(tagName),
877                                             tagName);
878                  buffer.clear();
879                  popDepth();
880                  popState();
881
882/*
883                  while (true)
884                  {
885                     // We are in an end tag
886                     if (c == TAG_END)
887                     {
888                        tagName = buffer.toString();
889                        if (mDepth == 0)
890                        {
891                           throw new SAXException("Closing tag '</" + tagName + ">' "
892                                                  + "had no opening tag!"
893                                                  + " Line: " + mLineCount + " Col: " + mColumn);
894                        }
895                        mContentHandler.endElement(tagURI,
896                                                   tagLocalName,
897                                                   tagName);
898                        buffer.clear();
899                        popDepth(tagName);
900                        popState();
901                        break;
902                     }
903                     else
904                     {
905                        buffer.append((char) c);
906                     }
907
908                     c = nextChar();
909                  }
910*/
911                  break;
912
913
914               case IN_ENTITY:
915                  boolean recoverFromInvalidEntity = false;
916
917                  while (true)
918                  {
919                     if (c == ';')
920                     {
921                        String expandedEntity = expandEntity(entity.toString());
922                        buffer.append((expandedEntity != null ? expandedEntity : "&" + entity.toString() + ";"));
923                        entity.clear();
924                        popState();
925                        break;
926                     }
927                     else if (Character.isWhitespace((char) c))
928                     {
929                        if (mStrict)
930                        {
931                           throw new SAXException("Character entity contains whitespace !?"
932                                 + " Line: " + mLineCount + " Col: " + mColumn);
933                        }
934                        else
935                        {
936                           recoverFromInvalidEntity = true;
937                           break;
938                        }
939                     }
940                     else if (c == '"' || c == '\'' || c == '>')
941                     {
942                        if (mStrict)
943                        {
944                           throw new SAXException("Character entity contains illegal character !?"
945                                 + " Line: " + mLineCount + " Col: " + mColumn);
946                        }
947                        else
948                        {
949                           recoverFromInvalidEntity = true;
950                           break;
951                        }
952                     }
953                     else if (entity.length() >= MAX_ENTITY_LENGTH)
954                     {
955                        // Either the entity was malformed or the ampersand should have been escaped.
956                        if (mStrict)
957                        {
958                           throw new SAXException("Max entity length (" + MAX_ENTITY_LENGTH + ") exceeded!?"
959                                 + " Line: " + mLineCount + " Col: " + mColumn);
960                        }
961                        else
962                        {
963                           recoverFromInvalidEntity = true;
964                           break;
965                        }
966                     }
967                     else
968                     {
969                        entity.append((char) c);
970                     }
971
972                     c = nextChar();
973                  }
974
975                  if (recoverFromInvalidEntity)
976                  {
977                     entity.append((char) c);
978                     buffer.append("&" + entity.toString());
979                     entity.clear();
980                     popState();
981
982                     if (IN_ATT_VALUE == mState)
983                     {
984                        if (c == quoteChar)
985                        {
986                           String attValue = buffer.toString();
987                           //debugMsg("attName: '" + attName + "'");
988                           if (attName.startsWith("xmlns:")
989                                 || attName.equals(XMLNS))
990                           {
991                              extractNamespaceDeclaration(attName, attValue);
992                           }
993                           attributes.addAttribute("", attName, attName, "CDATA", attValue);
994
995                           buffer.clear();
996                           quoteChar = -1;
997                           mState = IN_MIDDLE_OF_START_TAG;
998                           break;
999                        }
1000                     }
1001                     else if (IN_CONTENT == mState)
1002                     {
1003                        if (c == TAG_START)
1004                        {
1005                           pushState();
1006                           mState = IN_START_OF_TAG;
1007                           if (buffer.length() > 0)
1008                           {
1009                              if (buffer.hasNonwhitespaceContent())
1010                              {
1011                                 mContentHandler.characters(buffer.getCharArray(), 0, buffer.length());
1012                              }
1013                              buffer.clear();
1014                           }
1015                        }
1016                     }
1017                  }
1018
1019                  break;
1020
1021
1022               case IN_CDATA:
1023                  while (true)
1024                  {
1025                     if (c == TAG_END
1026                           && buffer.toString().endsWith("]]"))
1027                     {
1028                        buffer.setLength(buffer.length() - 2);
1029                        mContentHandler.characters(buffer.getCharArray(), 0, buffer.length());
1030                        if (mLexicalHandler != null) mLexicalHandler.endCDATA();
1031                        popState();
1032                        buffer.clear();
1033                        break;
1034                     }
1035                     else
1036                     {
1037                        buffer.append((char) c);
1038                     }
1039
1040                     c = nextChar();
1041                  }
1042                  break;
1043
1044
1045
1046               case IN_COMMENT:
1047                  while (true)
1048                  {
1049                     // Inside a comment: <!-- -->
1050                     if (c == TAG_END
1051                           && buffer.toString().endsWith("--"))
1052                     {
1053                        if (mLexicalHandler != null) mLexicalHandler.comment(buffer.getCharArray(), 2, buffer.length() - 4);
1054                        popState();
1055                        buffer.clear();
1056                        break;
1057                     }
1058                     else
1059                     {
1060                        buffer.append((char) c);
1061                     }
1062
1063                     c = nextChar();
1064                  }
1065                  break;
1066
1067
1068               case IN_XML_DECLARATION:
1069                  while (true)
1070                  {
1071                     buffer.append((char) c);
1072                     if (c == TAG_END)
1073                     {
1074                        popState();
1075                        if (mState == DONE) mState = INITIAL;
1076                        buffer.clear();
1077                        break;
1078                     }
1079
1080                     c = nextChar();
1081                  }
1082                  break;
1083
1084               case IN_DOCTYPE:
1085                  while (true)
1086                  {
1087                     buffer.append((char) c);
1088                     if (c == TAG_END)
1089                     {
1090                        popState();
1091                        if (mState == DONE) mState = INITIAL;
1092                        mDoctype = Doctype.valueOf(buffer.toString());
1093                        buffer.clear();
1094                        break;
1095                     }
1096
1097                     c = nextChar();
1098                  }
1099                  break;
1100            }
1101         }
1102
1103      }
1104      catch (EOFException e)
1105      {
1106         // Ignore.
1107      }
1108
1109      if (mDepth != 0)
1110      {
1111         throw new SAXException("XML Document is not properly ended! "
1112                                + "Remaining depth= " + mDepth);
1113      }
1114      else if (mState != INITIAL
1115               && mState != DONE)
1116      {
1117         throw new SAXException("Parser ended in bad state (" + mState + ")!");
1118      }
1119      else
1120      {
1121         mContentHandler.endDocument();
1122      }
1123
1124   }
1125
1126   //###########################################################################
1127   // PRIVATE METHODS
1128   //###########################################################################
1129
1130
1131   //---------------------------------------------------------------------------
1132   private void debugMsg(String inMsg)
1133   {
1134      if (debug)
1135      {
1136         System.err.println(inMsg);
1137      }
1138   }
1139
1140   //---------------------------------------------------------------------------
1141   private String expandEntity(String inEntity)
1142   throws SAXException
1143   {
1144      String expandedEntity = null;
1145
1146      if (mExpandEntities)
1147      {
1148         if (inEntity.startsWith("#"))
1149         {
1150            if (inEntity.charAt(1) == 'x')
1151            {
1152               // Hex
1153               expandedEntity = "" + (char) Integer.parseInt(inEntity.substring(2), 16);
1154            }
1155            else
1156            {
1157               // Decimal
1158               expandedEntity = "" + (char) Integer.parseInt(inEntity.substring(1));
1159            }
1160         }
1161         else
1162         {
1163            // Try to resolve it with our battery of standard entity classes.
1164
1165            expandedEntity = SpecialCharacterEntities.resolveEntity(inEntity);
1166
1167            if (null == expandedEntity)
1168            {
1169               expandedEntity = Latin1Entities.getInstance().getNumericEntity(inEntity);
1170            }
1171
1172            if (null == expandedEntity)
1173            {
1174               expandedEntity = SymbolEntities.getInstance().getNumericEntity(inEntity);
1175            }
1176
1177
1178            if (expandedEntity != null
1179                  && expandedEntity.startsWith("#"))
1180            {
1181               expandedEntity = "" + (char) Integer.parseInt(expandedEntity.substring(1));
1182            }
1183         }
1184
1185         // Have we resolved it yet? If not, use the custom entity resolver if one
1186         // has been specified.
1187         if (null == expandedEntity)
1188         {
1189            if (mEntityResolver != null)
1190            {
1191               InputStream inStream = null;
1192
1193               try
1194               {
1195                  InputSource expandedEntitySource = mEntityResolver.resolveEntity(null, inEntity);
1196                  if (expandedEntitySource != null)
1197                  {
1198                     inStream = expandedEntitySource.getByteStream();
1199                     StringBuffer entityBuffer = new StringBuffer();
1200
1201                     int c = 0;
1202                     while ((c = inStream.read()) != -1)
1203                     {
1204                        entityBuffer.append((char) c);
1205                     }
1206
1207                     expandedEntity = entityBuffer.toString();
1208                  }
1209                  else
1210                  {
1211                     throw new SAXException("'" + inEntity + "' is not a recognized entity.");
1212                  }
1213               }
1214               catch (IOException e)
1215               {
1216                  throw new SAXException(e);
1217               }
1218               finally
1219               {
1220                  if (inStream != null)
1221                  {
1222                     try
1223                     {
1224                        inStream.close();
1225                     }
1226                     catch (IOException e)
1227                     {
1228                     }
1229                  }
1230               }
1231            }
1232         }
1233      }
1234
1235      return expandedEntity;
1236   }
1237
1238   //---------------------------------------------------------------------------
1239   private Attributes generateAttributeList(HashMap inAttributes)
1240   {
1241      // NOTE: WE ARE NOT CURRENTLY SUPPORTING ATTRIBUTE TYPES XXXXXXXXXXXXXX
1242      // The attribute type is one of the strings "CDATA", "ID", "IDREF", "IDREFS",
1243      // "NMTOKEN", "NMTOKENS", "ENTITY", "ENTITIES", or "NOTATION" (always in upper case).
1244      //
1245      // If the parser has not read a declaration for the attribute, or if the parser
1246      // does not report attribute types, then it must return the value "CDATA" as
1247      // stated in the XML 1.0 Recommentation (clause 3.3.3, "Attribute-Value Normalization").
1248      //
1249      // For an enumerated attribute that is not a notation, the parser will report
1250      // the type as "NMTOKEN".
1251      String attType = "CDATA";
1252
1253      AttributesImpl attList = new AttributesImpl();
1254
1255      if (inAttributes != null)
1256      {
1257         Iterator iter = inAttributes.entrySet().iterator();
1258         while (iter.hasNext())
1259         {
1260            Map.Entry att = (Map.Entry) iter.next();
1261            attList.addAttribute("", (String) att.getKey(), (String) att.getKey(), attType, (String) att.getValue());
1262         }
1263      }
1264
1265      return attList;
1266   }
1267
1268   //---------------------------------------------------------------------------
1269   private void initNamespaceStructures()
1270   {
1271      mNamespaceMap = new HashMap();
1272      mDefaultNamespaceStack = null;
1273   }
1274
1275   //---------------------------------------------------------------------------
1276   private void pushState()
1277   {
1278      mStateStack.push(new Integer(mState));
1279      //debugMsg(mState + " pushed onto the StateStack");
1280   }
1281
1282   //---------------------------------------------------------------------------
1283   private void popState()
1284   {
1285      mState =  ((Integer) mStateStack.pop()).intValue();
1286      //debugMsg(mState + " popped off the StateStack");
1287   }
1288
1289   //---------------------------------------------------------------------------
1290   private void pushDepth(String inTagName)
1291   {
1292      mDepth++;
1293      //debugMsg("Depth pushed to " + mDepth);
1294
1295      mTagStack.push(inTagName);
1296   }
1297
1298   //---------------------------------------------------------------------------
1299   private void popDepth()
1300   throws SAXException
1301   {
1302      //debugMsg("Depth popped to " + (mDepth - 1));
1303      if (mNamespaceMap.size() > 0)
1304      {
1305         // Can't use an iterator because we might delete the map entry and that would
1306         // cause a ConcurrentModificaitonException.
1307         Object[] prefixes = mNamespaceMap.keySet().toArray();
1308         for (int i = 0; i < prefixes.length; i++)
1309         {
1310            String prefix = (String) prefixes[i];
1311            Stack namespaceStack = (Stack) mNamespaceMap.get(prefix);
1312            // There must be at least one value on the stack or we should have deleted
1313            // the map entry for the prefix.
1314            Namespace namespace = (Namespace) namespaceStack.peek();
1315            if (namespace.getTagLevel() >= mDepth)
1316            {
1317               namespaceStack.pop();
1318               if (namespaceStack.size() == 0)
1319               {
1320                  mNamespaceMap.remove(prefix);
1321               }
1322            }
1323         }
1324      }
1325
1326      // Adjust the default namespace stack if necessary.
1327      if (mDefaultNamespaceStack != null
1328          && mDefaultNamespaceStack.size() > 0)
1329      {
1330         Namespace namespace = (Namespace) mDefaultNamespaceStack.peek();
1331         if (namespace.getTagLevel() >= mDepth)
1332         {
1333            mDefaultNamespaceStack.pop();
1334         }
1335      }
1336
1337
1338
1339      mDepth--;
1340
1341/*      String openTag = (String) mTagStack.pop();
1342      if (!inTagName.equals(openTag))
1343      {
1344         throw new SAXException("Tag mismatch! Tag '" + openTag + "' was open "
1345                                + "but the closing tag was '" + inTagName + "'!");
1346      }
1347*/
1348      if (mDepth == 0) popState();
1349   }
1350
1351   //---------------------------------------------------------------------------
1352   private void extractNamespaceDeclaration(String inName, String inURI)
1353   {
1354      int index = inName.indexOf(":");
1355      if (index > 0)
1356      {
1357         String prefix = inName.substring(index + 1);
1358         //debugMsg("Adding namespace '" + prefix + "': " + inURI);
1359         Stack namespaceStack = (Stack) mNamespaceMap.get(prefix);
1360         if (null == namespaceStack)
1361         {
1362             namespaceStack = new Stack();
1363             mNamespaceMap.put(prefix, namespaceStack);
1364         }
1365         namespaceStack.push(new Namespace(inURI, mDepth));
1366
1367         XMLNamespace.getNamespace(prefix, inURI);  // Register the namespace
1368      }
1369      else
1370      {
1371         //debugMsg("Setting default namespace: " + inURI);
1372         if (null == mDefaultNamespaceStack)
1373         {
1374            mDefaultNamespaceStack = new Stack();
1375         }
1376         mDefaultNamespaceStack.push(new Namespace(inURI, mDepth));
1377
1378         XMLNamespace.getNamespace(inURI);  // Register the namespace
1379      }
1380
1381   }
1382
1383   //---------------------------------------------------------------------------
1384   private String getNamespaceURI(String inPrefix)
1385   {
1386      String uri = null;
1387
1388      if (inPrefix != null)
1389      {
1390         Stack namespaceStack = (Stack) mNamespaceMap.get(inPrefix);
1391         if (namespaceStack != null)
1392         {
1393            uri = ((Namespace) namespaceStack.peek()).getURI();
1394         }
1395      }
1396
1397      if (null == uri
1398          && mDefaultNamespaceStack != null
1399          && mDefaultNamespaceStack.size() > 0)
1400      {
1401         uri = ((Namespace)mDefaultNamespaceStack.peek()).getURI();
1402      }
1403
1404      if (null == uri) uri = "";
1405
1406      return uri;
1407   }
1408
1409   //---------------------------------------------------------------------------
1410   private String getNamespacePrefix(String inValue)
1411   {
1412      String returnValue = null;
1413      int i = inValue.indexOf(":");
1414      if (i > 0)
1415      {
1416         returnValue = inValue.substring(0, i);
1417      }
1418
1419      return returnValue;
1420   }
1421
1422   //---------------------------------------------------------------------------
1423   private String getLocalName(String inValue)
1424   {
1425      String returnValue = inValue;
1426      int i = inValue.indexOf(":");
1427      if (i > 0)
1428      {
1429         returnValue = inValue.substring(i + 1);
1430      }
1431
1432      return returnValue;
1433   }
1434
1435   //###########################################################################
1436   // INNER CLASSES
1437   //###########################################################################
1438
1439
1440   private class EOFException extends RuntimeException
1441   {
1442   }
1443
1444   private class Namespace
1445   {
1446      private String mURI;
1447      private int    mTagLevel;
1448
1449      //------------------------------------------------------------------------
1450      public Namespace(String inURI, int inTagLevel)
1451      {
1452         mURI = inURI;
1453         mTagLevel = inTagLevel;
1454      }
1455
1456      //------------------------------------------------------------------------
1457      public String getURI()
1458      {
1459         return mURI;
1460      }
1461
1462      //------------------------------------------------------------------------
1463      public int getTagLevel()
1464      {
1465         return mTagLevel;
1466      }
1467   }
1468
1469}