001package com.hfg.xml;
002
003import java.io.BufferedInputStream;
004import java.io.BufferedReader;
005import java.io.BufferedWriter;
006import java.io.ByteArrayOutputStream;
007import java.io.File;
008import java.io.FileInputStream;
009import java.io.FileOutputStream;
010import java.io.IOException;
011import java.io.InputStreamReader;
012import java.io.OutputStream;
013import java.io.OutputStreamWriter;
014import java.io.Writer;
015import java.nio.charset.Charset;
016import java.util.List;
017import java.util.regex.Matcher;
018import java.util.regex.Pattern;
019
020import com.hfg.exception.ProgrammingException;
021import com.hfg.util.StringUtil;
022import com.hfg.xml.parser.XMLTagReader;
023
024//------------------------------------------------------------------------------
025/**
026 Abstract base XML / HTML document.
027
028 @author J. Alex Taylor, hairyfatguy.com
029 */
030//------------------------------------------------------------------------------
031// com.hfg XML/HTML Coding Library
032//
033// This library is free software; you can redistribute it and/or
034// modify it under the terms of the GNU Lesser General Public
035// License as published by the Free Software Foundation; either
036// version 2.1 of the License, or (at your option) any later version.
037//
038// This library is distributed in the hope that it will be useful,
039// but WITHOUT ANY WARRANTY; without even the implied warranty of
040// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
041// Lesser General Public License for more details.
042//
043// You should have received a copy of the GNU Lesser General Public
044// License along with this library; if not, write to the Free Software
045// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
046//
047// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
048// jataylor@hairyfatguy.com
049//------------------------------------------------------------------------------
050
051public abstract class XMLBasedDoc<T extends XMLNode>
052{
053   private XMLNode mRootNode;
054   private String  mName;
055   private XMLSpec mSpec       = DEFAULT_XMLSPEC;
056   private Charset mEncoding   = DEFAULT_CHARSET;
057   private boolean mByteOrderMarkPresent;
058   private boolean mStandalone = true;
059   private Doctype mDoctype;
060   private List<XMLComment> mTopLevelComments;
061   // Was the document constructed from objects or read in from a pre-existing file?
062   private boolean mConstructedContent = true;
063
064   private static final XMLSpec DEFAULT_XMLSPEC = XMLSpec.v1_0;
065   private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8");
066
067   private static final Pattern XML_HEADER_PATTERN = Pattern.compile("^\\s*<\\?xml .+encoding=[\\'\"](\\S+)[\\'\"]", Pattern.CASE_INSENSITIVE);
068
069   protected static final String  NL = System.getProperty("line.separator");
070
071   //###########################################################################
072   // CONSTRUCTORS
073   //###########################################################################
074
075   //---------------------------------------------------------------------------
076   public XMLBasedDoc()
077   {
078
079   }
080
081   //---------------------------------------------------------------------------
082   public XMLBasedDoc(XMLNode inRootNode)
083   {
084      setRootNode(inRootNode);
085   }
086
087   //---------------------------------------------------------------------------
088   /**
089    The preferred way to read XML from a file.
090    @param inFile The XML file to read.
091    */
092   public XMLBasedDoc(File inFile)
093      throws XMLException, IOException
094   {
095      if (inFile != null
096          && inFile.exists())
097      {
098         try
099         {
100            mEncoding = determineEncoding(inFile);
101
102            setName(inFile.getName());
103
104            // Now read in the XML.
105            BufferedReader reader = null;
106            try
107            {
108               reader = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), mEncoding));
109
110               if (mByteOrderMarkPresent)
111               {
112                  reader.read(); // Skip the byte order mark
113               }
114
115//               mRootNode = new XMLTag(reader);
116               XMLTagReader tagReader = getTagReader();
117               tagReader.parse(reader);
118               setRootNode(tagReader.getRootNode());
119               setDoctype(tagReader.getDoctype());
120
121               mConstructedContent = false;
122            }
123            finally
124            {
125               if (reader != null) reader.close();
126            }
127         }
128         catch (XMLException e)
129         {
130            throw new XMLException("The file " + StringUtil.singleQuote(inFile.getPath()) + " doesn't appear to be in proper XML format!", e);
131         }
132         catch (IOException e)
133         {
134            throw new IOException("Problem encountered while reading file " + StringUtil.singleQuote(inFile.getPath()) + "!", e);
135         }
136      }
137   }
138
139   //---------------------------------------------------------------------------
140   public XMLBasedDoc(BufferedReader inReader)
141   {
142      try
143      {
144         mEncoding = determineEncoding(inReader);
145
146         // Now read in the XML.
147         try
148         {
149            XMLTagReader tagReader = getTagReader();
150            tagReader.parse(inReader);
151            setRootNode(tagReader.getRootNode());
152            setDoctype(tagReader.getDoctype());
153            mTopLevelComments = tagReader.getTopLevelComments();
154
155            mConstructedContent = false;
156         }
157         finally
158         {
159            if (inReader != null)
160            {
161               inReader.close();
162            }
163         }
164      }
165      catch (IOException e)
166      {
167         throw new XMLException(e);
168      }
169   }
170
171   //---------------------------------------------------------------------------
172   /**
173    The preferred way to read XML from a stream.
174    */
175   public XMLBasedDoc(BufferedInputStream inStream)
176   {
177      this(new BufferedReader(new InputStreamReader(inStream)));
178   }
179
180   //###########################################################################
181   // PUBLIC METHODS
182   //###########################################################################
183
184
185   //---------------------------------------------------------------------------
186   public XMLBasedDoc setName(String inValue)
187   {
188      mName = inValue;
189      return this;
190   }
191
192
193   //---------------------------------------------------------------------------
194   public String name()
195   {
196      return mName;
197   }
198
199   //---------------------------------------------------------------------------
200   @Override
201   public XMLBasedDoc clone()
202   {
203      XMLBasedDoc cloneObj;
204      try
205      {
206         cloneObj = (XMLBasedDoc) super.clone();
207      }
208      catch (CloneNotSupportedException e)
209      {
210         throw new ProgrammingException(e);
211      }
212
213      if (mRootNode != null)
214      {
215         cloneObj.mRootNode = (XMLNode) mRootNode.clone();
216      }
217
218      return cloneObj;
219   }
220
221   //---------------------------------------------------------------------------
222   public XMLBasedDoc setSpec(XMLSpec inValue)
223   {
224      mSpec = inValue;
225      return this;
226   }
227
228   //---------------------------------------------------------------------------
229   public XMLBasedDoc setEncoding(Charset inValue)
230   {
231      mEncoding = inValue;
232      return this;
233   }
234
235   //---------------------------------------------------------------------------
236   public Charset getEncoding()
237   {
238      return mEncoding;
239   }
240
241   //---------------------------------------------------------------------------
242   public XMLBasedDoc setIsStandalone(boolean inValue)
243   {
244      mStandalone = inValue;
245      return this;
246   }
247
248   //--------------------------------------------------------------------------
249   public XMLBasedDoc setDoctype(Doctype inValue)
250   {
251      mDoctype = inValue;
252      return this;
253   }
254
255   //--------------------------------------------------------------------------
256   public Doctype getDoctype()
257   {
258      return mDoctype;
259   }
260
261   //---------------------------------------------------------------------------
262   public List<XMLComment> getTopLevelComments()
263   {
264      return mTopLevelComments;
265   }
266
267   //---------------------------------------------------------------------------
268   public void setRootNode(XMLNode inRootNode)
269   {
270      mRootNode = inRootNode;
271   }
272
273   //---------------------------------------------------------------------------
274   public XMLNode getRootNode()
275   {
276      return mRootNode;
277   }
278
279   //---------------------------------------------------------------------------
280   /**
281    @return The XML String encoded in the scheme specified for the XMLDoc.
282    */
283   public String toXML()
284   {
285      ByteArrayOutputStream outStream;
286      try
287      {
288         outStream = new ByteArrayOutputStream();
289         BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(outStream, mEncoding));
290         toXML(bufferedWriter);
291         bufferedWriter.close();
292      }
293      catch (IOException e)
294      {
295         throw new XMLException(e);
296      }
297
298      return outStream.toString();
299   }
300
301   //---------------------------------------------------------------------------
302   /**
303    The preferred way to save XML to a file. Properly encodes the XML based on
304    the specified Charset.
305
306    @param inFile the File to which the XML should be written
307    */
308   public void toXML(File inFile)
309   {
310      checkPermissions(inFile);
311
312      try
313      {
314         Writer writer = null;
315         try
316         {
317            writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(inFile), mEncoding));
318            toXML(writer);
319         }
320         finally
321         {
322            if (writer != null) writer.close();
323         }
324      }
325      catch (IOException e)
326      {
327         throw new XMLException(e);
328      }
329   }
330
331   //---------------------------------------------------------------------------
332   public void toXML(OutputStream inStream)
333   {
334      Writer writer = new OutputStreamWriter(inStream, mEncoding);
335
336      toXML(writer);
337   }
338
339   //---------------------------------------------------------------------------
340   /**
341    Writes the XML document to the specified Writer. Note that the caller
342    needs to have specified the Charset for the Writer if other than the default encoding is desired.
343    */
344   public void toXML(Writer inWriter)
345   {
346      prepareForOutput();
347
348      BufferedWriter bufferedWriter;
349      try
350      {
351         if (inWriter instanceof BufferedWriter)
352         {
353            bufferedWriter = (BufferedWriter) inWriter;
354         }
355         else
356         {
357            bufferedWriter = new BufferedWriter(inWriter);
358         }
359
360         bufferedWriter.write(getHeader());
361         if (mRootNode != null) mRootNode.toXML(bufferedWriter);
362         bufferedWriter.flush();
363      }
364      catch (IOException e)
365      {
366         throw new RuntimeException(e);
367      }
368   }
369
370   //---------------------------------------------------------------------------
371   public String toIndentedXML(int inInitialIndentLevel, int inIndentSize)
372   {
373      ByteArrayOutputStream outStream;
374      try
375      {
376         outStream = new ByteArrayOutputStream();
377         toIndentedXML(outStream, inInitialIndentLevel, inIndentSize);
378         outStream.close();
379      }
380      catch (Exception e)
381      {
382         throw new XMLException(e);
383      }
384
385      return outStream.toString();
386    }
387
388   //---------------------------------------------------------------------------
389   /**
390    Writes an indented form of the XML document to the specified OutputStream.
391    Flushes but does not close the specified OutputStream after writing.
392
393    @param inStream   The OutputStream to which the XML will be written
394    @param inInitialIndentLevel The size of the initial indent
395    @param inIndentSize The number of spaces incremented for ea. indent level
396    */
397   public void toIndentedXML(OutputStream inStream, int inInitialIndentLevel, int inIndentSize)
398   {
399      OutputStreamWriter writer;
400      try
401      {
402         writer = new OutputStreamWriter(inStream, mEncoding);
403
404         writer.write(getHeader());
405         if (mRootNode != null) mRootNode.toIndentedXML(writer, inInitialIndentLevel, inIndentSize);
406         writer.flush();
407      }
408      catch (IOException e)
409      {
410         throw new XMLException(e);
411      }
412   }
413
414   //---------------------------------------------------------------------------
415   /**
416    Writes an indented form of the XML document to the specified Writer. Note that the caller
417    needs to have specified the Charset for the Writer if other than the default encoding is desired.
418    */
419   public void toIndentedXML(Writer inWriter, int inInitialIndentLevel, int inIndentSize)
420   {
421      prepareForOutput();
422
423      BufferedWriter bufferedWriter;
424      try
425      {
426         if (inWriter instanceof BufferedWriter)
427         {
428            bufferedWriter = (BufferedWriter) inWriter;
429         }
430         else
431         {
432            bufferedWriter = new BufferedWriter(inWriter);
433         }
434
435         bufferedWriter.write(getHeader());
436         if (mRootNode != null) mRootNode.toIndentedXML(bufferedWriter, inInitialIndentLevel, inIndentSize);
437         bufferedWriter.flush();
438      }
439      catch (IOException e)
440      {
441         throw new XMLException(e);
442      }
443   }
444
445
446   //---------------------------------------------------------------------------
447   /**
448    The preferred way to save XML to a file. Properly encodes the XML based on
449    the specified Charset.
450
451    @param inFile The target output file
452    @param inInitialIndentLevel The size of the initial indent
453    @param inIndentSize The number of spaces incremented for ea. indent level
454    */
455   public void toIndentedXML(File inFile, int inInitialIndentLevel, int inIndentSize)
456   {
457      checkPermissions(inFile);
458
459      try
460      {
461         Writer writer = null;
462         try
463         {
464            writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(inFile), mEncoding));
465            toIndentedXML(writer, inInitialIndentLevel, inIndentSize);
466         }
467         finally
468         {
469            if (writer != null) writer.close();
470         }
471      }
472      catch (IOException e)
473      {
474         throw new XMLException(e);
475      }
476   }
477
478   //--------------------------------------------------------------------------
479   public String getHeader()
480   {
481      StringBuilder buffer = new StringBuilder();
482      buffer.append("<?xml version=");
483      buffer.append(StringUtil.singleQuote(mSpec.getVersionString()));
484      buffer.append(" encoding=");
485      buffer.append(StringUtil.singleQuote(mEncoding.displayName()));
486      buffer.append(" standalone=");
487      buffer.append(StringUtil.singleQuote(mStandalone ? "yes" : "no"));
488      buffer.append(" ?>");
489      buffer.append(NL);
490
491      if (getDoctype() != null)
492      {
493         buffer.append(getDoctype().toString());
494         buffer.append(NL);
495      }
496
497      return buffer.toString();
498   }
499
500   //--------------------------------------------------------------------------
501   public void replaceCharacterEntities()
502   {
503      mRootNode.replaceCharacterEntities();
504   }
505
506   //###########################################################################
507   // PROTECTED METHODS
508   //###########################################################################
509
510   //--------------------------------------------------------------------------
511   protected void  prepareForOutput()
512   {
513
514   }
515
516   //--------------------------------------------------------------------------
517   protected boolean isConstructedContent()
518   {
519      return mConstructedContent;
520   }
521
522   //--------------------------------------------------------------------------
523   protected XMLTagReader getTagReader()
524   {
525      return new XMLTagReader();
526   }
527
528   //###########################################################################
529   // PRIVATE METHODS
530   //###########################################################################
531
532   //---------------------------------------------------------------------------
533   // See:  http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
534   private Charset determineEncoding(File inFile)
535         throws IOException
536   {
537      Charset encoding = DEFAULT_CHARSET;
538
539      BufferedInputStream stream = null;
540      try
541      {
542         // Determine the XML file's encoding scheme.
543         stream = new BufferedInputStream(new FileInputStream(inFile));
544
545         encoding = determineEncoding(stream);
546      }
547      finally
548      {
549         if (stream != null) stream.close();
550      }
551
552      return encoding;
553   }
554
555   //---------------------------------------------------------------------------
556   // See:  http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
557   private Charset determineEncoding(BufferedInputStream inStream)
558         throws IOException
559   {
560      Charset encoding = null;
561      mByteOrderMarkPresent = false;
562
563      int READ_LIMIT = 1024;
564      int bytesRead = 0;
565
566      try
567      {
568         inStream.mark(READ_LIMIT);
569
570         // Determine the XML file's encoding scheme.
571         byte[] byteBuffer = new byte[READ_LIMIT];
572         if ((bytesRead = inStream.read(byteBuffer)) > 4)
573         {
574            // Is there a BOM (Byte Order Mark)?
575            String encodingString = null;
576            if ((0xff & byteBuffer[0]) == 0xFE && (0xff & byteBuffer[1]) == 0xFF)
577            {
578               encodingString = "UTF-16BE";
579               mByteOrderMarkPresent = true;
580            }
581            else if ((0xff & byteBuffer[0]) == 0xFF && (0xff & byteBuffer[1]) == 0xFE)
582            {
583               encodingString = "UTF-16LE";
584               mByteOrderMarkPresent = true;
585            }
586            else if ((0xff & byteBuffer[0]) == 0xEF && (0xff & byteBuffer[1]) == 0xBB && (0xff & byteBuffer[2]) == 0xBF)
587            {
588               encodingString = "UTF-8";
589               mByteOrderMarkPresent = true;
590            }
591
592            if (StringUtil.isSet(encodingString))
593            {
594               encoding = Charset.forName(encodingString);
595            }
596
597            if (null == encoding)
598            {
599               // See if the first line is and xml declaration line that specifies the encoding
600               StringBuilder buffer = new StringBuilder();
601               for (int i = 0; i < bytesRead; i++)
602               {
603                  int theChar = byteBuffer[i];
604                  if (theChar == '\n'
605                        || theChar == '\r')
606                  {
607                     if (buffer.toString().trim().length() > 0)
608                     {
609                        break;
610                     }
611                     else
612                     {
613                        buffer.setLength(0);
614                     }
615                  }
616
617                  buffer.append((char) theChar);
618               }
619
620               String xmlHeader = buffer.toString().trim();
621               if (StringUtil.isSet(xmlHeader))
622               {
623                  if (mByteOrderMarkPresent) xmlHeader = xmlHeader.substring(4);
624                  Matcher m = XML_HEADER_PATTERN.matcher(xmlHeader);
625                  if (m.find())
626                  {
627                     encoding = Charset.forName(m.group(1));
628                  }
629               }
630            }
631         }
632      }
633      finally
634      {
635         if (inStream != null)
636         {
637            inStream.reset();
638         }
639      }
640
641      return encoding != null ? encoding : DEFAULT_CHARSET;
642   }
643
644   //---------------------------------------------------------------------------
645   // See:  http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info
646   private Charset determineEncoding(BufferedReader inReader)
647         throws IOException
648   {
649      Charset encoding = null;
650      mByteOrderMarkPresent = false;
651
652      int READ_LIMIT = 1024;
653      int bytesRead = 0;
654      
655      try
656      {
657         inReader.mark(READ_LIMIT);
658
659         // Determine the XML file's encoding scheme.
660         char[] buffer = new char[READ_LIMIT];
661         if ((bytesRead = inReader.read(buffer)) > 4)
662         {
663            byte[] bytes = new String(buffer, 0, 4).getBytes();
664
665            // Is there a BOM (Byte Order Mark)?
666            String encodingString = null;
667            if ((0xff & bytes[0]) == 0xFE && (0xff & bytes[1]) == 0xFF)
668            {
669               encodingString = "UTF-16BE";
670               mByteOrderMarkPresent = true;
671            }
672            else if ((0xff & bytes[0]) == 0xFF && (0xff & bytes[1]) == 0xFE)
673            {
674               encodingString = "UTF-16LE";
675               mByteOrderMarkPresent = true;
676            }
677            else if ((0xff & bytes[0]) == 0xEF && (0xff & bytes[1]) == 0xBB && (0xff & bytes[2]) == 0xBF)
678            {
679               encodingString = "UTF-8";
680               mByteOrderMarkPresent = true;
681            }
682
683            if (StringUtil.isSet(encodingString))
684            {
685               encoding = Charset.forName(encodingString);
686            }
687
688            if (null == encoding)
689            {
690               // See if the first line is and xml declaration line that specifies the encoding
691               Matcher m = XML_HEADER_PATTERN.matcher(new String(buffer, 0, bytesRead).trim());
692               if (m.find())
693               {
694                  encoding = Charset.forName(m.group(1));
695               }
696            }
697         }
698      }
699      finally
700      {
701         if (inReader != null)
702         {
703            inReader.reset();
704         }
705      }
706
707      return encoding != null ? encoding : DEFAULT_CHARSET;
708   }
709
710   //--------------------------------------------------------------------------
711   private void checkPermissions(File inFile)
712   {
713      if (null == inFile)
714      {
715         throw new RuntimeException("The specified file was null!");
716      }
717      else if (inFile.exists())
718      {
719         if (! inFile.canWrite())
720         {
721            throw new RuntimeException("No write permissions for " + StringUtil.singleQuote(inFile.getAbsolutePath()) + "!");
722         }
723      }
724      else if (inFile.getParentFile() != null)
725      {
726         if (inFile.getParentFile().exists())
727         {
728            if (! inFile.getParentFile().canWrite())
729            {
730               throw new RuntimeException("No write permissions for dir " + StringUtil.singleQuote(inFile.getParentFile().getAbsolutePath()) + "!");
731            }
732         }
733         else if (! inFile.getParentFile().mkdirs())
734         {
735            throw new RuntimeException("Could not create dir " + StringUtil.singleQuote(inFile.getParentFile().getAbsolutePath()) + "!");
736         }
737      }
738   }
739}