001package com.hfg.xml; 002 003import java.io.BufferedInputStream; 004import java.io.BufferedReader; 005import java.io.BufferedWriter; 006import java.io.ByteArrayOutputStream; 007import java.io.File; 008import java.io.FileInputStream; 009import java.io.FileOutputStream; 010import java.io.IOException; 011import java.io.InputStreamReader; 012import java.io.OutputStream; 013import java.io.OutputStreamWriter; 014import java.io.Writer; 015import java.nio.charset.Charset; 016import java.util.List; 017import java.util.regex.Matcher; 018import java.util.regex.Pattern; 019 020import com.hfg.exception.ProgrammingException; 021import com.hfg.util.StringUtil; 022import com.hfg.xml.parser.XMLTagReader; 023 024//------------------------------------------------------------------------------ 025/** 026 Abstract base XML / HTML document. 027 028 @author J. Alex Taylor, hairyfatguy.com 029 */ 030//------------------------------------------------------------------------------ 031// com.hfg XML/HTML Coding Library 032// 033// This library is free software; you can redistribute it and/or 034// modify it under the terms of the GNU Lesser General Public 035// License as published by the Free Software Foundation; either 036// version 2.1 of the License, or (at your option) any later version. 037// 038// This library is distributed in the hope that it will be useful, 039// but WITHOUT ANY WARRANTY; without even the implied warranty of 040// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 041// Lesser General Public License for more details. 042// 043// You should have received a copy of the GNU Lesser General Public 044// License along with this library; if not, write to the Free Software 045// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 046// 047// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 048// jataylor@hairyfatguy.com 049//------------------------------------------------------------------------------ 050 051public abstract class XMLBasedDoc<T extends XMLNode> 052{ 053 private XMLNode mRootNode; 054 private String mName; 055 private XMLSpec mSpec = DEFAULT_XMLSPEC; 056 private Charset mEncoding = DEFAULT_CHARSET; 057 private boolean mByteOrderMarkPresent; 058 private boolean mStandalone = true; 059 private Doctype mDoctype; 060 private List<XMLComment> mTopLevelComments; 061 // Was the document constructed from objects or read in from a pre-existing file? 062 private boolean mConstructedContent = true; 063 064 private static final XMLSpec DEFAULT_XMLSPEC = XMLSpec.v1_0; 065 private static final Charset DEFAULT_CHARSET = Charset.forName("UTF-8"); 066 067 private static final Pattern XML_HEADER_PATTERN = Pattern.compile("^\\s*<\\?xml .+encoding=[\\'\"](\\S+)[\\'\"]", Pattern.CASE_INSENSITIVE); 068 069 protected static final String NL = System.getProperty("line.separator"); 070 071 //########################################################################### 072 // CONSTRUCTORS 073 //########################################################################### 074 075 //--------------------------------------------------------------------------- 076 public XMLBasedDoc() 077 { 078 079 } 080 081 //--------------------------------------------------------------------------- 082 public XMLBasedDoc(XMLNode inRootNode) 083 { 084 setRootNode(inRootNode); 085 } 086 087 //--------------------------------------------------------------------------- 088 /** 089 The preferred way to read XML from a file. 090 @param inFile The XML file to read. 091 */ 092 public XMLBasedDoc(File inFile) 093 throws XMLException, IOException 094 { 095 if (inFile != null 096 && inFile.exists()) 097 { 098 try 099 { 100 mEncoding = determineEncoding(inFile); 101 102 setName(inFile.getName()); 103 104 // Now read in the XML. 105 BufferedReader reader = null; 106 try 107 { 108 reader = new BufferedReader(new InputStreamReader(new FileInputStream(inFile), mEncoding)); 109 110 if (mByteOrderMarkPresent) 111 { 112 reader.read(); // Skip the byte order mark 113 } 114 115// mRootNode = new XMLTag(reader); 116 XMLTagReader tagReader = getTagReader(); 117 tagReader.parse(reader); 118 setRootNode(tagReader.getRootNode()); 119 setDoctype(tagReader.getDoctype()); 120 121 mConstructedContent = false; 122 } 123 finally 124 { 125 if (reader != null) reader.close(); 126 } 127 } 128 catch (XMLException e) 129 { 130 throw new XMLException("The file " + StringUtil.singleQuote(inFile.getPath()) + " doesn't appear to be in proper XML format!", e); 131 } 132 catch (IOException e) 133 { 134 throw new IOException("Problem encountered while reading file " + StringUtil.singleQuote(inFile.getPath()) + "!", e); 135 } 136 } 137 } 138 139 //--------------------------------------------------------------------------- 140 public XMLBasedDoc(BufferedReader inReader) 141 { 142 try 143 { 144 mEncoding = determineEncoding(inReader); 145 146 // Now read in the XML. 147 try 148 { 149 XMLTagReader tagReader = getTagReader(); 150 tagReader.parse(inReader); 151 setRootNode(tagReader.getRootNode()); 152 setDoctype(tagReader.getDoctype()); 153 mTopLevelComments = tagReader.getTopLevelComments(); 154 155 mConstructedContent = false; 156 } 157 finally 158 { 159 if (inReader != null) 160 { 161 inReader.close(); 162 } 163 } 164 } 165 catch (IOException e) 166 { 167 throw new XMLException(e); 168 } 169 } 170 171 //--------------------------------------------------------------------------- 172 /** 173 The preferred way to read XML from a stream. 174 */ 175 public XMLBasedDoc(BufferedInputStream inStream) 176 { 177 this(new BufferedReader(new InputStreamReader(inStream))); 178 } 179 180 //########################################################################### 181 // PUBLIC METHODS 182 //########################################################################### 183 184 185 //--------------------------------------------------------------------------- 186 public XMLBasedDoc setName(String inValue) 187 { 188 mName = inValue; 189 return this; 190 } 191 192 193 //--------------------------------------------------------------------------- 194 public String name() 195 { 196 return mName; 197 } 198 199 //--------------------------------------------------------------------------- 200 @Override 201 public XMLBasedDoc clone() 202 { 203 XMLBasedDoc cloneObj; 204 try 205 { 206 cloneObj = (XMLBasedDoc) super.clone(); 207 } 208 catch (CloneNotSupportedException e) 209 { 210 throw new ProgrammingException(e); 211 } 212 213 if (mRootNode != null) 214 { 215 cloneObj.mRootNode = (XMLNode) mRootNode.clone(); 216 } 217 218 return cloneObj; 219 } 220 221 //--------------------------------------------------------------------------- 222 public XMLBasedDoc setSpec(XMLSpec inValue) 223 { 224 mSpec = inValue; 225 return this; 226 } 227 228 //--------------------------------------------------------------------------- 229 public XMLBasedDoc setEncoding(Charset inValue) 230 { 231 mEncoding = inValue; 232 return this; 233 } 234 235 //--------------------------------------------------------------------------- 236 public Charset getEncoding() 237 { 238 return mEncoding; 239 } 240 241 //--------------------------------------------------------------------------- 242 public XMLBasedDoc setIsStandalone(boolean inValue) 243 { 244 mStandalone = inValue; 245 return this; 246 } 247 248 //-------------------------------------------------------------------------- 249 public XMLBasedDoc setDoctype(Doctype inValue) 250 { 251 mDoctype = inValue; 252 return this; 253 } 254 255 //-------------------------------------------------------------------------- 256 public Doctype getDoctype() 257 { 258 return mDoctype; 259 } 260 261 //--------------------------------------------------------------------------- 262 public List<XMLComment> getTopLevelComments() 263 { 264 return mTopLevelComments; 265 } 266 267 //--------------------------------------------------------------------------- 268 public void setRootNode(XMLNode inRootNode) 269 { 270 mRootNode = inRootNode; 271 } 272 273 //--------------------------------------------------------------------------- 274 public XMLNode getRootNode() 275 { 276 return mRootNode; 277 } 278 279 //--------------------------------------------------------------------------- 280 /** 281 @return The XML String encoded in the scheme specified for the XMLDoc. 282 */ 283 public String toXML() 284 { 285 ByteArrayOutputStream outStream; 286 try 287 { 288 outStream = new ByteArrayOutputStream(); 289 BufferedWriter bufferedWriter = new BufferedWriter(new OutputStreamWriter(outStream, mEncoding)); 290 toXML(bufferedWriter); 291 bufferedWriter.close(); 292 } 293 catch (IOException e) 294 { 295 throw new XMLException(e); 296 } 297 298 return outStream.toString(); 299 } 300 301 //--------------------------------------------------------------------------- 302 /** 303 The preferred way to save XML to a file. Properly encodes the XML based on 304 the specified Charset. 305 306 @param inFile the File to which the XML should be written 307 */ 308 public void toXML(File inFile) 309 { 310 checkPermissions(inFile); 311 312 try 313 { 314 Writer writer = null; 315 try 316 { 317 writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(inFile), mEncoding)); 318 toXML(writer); 319 } 320 finally 321 { 322 if (writer != null) writer.close(); 323 } 324 } 325 catch (IOException e) 326 { 327 throw new XMLException(e); 328 } 329 } 330 331 //--------------------------------------------------------------------------- 332 public void toXML(OutputStream inStream) 333 { 334 Writer writer = new OutputStreamWriter(inStream, mEncoding); 335 336 toXML(writer); 337 } 338 339 //--------------------------------------------------------------------------- 340 /** 341 Writes the XML document to the specified Writer. Note that the caller 342 needs to have specified the Charset for the Writer if other than the default encoding is desired. 343 */ 344 public void toXML(Writer inWriter) 345 { 346 prepareForOutput(); 347 348 BufferedWriter bufferedWriter; 349 try 350 { 351 if (inWriter instanceof BufferedWriter) 352 { 353 bufferedWriter = (BufferedWriter) inWriter; 354 } 355 else 356 { 357 bufferedWriter = new BufferedWriter(inWriter); 358 } 359 360 bufferedWriter.write(getHeader()); 361 if (mRootNode != null) mRootNode.toXML(bufferedWriter); 362 bufferedWriter.flush(); 363 } 364 catch (IOException e) 365 { 366 throw new RuntimeException(e); 367 } 368 } 369 370 //--------------------------------------------------------------------------- 371 public String toIndentedXML(int inInitialIndentLevel, int inIndentSize) 372 { 373 ByteArrayOutputStream outStream; 374 try 375 { 376 outStream = new ByteArrayOutputStream(); 377 toIndentedXML(outStream, inInitialIndentLevel, inIndentSize); 378 outStream.close(); 379 } 380 catch (Exception e) 381 { 382 throw new XMLException(e); 383 } 384 385 return outStream.toString(); 386 } 387 388 //--------------------------------------------------------------------------- 389 /** 390 Writes an indented form of the XML document to the specified OutputStream. 391 Flushes but does not close the specified OutputStream after writing. 392 393 @param inStream The OutputStream to which the XML will be written 394 @param inInitialIndentLevel The size of the initial indent 395 @param inIndentSize The number of spaces incremented for ea. indent level 396 */ 397 public void toIndentedXML(OutputStream inStream, int inInitialIndentLevel, int inIndentSize) 398 { 399 OutputStreamWriter writer; 400 try 401 { 402 writer = new OutputStreamWriter(inStream, mEncoding); 403 404 writer.write(getHeader()); 405 if (mRootNode != null) mRootNode.toIndentedXML(writer, inInitialIndentLevel, inIndentSize); 406 writer.flush(); 407 } 408 catch (IOException e) 409 { 410 throw new XMLException(e); 411 } 412 } 413 414 //--------------------------------------------------------------------------- 415 /** 416 Writes an indented form of the XML document to the specified Writer. Note that the caller 417 needs to have specified the Charset for the Writer if other than the default encoding is desired. 418 */ 419 public void toIndentedXML(Writer inWriter, int inInitialIndentLevel, int inIndentSize) 420 { 421 prepareForOutput(); 422 423 BufferedWriter bufferedWriter; 424 try 425 { 426 if (inWriter instanceof BufferedWriter) 427 { 428 bufferedWriter = (BufferedWriter) inWriter; 429 } 430 else 431 { 432 bufferedWriter = new BufferedWriter(inWriter); 433 } 434 435 bufferedWriter.write(getHeader()); 436 if (mRootNode != null) mRootNode.toIndentedXML(bufferedWriter, inInitialIndentLevel, inIndentSize); 437 bufferedWriter.flush(); 438 } 439 catch (IOException e) 440 { 441 throw new XMLException(e); 442 } 443 } 444 445 446 //--------------------------------------------------------------------------- 447 /** 448 The preferred way to save XML to a file. Properly encodes the XML based on 449 the specified Charset. 450 451 @param inFile The target output file 452 @param inInitialIndentLevel The size of the initial indent 453 @param inIndentSize The number of spaces incremented for ea. indent level 454 */ 455 public void toIndentedXML(File inFile, int inInitialIndentLevel, int inIndentSize) 456 { 457 checkPermissions(inFile); 458 459 try 460 { 461 Writer writer = null; 462 try 463 { 464 writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(inFile), mEncoding)); 465 toIndentedXML(writer, inInitialIndentLevel, inIndentSize); 466 } 467 finally 468 { 469 if (writer != null) writer.close(); 470 } 471 } 472 catch (IOException e) 473 { 474 throw new XMLException(e); 475 } 476 } 477 478 //-------------------------------------------------------------------------- 479 public String getHeader() 480 { 481 StringBuilder buffer = new StringBuilder(); 482 buffer.append("<?xml version="); 483 buffer.append(StringUtil.singleQuote(mSpec.getVersionString())); 484 buffer.append(" encoding="); 485 buffer.append(StringUtil.singleQuote(mEncoding.displayName())); 486 buffer.append(" standalone="); 487 buffer.append(StringUtil.singleQuote(mStandalone ? "yes" : "no")); 488 buffer.append(" ?>"); 489 buffer.append(NL); 490 491 if (getDoctype() != null) 492 { 493 buffer.append(getDoctype().toString()); 494 buffer.append(NL); 495 } 496 497 return buffer.toString(); 498 } 499 500 //-------------------------------------------------------------------------- 501 public void replaceCharacterEntities() 502 { 503 mRootNode.replaceCharacterEntities(); 504 } 505 506 //########################################################################### 507 // PROTECTED METHODS 508 //########################################################################### 509 510 //-------------------------------------------------------------------------- 511 protected void prepareForOutput() 512 { 513 514 } 515 516 //-------------------------------------------------------------------------- 517 protected boolean isConstructedContent() 518 { 519 return mConstructedContent; 520 } 521 522 //-------------------------------------------------------------------------- 523 protected XMLTagReader getTagReader() 524 { 525 return new XMLTagReader(); 526 } 527 528 //########################################################################### 529 // PRIVATE METHODS 530 //########################################################################### 531 532 //--------------------------------------------------------------------------- 533 // See: http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info 534 private Charset determineEncoding(File inFile) 535 throws IOException 536 { 537 Charset encoding = DEFAULT_CHARSET; 538 539 BufferedInputStream stream = null; 540 try 541 { 542 // Determine the XML file's encoding scheme. 543 stream = new BufferedInputStream(new FileInputStream(inFile)); 544 545 encoding = determineEncoding(stream); 546 } 547 finally 548 { 549 if (stream != null) stream.close(); 550 } 551 552 return encoding; 553 } 554 555 //--------------------------------------------------------------------------- 556 // See: http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info 557 private Charset determineEncoding(BufferedInputStream inStream) 558 throws IOException 559 { 560 Charset encoding = null; 561 mByteOrderMarkPresent = false; 562 563 int READ_LIMIT = 1024; 564 int bytesRead = 0; 565 566 try 567 { 568 inStream.mark(READ_LIMIT); 569 570 // Determine the XML file's encoding scheme. 571 byte[] byteBuffer = new byte[READ_LIMIT]; 572 if ((bytesRead = inStream.read(byteBuffer)) > 4) 573 { 574 // Is there a BOM (Byte Order Mark)? 575 String encodingString = null; 576 if ((0xff & byteBuffer[0]) == 0xFE && (0xff & byteBuffer[1]) == 0xFF) 577 { 578 encodingString = "UTF-16BE"; 579 mByteOrderMarkPresent = true; 580 } 581 else if ((0xff & byteBuffer[0]) == 0xFF && (0xff & byteBuffer[1]) == 0xFE) 582 { 583 encodingString = "UTF-16LE"; 584 mByteOrderMarkPresent = true; 585 } 586 else if ((0xff & byteBuffer[0]) == 0xEF && (0xff & byteBuffer[1]) == 0xBB && (0xff & byteBuffer[2]) == 0xBF) 587 { 588 encodingString = "UTF-8"; 589 mByteOrderMarkPresent = true; 590 } 591 592 if (StringUtil.isSet(encodingString)) 593 { 594 encoding = Charset.forName(encodingString); 595 } 596 597 if (null == encoding) 598 { 599 // See if the first line is and xml declaration line that specifies the encoding 600 StringBuilder buffer = new StringBuilder(); 601 for (int i = 0; i < bytesRead; i++) 602 { 603 int theChar = byteBuffer[i]; 604 if (theChar == '\n' 605 || theChar == '\r') 606 { 607 if (buffer.toString().trim().length() > 0) 608 { 609 break; 610 } 611 else 612 { 613 buffer.setLength(0); 614 } 615 } 616 617 buffer.append((char) theChar); 618 } 619 620 String xmlHeader = buffer.toString().trim(); 621 if (StringUtil.isSet(xmlHeader)) 622 { 623 if (mByteOrderMarkPresent) xmlHeader = xmlHeader.substring(4); 624 Matcher m = XML_HEADER_PATTERN.matcher(xmlHeader); 625 if (m.find()) 626 { 627 encoding = Charset.forName(m.group(1)); 628 } 629 } 630 } 631 } 632 } 633 finally 634 { 635 if (inStream != null) 636 { 637 inStream.reset(); 638 } 639 } 640 641 return encoding != null ? encoding : DEFAULT_CHARSET; 642 } 643 644 //--------------------------------------------------------------------------- 645 // See: http://www.w3.org/TR/REC-xml/#sec-guessing-no-ext-info 646 private Charset determineEncoding(BufferedReader inReader) 647 throws IOException 648 { 649 Charset encoding = null; 650 mByteOrderMarkPresent = false; 651 652 int READ_LIMIT = 1024; 653 int bytesRead = 0; 654 655 try 656 { 657 inReader.mark(READ_LIMIT); 658 659 // Determine the XML file's encoding scheme. 660 char[] buffer = new char[READ_LIMIT]; 661 if ((bytesRead = inReader.read(buffer)) > 4) 662 { 663 byte[] bytes = new String(buffer, 0, 4).getBytes(); 664 665 // Is there a BOM (Byte Order Mark)? 666 String encodingString = null; 667 if ((0xff & bytes[0]) == 0xFE && (0xff & bytes[1]) == 0xFF) 668 { 669 encodingString = "UTF-16BE"; 670 mByteOrderMarkPresent = true; 671 } 672 else if ((0xff & bytes[0]) == 0xFF && (0xff & bytes[1]) == 0xFE) 673 { 674 encodingString = "UTF-16LE"; 675 mByteOrderMarkPresent = true; 676 } 677 else if ((0xff & bytes[0]) == 0xEF && (0xff & bytes[1]) == 0xBB && (0xff & bytes[2]) == 0xBF) 678 { 679 encodingString = "UTF-8"; 680 mByteOrderMarkPresent = true; 681 } 682 683 if (StringUtil.isSet(encodingString)) 684 { 685 encoding = Charset.forName(encodingString); 686 } 687 688 if (null == encoding) 689 { 690 // See if the first line is and xml declaration line that specifies the encoding 691 Matcher m = XML_HEADER_PATTERN.matcher(new String(buffer, 0, bytesRead).trim()); 692 if (m.find()) 693 { 694 encoding = Charset.forName(m.group(1)); 695 } 696 } 697 } 698 } 699 finally 700 { 701 if (inReader != null) 702 { 703 inReader.reset(); 704 } 705 } 706 707 return encoding != null ? encoding : DEFAULT_CHARSET; 708 } 709 710 //-------------------------------------------------------------------------- 711 private void checkPermissions(File inFile) 712 { 713 if (null == inFile) 714 { 715 throw new RuntimeException("The specified file was null!"); 716 } 717 else if (inFile.exists()) 718 { 719 if (! inFile.canWrite()) 720 { 721 throw new RuntimeException("No write permissions for " + StringUtil.singleQuote(inFile.getAbsolutePath()) + "!"); 722 } 723 } 724 else if (inFile.getParentFile() != null) 725 { 726 if (inFile.getParentFile().exists()) 727 { 728 if (! inFile.getParentFile().canWrite()) 729 { 730 throw new RuntimeException("No write permissions for dir " + StringUtil.singleQuote(inFile.getParentFile().getAbsolutePath()) + "!"); 731 } 732 } 733 else if (! inFile.getParentFile().mkdirs()) 734 { 735 throw new RuntimeException("Could not create dir " + StringUtil.singleQuote(inFile.getParentFile().getAbsolutePath()) + "!"); 736 } 737 } 738 } 739}