001package com.hfg.xml; 002 003// IMPORTS 004import java.io.StringReader; 005import java.util.ArrayList; 006import java.util.Collection; 007import java.util.Collections; 008import java.util.HashSet; 009import java.util.LinkedList; 010import java.util.List; 011import java.util.Map; 012import java.util.Set; 013import java.util.regex.Pattern; 014import java.util.regex.Matcher; 015 016import org.xml.sax.Attributes; 017 018import com.hfg.util.StringBuilderPlus; 019import com.hfg.util.StringUtil; 020import com.hfg.util.collection.CollectionUtil; 021import com.hfg.xml.parser.Latin1Entities; 022import com.hfg.xml.parser.SpecialCharacterEntities; 023import com.hfg.xml.parser.SymbolEntities; 024 025 026//------------------------------------------------------------------------------ 027/** 028 Helper class with static methods for XML tag constrution and 029 some parser helper methods. 030 031 @author J. Alex Taylor 032 */ 033//------------------------------------------------------------------------------ 034// com.hfg XML/HTML Coding Library 035// 036// This library is free software; you can redistribute it and/or 037// modify it under the terms of the GNU Lesser General Public 038// License as published by the Free Software Foundation; either 039// version 2.1 of the License, or (at your option) any later version. 040// 041// This library is distributed in the hope that it will be useful, 042// but WITHOUT ANY WARRANTY; without even the implied warranty of 043// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 044// Lesser General Public License for more details. 045// 046// You should have received a copy of the GNU Lesser General Public 047// License along with this library; if not, write to the Free Software 048// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 049// 050// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 051// jataylor@hairyfatguy.com 052//------------------------------------------------------------------------------ 053 054public class XMLUtil 055{ 056 057 //########################################################################### 058 // PUBLIC FIELDS 059 //########################################################################### 060 061 062 public static final String COMPRESSION_ATT = "compression"; 063 public static final String GZIP = "gzip"; 064 public static final String ENCODING_ATT = "encoding"; 065 public static final String BASE64 = "base64"; 066 067 068 /** 069 Constant which can be used with composeStartTag(). 070 */ 071 public static final boolean EMPTY_TAG = true; 072 073 /** 074 Constant which can be used with composeStartTag(). 075 */ 076 public static final boolean NOT_EMPTY_TAG = false; 077 078 //########################################################################### 079 // PRIVATE FIELDS 080 //########################################################################### 081 082 private static char sQuoteChar = '\''; 083 084 private static ValidatedNameCache sValidatedNameCache = new ValidatedNameCache(); 085 086// private static Pattern sEntityPattern = Pattern.compile("^&(amp|gt|lt|apos);"); 087 private static Pattern sCharacterEntityPattern = Pattern.compile("&(\\w{2,6});"); 088 private static Pattern sEntityPattern = Pattern.compile("^&(\\w{2,6}|#\\d{1,6});"); 089// private static Pattern sISOControlPattern = Pattern.compile("([\\\\x00-\\\\x08]|[\\x0B\\x0C\\x7F]|[\\x0E-\\x1F])"); 090 private static Pattern sISOControlPattern = Pattern.compile("([\u0001-\u0008]|[\u0011\u0012\u0127]|[\u0014-\u0031])"); 091// private static Pattern sISOControlEntityPattern = Pattern.compile("&#([1-8]|11|12|127|[14-31]);"); 092 private static Pattern sISOControlEntityPattern = Pattern.compile("�{0,3}([1-8]|11|12|14|15|16|17|18|19|20|21|22|23|24|25|26|27|28|29|30|31|127);"); 093 094 095 //########################################################################### 096 // PUBLIC FUNCTIONS 097 //########################################################################### 098 099 100 //--------------------------------------------------------------------------- 101 /** 102 Set whether to use double quotes or single quotes for attribute values. 103 Single quotes are used by default. 104 */ 105 public static void useDoubleQuotes(boolean inValue) 106 { 107 sQuoteChar = inValue ? '"' : '\''; 108 } 109 110 //--------------------------------------------------------------------------- 111 /** 112 Composes an xml start tag (ex: "<inName>"). 113 */ 114 public static String composeStartTag(String inName) 115 { 116 ArrayList<XMLAttribute> emptyAttrList = null; 117 return composeStartTag(inName, emptyAttrList, NOT_EMPTY_TAG); 118 } 119 120 //--------------------------------------------------------------------------- 121 /** 122 Composes an xml start tag (ex: "<inName att1='value1' att2='value2'>"). 123 Attributes appear in alphabetical order for consistency. 124 */ 125 public static String composeStartTag(String inName, Attributes inAttributes) 126 { 127 return composeStartTag(inName, inAttributes, NOT_EMPTY_TAG); 128 } 129 130 //--------------------------------------------------------------------------- 131 /** 132 Composes an xml start tag (ex: "<inName att1='value1' att2='value2'>"). 133 Attributes appear in alphabetical order for consistency. 134 */ 135 public static String composeStartTag(String inName, Attributes inAttributes, boolean isEmptyTag) 136 { 137 Collection<XMLAttribute> attributes = null; 138 if (inAttributes.getLength() > 0) 139 { 140 attributes = new ArrayList<>(inAttributes.getLength()); 141 for (int i = 0; i < inAttributes.getLength(); i++) 142 { 143 attributes.add(new XMLAttribute(inAttributes.getQName(i), inAttributes.getValue(i))); 144 } 145 } 146 147 return composeStartTag(inName, attributes, isEmptyTag); 148 } 149 150 //--------------------------------------------------------------------------- 151 /** 152 Composes an xml start tag (ex: "<inName att1='value1' att2='value2'>"). 153 Attributes appear in alphabetical order for consistency. 154 */ 155 public static String composeStartTag(String inName, Collection<XMLAttribute> inAttributes) 156 { 157 return composeStartTag(inName, inAttributes, NOT_EMPTY_TAG); 158 } 159 160 //--------------------------------------------------------------------------- 161 /** 162 Composes an xml start tag (ex: "<inName att1='value1' att2='value2'>"). 163 If the isEmptyTag parameter is true, the output tag will end with "/>". 164 ex: "<inName att1='value1' att2='value2'/>". Attributes appear in 165 alphabetical order for consistency. 166 167 @param inName the tag name 168 @param inAttributes a List of XMLAttribute objects 169 @param isEmptyTag whether or not the tag has any content or subtags 170 */ 171 public static String composeStartTag(String inName, Collection<XMLAttribute> inAttributes, 172 boolean isEmptyTag) 173 { 174 StringBuilder tag = new StringBuilder(); 175 176 tag.append("<"); 177 tag.append(inName); 178 179 // Write attributes. 180 if (inAttributes != null) 181 { 182 List<XMLAttribute> sordidAttributes; 183 if (inAttributes instanceof List) 184 { 185 sordidAttributes = (List<XMLAttribute>) inAttributes; 186 } 187 else 188 { 189 sordidAttributes = new ArrayList<>(inAttributes); 190 } 191 192 Collections.sort(sordidAttributes); 193 194 for (XMLAttribute attribute : sordidAttributes) 195 { 196 XMLNamespace namespace = attribute.getNamespace(); 197 tag.append(" "); 198 tag.append(namespace != null && StringUtil.isSet(namespace.getPrefix()) ? namespace + ":" : ""); 199 tag.append(attribute.getName()); 200 201 String value = attribute.getValue(); 202 if (null == value) value = ""; 203 tag.append("="); 204 tag.append(getQuotedAttributeValue(value)); 205 } 206 } 207 208 if (isEmptyTag) tag.append(" /"); 209 tag.append(">"); 210 211 return tag.toString(); 212 } 213 214 //--------------------------------------------------------------------------- 215 /** 216 Composes an xml start tag (ex: "<inName att1='value1' att2='value2'>"). 217 Attributes appear in alphabetical order for consistency. 218 */ 219 public static String composeStartTag(String inName, Map<String, Object> inAttributes) 220 { 221 return composeStartTag(inName, inAttributes, NOT_EMPTY_TAG); 222 } 223 224 //--------------------------------------------------------------------------- 225 /** 226 Composes an xml start tag (ex: "<inName att1='value1' att2='value2'>"). 227 If the isEmptyTag parameter is true, the output tag will end with "/>". 228 ex: "<inName att1='value1' att2='value2'/>". Attributes appear in 229 alphabetical order for consistency. 230 */ 231 public static String composeStartTag(String inName, Map<String, Object> inAttributes, 232 boolean isEmptyTag) 233 { 234 StringBuilder tag = new StringBuilder(); 235 236 tag.append("<"); 237 tag.append(inName); 238 239 // Write attributes. 240 if (inAttributes != null) 241 { 242 List<String> keys = new ArrayList<>(inAttributes.keySet()); 243 Collections.sort(keys); 244 245 for (String attributeName : keys) 246 { 247 248 tag.append(" "); 249 tag.append(attributeName); 250 251 Object value = inAttributes.get(attributeName); 252 if (null == value) value = ""; 253 tag.append("="); 254 tag.append(getQuotedAttributeValue(value.toString())); 255 } 256 } 257 258 if (isEmptyTag) tag.append(" /"); 259 tag.append(">"); 260 261 return tag.toString(); 262 } 263 264 //--------------------------------------------------------------------------- 265 /** 266 Composes an xml end tag (ex: "</inName>"). 267 */ 268 public static String composeEndTag(String inName) 269 { 270 return "</" + inName + ">"; 271 } 272 273 //--------------------------------------------------------------------------- 274 public static boolean isWellFormedFragment(String inXML) 275 { 276 boolean result = true; 277 try 278 { 279 XMLTag testTag = new XMLTag(new StringReader(inXML)); 280 } 281 catch (Exception e) 282 { 283 result = false; 284 } 285 286 return result; 287 } 288 289 //--------------------------------------------------------------------------- 290 /** 291 Returns whether or not the content fragment is well-formed and if 292 tags are present internal to the fragment, whether the tag section 293 and preceding or trailing raw content is well-formed. 294 <pre> 295 Examples of valid content fragments: '<foo />', 'foo &amp; bar', 'foo', 'foo <bar>1</bar> one' 296 Examples of invalid content fragments: '<foo>', 'foo & bar', 'foo <bar>1</zoot> one' 297 </pre> 298 */ 299 public static boolean isWellFormedContentFragment(String inXML) 300 { 301 boolean result = true; 302 303 int startIndex = inXML.indexOf("<"); 304 int endIndex = inXML.lastIndexOf(">"); 305 306 if (startIndex > -1 307 && endIndex > startIndex) 308 { 309 // Test the tag frag 310 result = isWellFormedFragment(inXML.substring(startIndex, endIndex + 1)); 311 } 312 313 if (startIndex > 0 314 && result) 315 { 316 // Test preceeding raw content 317 String preceedingContent = inXML.substring(0, startIndex); 318 result = preceedingContent.equals(escapeContentIfNecessary(preceedingContent)); 319 } 320 321 if (endIndex < inXML.length() - 1 322 && result) 323 { 324 // Test trailing raw content 325 String trailingContent = inXML.substring(endIndex + 1); 326 result = trailingContent.equals(escapeContentIfNecessary(trailingContent)); 327 } 328 329 return result; 330 } 331 332 //--------------------------------------------------------------------------- 333 public static String convertCharacterEntitiesToNumeric(String inContent) 334 { 335 String outContent = inContent; 336 337 if (inContent != null) 338 { 339 if (inContent.contains("&")) 340 { 341 StringBuilder buffer = new StringBuilder(inContent); 342 int index = 0; 343 Matcher m = sCharacterEntityPattern.matcher(buffer); 344 while (index < buffer.length() 345 && m.find(index)) 346 { 347 String numericEntity = Latin1Entities.getInstance().getNumericEntity(m.group(1)); 348 if (null == numericEntity) 349 { 350 numericEntity = SymbolEntities.getInstance().getNumericEntity(m.group(1)); 351 } 352 353 index = m.end(); 354 355 if (numericEntity != null) 356 { 357 int length = m.group(1).length(); 358 buffer.replace(m.start(1), m.end(1), numericEntity); 359 index += (numericEntity.length() - length); 360 } 361 362 // Since the buffer changed we need to reinstantiate the matcher 363 m = sCharacterEntityPattern.matcher(buffer); 364 } 365 366 outContent = buffer.toString(); 367 } 368 } 369 370 return outContent; 371 } 372 373 //--------------------------------------------------------------------------- 374 public static String convertCharacterEntitiesToUnicode(String inContent) 375 { 376 String outContent = inContent; 377 378 if (inContent != null) 379 { 380 if (inContent.contains("&")) 381 { 382 StringBuilder buffer = new StringBuilder(inContent); 383 int index = 0; 384 Matcher m = sCharacterEntityPattern.matcher(buffer); 385 while (index < buffer.length() 386 && m.find(index)) 387 { 388 Character unicodeChar = Latin1Entities.getInstance().getUnicodeChar(m.group(1)); 389 if (null == unicodeChar) 390 { 391 unicodeChar = SymbolEntities.getInstance().getUnicodeChar(m.group(1)); 392 } 393 394 index = m.end(); 395 396 if (unicodeChar != null) 397 { 398 int length = m.group(0).length(); 399 buffer.replace(m.start(0), m.end(0), unicodeChar + ""); 400 index += (1 - length); 401 } 402 403 // Since the buffer changed we need to reinstantiate the matcher 404 m = sCharacterEntityPattern.matcher(buffer); 405 } 406 407 outContent = buffer.toString(); 408 } 409 } 410 411 return outContent; 412 } 413 414 // 415 // RULES FOR ESCAPING: 416 // - '&' and '<' must *always* be escaped as & and <, without 417 // exception. 418 // - '>' needs to be escaped as > only when (a) it is in character 419 // data content and (b) it immediately follows the string "]]". 420 // - '"' and "'" *never* need to be escaped in character data content; in 421 // attribute value literals, they need to be escaped only when that 422 // character is being used as a delimiter, i.e. """ or '''. 423 424 //--------------------------------------------------------------------------- 425 /** 426 * Makes the content XML safe by ensuring that all '<'s are all escaped and that 427 * all '&'s are part of an entity. Avoids double escaping. 428 * @param inContent 429 * @return XML-safe content string 430 */ 431 public static String escapeContentIfNecessary(String inContent) 432 { 433 String safeContent = inContent; 434 435 if (inContent != null) 436 { 437 if (inContent.indexOf("&") >= 0) 438 { 439 StringBuilder buffer = new StringBuilder(inContent); 440 int index = 0; 441 while ((index = buffer.indexOf("&", index)) >= 0) 442 { 443 Matcher m = sEntityPattern.matcher(buffer.substring(index)); 444 if (!m.find()) 445 { 446 buffer.replace(index, index + 1, "&"); 447 index += 3; 448 } 449 else 450 { 451 index++; 452 } 453 } 454 safeContent = buffer.toString(); 455 456 } 457 458 safeContent = safeContent.replaceAll("<", "<"); 459 460 if (sISOControlPattern.matcher(safeContent).find()) 461 { 462 safeContent = replaceISOControlWithEntities(safeContent); 463 } 464 } 465 466 return safeContent; 467 } 468 469 //--------------------------------------------------------------------------- 470 public static String escapeContent(String inContent) 471 { 472 String encodedString = null; 473 if (inContent != null) 474 { 475 encodedString = inContent.replaceAll("&", "&"); 476 encodedString = encodedString.replaceAll("<", "<"); 477 } 478 479 return encodedString; 480 } 481 482 //--------------------------------------------------------------------------- 483 public static String unescapeContent(String inContent) 484 { 485 String unencodedString = inContent; 486 if (inContent != null) 487 { 488 unencodedString = inContent.replaceAll("<", "<"); 489 unencodedString = unencodedString.replaceAll("&", "&"); 490 491 unencodedString = expandISOControlEntities(unencodedString); 492 } 493 494 return unencodedString; 495 } 496 497 //--------------------------------------------------------------------------- 498 public static String escapeAttributeValue(String inAttributeValue) 499 { 500 StringBuilderPlus buffer = null; // We won't instantiate this unless it is necessary 501 502 if (inAttributeValue != null) 503 { 504 for (int i = inAttributeValue.length() - 1; i >= 0; i--) 505 { 506 char c = inAttributeValue.charAt(i); 507 508 if (c < 32 || 127 == c // ISO control character range 509 || '&' == c 510 || '\'' == c 511 || '<' == c) 512 { 513 String entity; 514 switch (c) 515 { 516 case '&': 517 entity = "&"; 518 break; 519 case '\'': 520 entity = "'"; 521 break; 522 case '<': 523 entity = "<"; 524 break; 525 default: 526 entity = "&#" + (int) c + ";"; 527 } 528 529 if (null == buffer) 530 { 531 buffer = new StringBuilderPlus(); 532 if (i < inAttributeValue.length() - 1) 533 { 534 // Include any regular characters that we have skipped 535 buffer.append(inAttributeValue.substring(i + 1)); 536 } 537 } 538 539 buffer.insert(0, entity); 540 } 541 else if (buffer != null) 542 { 543 // This character should be safe without escaping 544 buffer.insert(0, c); 545 } 546 } 547 } 548 549 return (buffer != null ? buffer.toString() : inAttributeValue); 550 } 551 552 //--------------------------------------------------------------------------- 553 public static String unescapeAttributeValue(String inAttributeValue) 554 { 555 return unescapeEntities(inAttributeValue); 556 } 557 558 //--------------------------------------------------------------------------- 559 public static String escapeApos(String inAttributeValue) 560 { 561 String escapedVaule = null; 562 if (inAttributeValue != null) 563 { 564 escapedVaule = inAttributeValue.replaceAll("'", "'"); 565 } 566 567 return escapedVaule; 568 } 569 570 //--------------------------------------------------------------------------- 571 public static String escapeQuote(String inAttributeValue) 572 { 573 String escapedVaule = null; 574 if (inAttributeValue != null) 575 { 576 escapedVaule = inAttributeValue.replaceAll("\"", """); 577 } 578 579 return escapedVaule; 580 } 581 582 //--------------------------------------------------------------------------- 583 public static String escapeAmp(String inAttributeValue) 584 { 585 String escapedVaule = null; 586 if (inAttributeValue != null) 587 { 588 escapedVaule = inAttributeValue.replaceAll("&", "&"); 589 } 590 591 return escapedVaule; 592 } 593 594 // public static Pattern sEntityPattern2 = Pattern.compile("&(\\S{3,8});"); 595 public static Pattern sEntityPattern2 = Pattern.compile("&(#?[\\w\\d]{1,7});"); 596 597 //--------------------------------------------------------------------------- 598 public static synchronized String unescapeEntities(String inValue) 599 { 600 StringBuilder unescapedVaule = null; 601 if (inValue != null) 602 { 603 unescapedVaule = new StringBuilder(inValue); 604 605 Matcher m = sEntityPattern2.matcher(unescapedVaule); 606 int index = 0; 607 while (m.find(index)) 608 { 609 String entity = m.group(1); 610 String expandedEntity; 611 if (entity.startsWith("#")) 612 { 613 if (entity.charAt(1) == 'x') 614 { 615 // Hex 616 expandedEntity = "" + (char) Integer.parseInt(entity.substring(2), 16); 617 } 618 else 619 { 620 // Decimal 621 expandedEntity = "" + (char) Integer.parseInt(entity.substring(1)); 622 } 623 } 624 else 625 { 626 // Try to resolve it with our battery of standard entity classes. 627 628 expandedEntity = SpecialCharacterEntities.resolveEntity(entity); 629 630 if (null == expandedEntity) 631 { 632 expandedEntity = Latin1Entities.getInstance().getNumericEntity(entity); 633 } 634 635 if (null == expandedEntity) 636 { 637 expandedEntity = SymbolEntities.getInstance().getNumericEntity(entity); 638 } 639 640 641 if (expandedEntity != null 642 && expandedEntity.startsWith("#")) 643 { 644 expandedEntity = "" + (char) Integer.parseInt(expandedEntity.substring(1)); 645 } 646 } 647 648 if (expandedEntity != null) 649 { 650 unescapedVaule.replace(m.start(), m.start() + m.group(1).length() + 2, expandedEntity); 651 index = m.start(1) + expandedEntity.length() - 1; 652 } 653 else 654 { 655 index++; 656 } 657 658 if (index >= unescapedVaule.length()) 659 { 660 break; 661 } 662 } 663 } 664 665 return unescapedVaule != null ? unescapedVaule.toString() : null; 666 } 667 668 669 //--------------------------------------------------------------------------- 670 /** 671 Is the tag or element name valid? 672 */ 673 public static void checkXMLNameValidity(String inValue) 674 throws InvalidXMLNameException 675 { 676 if (null == inValue) 677 { 678 throw new InvalidXMLNameException("XML tag/element names cannot be set to null."); 679 } 680 681 // Check for xml validity of the tag name. 682 if (inValue.length() == 0) 683 { 684 throw new InvalidXMLNameException("XML tag/element names cannot be an empty string."); 685 } 686 else if (! sValidatedNameCache.contains(inValue)) 687 { 688 if (! Character.isLetter(inValue.charAt(0)) 689 && inValue.charAt(0) != '_') 690 { 691 throw new InvalidXMLNameException("'" + inValue + "' is not a valid XML tag/element name. " + 692 "The first character must be a letter " + 693 "or an underscore."); 694 } 695 else if (inValue.contains(" ")) 696 { 697 throw new InvalidXMLNameException("'" + inValue + "' is not a valid XML tag/element name. " + 698 "It cannot contain whitespace."); 699 } 700 else 701 { 702 for (int i = 0; i < inValue.length(); i++) 703 { 704 char tagChar = inValue.charAt(i); 705 if (! Character.isLetterOrDigit(tagChar) 706 && tagChar != '_' 707 && tagChar != '.' 708 && tagChar != '-') 709 { 710 throw new InvalidXMLNameException("'" + inValue + "' is not a valid XML tag/element name. " + 711 "'" + tagChar + "' is an invalid character."); 712 } 713 } 714 715 sValidatedNameCache.add(inValue); 716 } 717 } 718 } 719 720 //--------------------------------------------------------------------------- 721 public static List<XMLNode> findNodesByAttribute(XMLNode inRootNode, XMLAttribute inAttribute) 722 { 723 List<XMLNode> nodes = new ArrayList<>(); 724 725 recursivelyFindByAttribute(nodes, inRootNode, inAttribute); 726 727 return nodes; 728 } 729 730 //--------------------------------------------------------------------------- 731 public static List<XMLNode> findNodesByAttribute(XMLNode inRootNode, String inAttribute) 732 { 733 List<XMLNode> nodes = new ArrayList<>(); 734 735 recursivelyFindByAttribute(nodes, inRootNode, new XMLAttribute(inAttribute, null)); 736 737 return nodes; 738 } 739 740 //--------------------------------------------------------------------------- 741 private static void recursivelyFindByAttribute(List<XMLNode> inNodes, XMLNode inNode, XMLAttribute inAttribute) 742 { 743 if (inNode.hasAttribute(inAttribute.getName()) 744 && null == inAttribute.getValue() || inAttribute.getValue().equals(inNode.getAttributeValue(inAttribute.getName()))) 745 { 746 inNodes.add(inNode); 747 } 748 749 List<? extends XMLNode> subnodes = inNode.getSubtags(); 750 if (CollectionUtil.hasValues(subnodes)) 751 { 752 for (XMLNode subnode : subnodes) 753 { 754 recursivelyFindByAttribute(inNodes, subnode, inAttribute); 755 } 756 } 757 } 758 759 //########################################################################### 760 // PRIVATE FUNCTIONS 761 //########################################################################### 762 763 //--------------------------------------------------------------------------- 764 private static String getQuotedAttributeValue(String inAttributeValue) 765 { 766 String safeValue = "''"; 767 if (inAttributeValue != null) 768 { 769 if (sQuoteChar == '\'') 770 { 771 safeValue = "'" + escapeAttributeValue(inAttributeValue) + "'"; 772 } 773 else 774 { 775 safeValue = "\"" + escapeDoubleQuotedAttributeValue(inAttributeValue) + "\""; 776 } 777 } 778 779 return safeValue; 780 } 781 782 //--------------------------------------------------------------------------- 783 public static String escapeDoubleQuotedAttributeValue(String inAttributeValue) 784 { 785 String encodedString = null; 786 if (inAttributeValue != null) 787 { 788 encodedString = escapeAmp(inAttributeValue); 789 encodedString = escapeQuote(encodedString); 790 encodedString = encodedString.replaceAll("<", "<"); 791 } 792 793 return encodedString; 794 } 795 796 //--------------------------------------------------------------------------- 797 private static String replaceISOControlWithEntities(String inString) 798 { 799 StringBuilder buffer = new StringBuilder(); 800 for (char theChar : inString.toCharArray()) 801 { 802 if (! Character.isWhitespace(theChar) // Skip \t, \r, and \n 803 && Character.isISOControl(theChar)) 804 { 805 buffer.append("&#" + (int)theChar + ";"); 806 } 807 else 808 { 809 buffer.append(theChar); 810 } 811 } 812 813 return buffer.toString(); 814 } 815 816 //--------------------------------------------------------------------------- 817 private static String expandISOControlEntities(String inString) 818 { 819 String resultString = inString; 820 821 if (sISOControlEntityPattern.matcher(inString).find()) 822 { 823 StringBuilder buffer = new StringBuilder(inString); 824 int start = 0; 825 Matcher m = sISOControlEntityPattern.matcher(buffer); 826 while (m.find(start)) 827 { 828 buffer.replace(m.start(), m.end(), "" + (char)Integer.parseInt(m.group(1))); 829 830 start = m.start() + 1; 831 } 832 833 resultString = buffer.toString(); 834 } 835 836 return resultString; 837 } 838 839 //--------------------------------------------------------------------------- 840 public static String replaceUnicodeWithEntities(String inString) 841 { 842 StringBuilder buffer = new StringBuilder(); 843 844 if (inString != null) 845 { 846 for (int i = 0; i < inString.length(); i++) 847 { 848 char theChar = inString.charAt(i); 849 if (126 < (int) theChar) 850 { 851 buffer.append("&#" + ((int) theChar) + ";"); 852 } 853 else 854 { 855 buffer.append(theChar); 856 } 857 858 } 859 } 860 861 return inString != null ? buffer.toString() : null; 862 } 863 864 // This cache is used for improved efficiency in name validation. 865 //--------------------------------------------------------------------------- 866 private static class ValidatedNameCache 867 { 868 private int mMaxCacheSize = 150; 869 private Set<String> mNameSet = new HashSet<>(); 870 private LinkedList<String> mNameQueue = new LinkedList<>(); 871 872 //------------------------------------------------------------------------ 873 public boolean contains(String inValue) 874 { 875 return (mNameSet.contains(inValue)); 876 } 877 878 //------------------------------------------------------------------------ 879 public synchronized void add(String inValue) 880 { 881 if (mNameSet.size() >= mMaxCacheSize) 882 { 883 mNameSet.remove(mNameQueue.removeFirst()); 884 } 885 886 mNameSet.add(inValue); 887 mNameQueue.add(inValue); 888 } 889 } 890}