001package com.hfg.xml.msofficexml.docx.wordprocessingml; 002 003import java.net.URL; 004import java.util.ArrayList; 005import java.util.List; 006import java.util.regex.Matcher; 007import java.util.regex.Pattern; 008 009import com.hfg.css.*; 010import com.hfg.exception.ProgrammingException; 011import com.hfg.graphics.units.Pixels; 012import com.hfg.graphics.units.Points; 013import com.hfg.html.*; 014import com.hfg.html.attribute.HTMLColor; 015import com.hfg.util.collection.CollectionUtil; 016import com.hfg.util.Recursion; 017import com.hfg.util.StringUtil; 018import com.hfg.util.io.GZIP; 019import com.hfg.xml.XMLNode; 020import com.hfg.xml.XMLTag; 021import com.hfg.xml.msofficexml.docx.Docx; 022import com.hfg.xml.msofficexml.docx.wordprocessingml.style.*; 023 024//------------------------------------------------------------------------------ 025/** 026 * For converting HTML into WordprocessingML. 027 * 028 * @author J. Alex Taylor, hairyfatguy.com 029 */ 030//------------------------------------------------------------------------------ 031// com.hfg XML/HTML Coding Library 032// 033// This library is free software; you can redistribute it and/or 034// modify it under the terms of the GNU Lesser General Public 035// License as published by the Free Software Foundation; either 036// version 2.1 of the License, or (at your option) any later version. 037// 038// This library is distributed in the hope that it will be useful, 039// but WITHOUT ANY WARRANTY; without even the implied warranty of 040// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 041// Lesser General Public License for more details. 042// 043// You should have received a copy of the GNU Lesser General Public 044// License along with this library; if not, write to the Free Software 045// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 046// 047// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com 048// jataylor@hairyfatguy.com 049//------------------------------------------------------------------------------ 050 051public class HtmlToWmlConverter 052{ 053 private CSS mCSS; 054 055 private static final Pattern PTS_SIZE_PATTERN = Pattern.compile("(\\d+)(pt)?"); 056 private static final Pattern PX_SIZE_PATTERN = Pattern.compile("(\\d+)(px)?"); 057 058 //########################################################################## 059 // PUBLIC METHODS 060 //########################################################################## 061 062 //--------------------------------------------------------------------------- 063 /** 064 Specify associated CSS to use during conversion. 065 066 @param inValue CSS object containing a collection of CSSRules 067 */ 068 public HtmlToWmlConverter setCSS(CSS inValue) 069 { 070 mCSS = inValue; 071 return this; 072 } 073 074 //--------------------------------------------------------------------------- 075 /** 076 Warning: this method is still a brittle work-in-progress. 077 */ 078 public synchronized List<XMLTag> convert(HTMLTag inHTMLTag, Docx inDocx) 079 { 080 List<XMLTag> wmlTags = new ArrayList<>(5); 081 082 if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.BR)) 083 { 084 wmlTags.add(new WmlParagraph(inDocx).br()); 085 } 086 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.DIV)) 087 { 088 wmlTags.addAll(parseDivTag((Div) inHTMLTag, inDocx)); 089 } 090 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.A)) 091 { 092 wmlTags.addAll(parseLinkTag((Link) inHTMLTag, inDocx)); 093 } 094 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.P)) 095 { 096 wmlTags.addAll(parsePTag((P) inHTMLTag, inDocx)); 097 } 098 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.PRE)) 099 { 100 wmlTags.addAll(parsePreTag((Pre) inHTMLTag, inDocx)); 101 } 102 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.SPAN)) 103 { 104 wmlTags.addAll(parseSpanTag((Span) inHTMLTag, inDocx)); 105 } 106 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.TABLE)) 107 { 108 wmlTags.addAll(parseTableTag((Table) inHTMLTag, inDocx)); 109 } 110 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.TBODY)) 111 { 112 wmlTags.addAll(parseTBodyTag((TBody) inHTMLTag, inDocx)); 113 } 114 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.THEAD)) 115 { 116 wmlTags.addAll(parseTHeadTag((THead) inHTMLTag, inDocx)); 117 } 118 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.TR)) 119 { 120 wmlTags.addAll(parseTrTag((Tr) inHTMLTag, inDocx)); 121 } 122 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.TH)) 123 { 124 wmlTags.addAll(parseThTag((Th) inHTMLTag, inDocx)); 125 } 126 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.TD)) 127 { 128 wmlTags.addAll(parseTdTag((Td) inHTMLTag, inDocx)); 129 } 130 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.H1)) 131 { 132 wmlTags.addAll(parseH1Tag((H1) inHTMLTag, inDocx)); 133 } 134 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.H2)) 135 { 136 wmlTags.addAll(parseH2Tag((H2) inHTMLTag, inDocx)); 137 } 138 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.LI)) 139 { 140 wmlTags.addAll(parseLiTag((Li) inHTMLTag, inDocx)); 141 } 142 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.UL)) 143 { 144 wmlTags.addAll(parseUlTag((Ul) inHTMLTag, inDocx)); 145 } 146 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.OL)) 147 { 148 wmlTags.addAll(parseOlTag((Ol) inHTMLTag, inDocx)); 149 } 150 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.NOBR)) 151 { 152 wmlTags.addAll(parseNobrTag((Nobr) inHTMLTag, inDocx)); 153 } 154 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.U)) 155 { 156 wmlTags.addAll(parseSpanTag(inHTMLTag, new CSSDeclaration(CSSProperty.text_decoration, "underline"), inDocx)); 157 } 158 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.EM)) 159 { 160 wmlTags.addAll(parseSpanTag(inHTMLTag, new CSSDeclaration(CSSProperty.font_style, "italic"), inDocx)); 161 } 162 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.B) 163 || inHTMLTag.getTagName().equalsIgnoreCase(HTML.STRONG)) 164 { 165 wmlTags.addAll(parseSpanTag(inHTMLTag, new CSSDeclaration(CSSProperty.font_weight, "bold"), inDocx)); 166 } 167 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.S)) 168 { 169 wmlTags.addAll(parseSpanTag(inHTMLTag, new CSSDeclaration(CSSProperty.text_decoration, "line-through"), inDocx)); 170 } 171 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.HTML)) 172 { 173 wmlTags.addAll(parseHtmlTag((HTML) inHTMLTag, inDocx)); 174 } 175 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.CAPTION)) 176 { 177 // Ignoring for now 178 } 179 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.COL)) 180 { 181 // TODO: Ignore for now 182 } 183 else if (inHTMLTag.getTagName().equalsIgnoreCase(HTML.COLGROUP)) 184 { 185 // TODO: Ignore for now 186 } 187 else 188 { 189 throw new ProgrammingException("No tag mapping for tag " + StringUtil.singleQuote(inHTMLTag.getTagName())); 190 } 191 192 // Replace any character entities (like """) with the corresponding numeric entities 193 if (CollectionUtil.hasValues(wmlTags)) 194 { 195 for (XMLTag xmlTag : wmlTags) 196 { 197 xmlTag.replaceCharacterEntities(); 198 } 199 } 200 201 return wmlTags; 202 } 203 204 //########################################################################## 205 // PRIVATE METHODS 206 //########################################################################## 207 208 //--------------------------------------------------------------------------- 209 private List<XMLTag> parseTableTag(Table inTableTag, Docx inDocx) 210 { 211 List<XMLTag> wmlTags = new ArrayList<>(5); 212 213 WmlTable table = new WmlTable(inDocx); 214 wmlTags.add(table); 215 216 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inTableTag); 217 if (CollectionUtil.hasValues(cssDeclarations)) 218 { 219 convertCSSToTableProperties(cssDeclarations, table.getTableProperties()); 220 } 221 222 223 // Was a cell padding size specified as a attribute value? 224 String cellPaddingString = inTableTag.getAttributeValue(HTML.CELLPADDING); 225 if (StringUtil.isSet(cellPaddingString)) 226 { 227 table.getTableProperties().getTableCellMargins().addMargin(new WmlTableCellMargin(new Pixels(Integer.parseInt(cellPaddingString)))); 228 } 229 230 // Was a border size specified as a attribute value? 231 String borderString = inTableTag.getAttributeValue(HTML.BORDER); 232 if (StringUtil.isSet(borderString)) 233 { 234 WmlTableBorder border = new WmlTableBorder() 235 .setSize(new Pixels(Integer.parseInt(cellPaddingString))) 236 .setStyle(WmlLineBorderStyle.single) 237 .setColor(HTMLColor.BLACK); 238 239 table.getTableProperties().getBorders().addBorder(border); 240 } 241 242 if (inTableTag.hasContentOrSubtags()) 243 { 244 for (XMLNode tableSubtag : inTableTag.getXMLNodeSubtags()) 245 { 246 List<XMLTag> subtags = convert((HTMLTag) tableSubtag, inDocx); 247 if (CollectionUtil.hasValues(subtags)) 248 { 249 for (XMLTag subtag: subtags) 250 { 251 table.addSubtag(subtag); 252 } 253 } 254 } 255 } 256 257 // Apply any row merging 258 int currentRow = 0; 259 for (Tr htmlRow : inTableTag.getRows()) 260 { 261 currentRow++; 262 263 List<HTMLTag> cellTags = htmlRow.getSubtags(); 264 for (int cellIdx = 0; cellIdx < cellTags.size(); cellIdx++) 265 { 266 HTMLTag cellTag = cellTags.get(cellIdx); 267 268 if (cellTag.hasAttribute(HTML.ROWSPAN)) 269 { 270 int rowSpan = Integer.parseInt(cellTag.getAttributeValue(HTML.ROWSPAN)); 271 272 if (rowSpan > 1) 273 { 274 List<WmlTableRow> tableRows = table.getRows(); 275 276 // The first cell is marked with RESTART 277 WmlTableCell firstMergedCell = tableRows.get(currentRow - 1).getCells().get(cellIdx); 278 firstMergedCell.getCellProperties().setVerticalMerge(WmlVerticalMerge.RESTART); 279 280 // Subsequent merged cells (in the subsequent rows) are marked CONTINUE 281 for (int rowIdx = currentRow; rowIdx < currentRow + rowSpan - 1; rowIdx++) 282 { 283 WmlTableRow row = tableRows.get(rowIdx); 284 285 // Make sure enough cells have been defined 286 List<WmlTableCell> cells = row.getCells(); 287 int cellCount = cells.size(); 288 while (cellCount < cellIdx + 1) 289 { 290 row.addCell(); 291 cellCount++; 292 } 293 294 WmlTableCell cell = row.getCells().get(cellIdx); 295 cell.getCellProperties() 296 .setVerticalMerge(WmlVerticalMerge.CONTINUE); 297 } 298 } 299 } 300 } 301 } 302 303 return wmlTags; 304 } 305 306 //--------------------------------------------------------------------------- 307 // The THead tag itself doesn't map to anything but any table data it contains should get parsed. 308 private List<XMLTag> parseTHeadTag(THead inTHeadTag, Docx inDocx) 309 { 310 List<XMLTag> wmlTags = new ArrayList<>(5); 311 312 if (inTHeadTag.hasContentOrSubtags()) 313 { 314 for (XMLNode subtag : inTHeadTag.getXMLNodeSubtags()) 315 { 316 List<XMLTag> subtags = convert((HTMLTag) subtag, inDocx); 317 if (CollectionUtil.hasValues(subtags)) 318 { 319 wmlTags.addAll(subtags); 320 } 321 } 322 } 323 324 return wmlTags; 325 } 326 327 //--------------------------------------------------------------------------- 328 // The TBody tag itself doesn't map to anything but any table data it contains should get parsed. 329 private List<XMLTag> parseTBodyTag(TBody inTBodyTag, Docx inDocx) 330 { 331 List<XMLTag> wmlTags = new ArrayList<>(5); 332 333 if (inTBodyTag.hasContentOrSubtags()) 334 { 335 for (XMLNode tbodySubtag : inTBodyTag.getXMLNodeSubtags()) 336 { 337 List<XMLTag> subtags = convert((HTMLTag) tbodySubtag, inDocx); 338 if (CollectionUtil.hasValues(subtags)) 339 { 340 wmlTags.addAll(subtags); 341 } 342 } 343 } 344 345 return wmlTags; 346 } 347 348 //--------------------------------------------------------------------------- 349 private List<XMLTag> parseTrTag(Tr inTrTag, Docx inDocx) 350 { 351 List<XMLTag> wmlTags = new ArrayList<>(5); 352 353 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inTrTag); 354 if (CollectionUtil.hasValues(cssDeclarations)) 355 { 356 for (CSSDeclaration cssDeclaration : cssDeclarations) 357 { 358 if (cssDeclaration.getProperty() == CSSProperty.display 359 && cssDeclaration.getValue().equals("none")) 360 { 361 return wmlTags; 362 } 363 } 364 } 365 366 WmlTableRow tr = new WmlTableRow(inDocx); 367 wmlTags.add(tr); 368 369 if (CollectionUtil.hasValues(cssDeclarations)) 370 { 371 convertCSSToTableRowProperties(cssDeclarations, tr.getRowProperties()); 372 } 373 374 WmlTableCellProperties tcPr = null; 375 376 if (CollectionUtil.hasValues(cssDeclarations)) 377 { 378 tcPr = new WmlTableCellProperties(inDocx); 379 convertCSSToTableCellProperties(cssDeclarations, tcPr); 380 } 381 382 if (inTrTag.hasContentOrSubtags()) 383 { 384 for (XMLNode trSubtag : inTrTag.getXMLNodeSubtags()) 385 { 386 List<XMLTag> subtags = convert((HTMLTag) trSubtag, inDocx); 387 if (CollectionUtil.hasValues(subtags)) 388 { 389 for (XMLTag subtag: subtags) 390 { 391 tr.addSubtag(subtag); 392 } 393 } 394 } 395 } 396 397 // Copy the table cell properties down to any cells since OfficeOpenXML is brain damaged 398 if (tcPr != null) 399 { 400 for (XMLTag wmlTag : wmlTags) 401 { 402 List<XMLTag> tableCells = wmlTag.getSubtagsByName(WmlXML.TABLE_CELL.getLocalName(), Recursion.ON); 403 if (CollectionUtil.hasValues(tableCells)) 404 { 405 for (WmlTableCell tc : (List<WmlTableCell>) (Object) tableCells) 406 { 407 tcPr.add(tc.getCellProperties()); 408 tc.setCellProperties(tcPr); 409 } 410 } 411 } 412 } 413 414 return wmlTags; 415 } 416 417 //--------------------------------------------------------------------------- 418 private List<XMLTag> parseTdTag(Td inTdTag, Docx inDocx) 419 { 420 List<XMLTag> wmlTags = new ArrayList<>(5); 421 422 WmlTableCell td = new WmlTableCell(inDocx); 423 wmlTags.add(td); 424 425 boolean containsCss = false; 426 427 WmlTextRunProperties textRunProperties = null; 428 429 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inTdTag); 430 if (CollectionUtil.hasValues(cssDeclarations)) 431 { 432 containsCss = true; 433 convertCSSToTableCellProperties(cssDeclarations, td.getCellProperties()); 434 435 // Remove bgColor and border styles before setting text run properties 436 for (int i = 0; i < cssDeclarations.size(); i++) 437 { 438 CSSDeclaration cssDeclaration = cssDeclarations.get(i); 439 440 if (cssDeclaration.getProperty().equals(CSSProperty.background_color) 441 || cssDeclaration.getProperty().equals(CSSProperty.border)) 442 { 443 cssDeclarations.remove(i--); 444 } 445 } 446 447 textRunProperties = new WmlTextRunProperties(inDocx); 448 convertCSSToTextRunProperties(cssDeclarations, textRunProperties); 449 } 450 451 // Apply column merging (Note that row merging is dealt with at the table level) 452 String colSpanString = inTdTag.getColSpan(); 453 if (StringUtil.isSet(colSpanString)) 454 { 455 int colSpan = Integer.parseInt(colSpanString); 456 if (colSpan > 1) 457 { 458 td.getCellProperties() 459 .setGridSpan(colSpan); 460 } 461 } 462 463 if (inTdTag.hasContentOrSubtags()) 464 { 465 WmlParagraph p = td.getParagraph(); 466 467 for (Object content : inTdTag.getContentPlusSubtagList()) 468 { 469 if (content instanceof String) 470 { 471 p.addTextRun((String) content); 472 } 473 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 474 { 475 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 476 } 477 else 478 { 479 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 480 if (CollectionUtil.hasValues(subtags)) 481 { 482 for (XMLTag subtag: subtags) 483 { 484 if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 485 { 486 if (! p.hasContentOrSubtags()) 487 { 488 td.removeSubtag(p); 489 } 490 491 td.addSubtag(subtag); 492 } 493 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 494 { 495 p.addSubtag(subtag); 496 } 497 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 498 { 499 WmlTextRun textRun = p.addTextRun(); 500 textRun.addSubtag(subtag); 501 } 502 else 503 { 504 p.addSubtag(subtag); 505 } 506 } 507 } 508 } 509 } 510 511 // Copy the text run properties on the cell down to any text runs since OfficeOpenXML is brain damaged 512 if (containsCss) 513 { 514 for (XMLTag wmlTag : wmlTags) 515 { 516 List<XMLTag> paragraphs = wmlTag.getSubtagsByName(WmlXML.P.getLocalName(), Recursion.ON); 517 if (CollectionUtil.hasValues(paragraphs)) 518 { 519 for (WmlParagraph paragraph : (List<WmlParagraph>) (Object) paragraphs) 520 { 521 convertCSSToParagraphProperties(cssDeclarations, paragraph.getProperties()); 522 } 523 } 524 525 List<XMLTag> textRuns = wmlTag.getSubtagsByName(WmlXML.R.getLocalName(), Recursion.ON); 526 if (CollectionUtil.hasValues(textRuns)) 527 { 528 for (WmlTextRun textRun : (List<WmlTextRun>) (Object) textRuns) 529 { 530 WmlTextRunProperties parentProperties = (WmlTextRunProperties) textRunProperties.clone(); 531 textRun.setProperties(parentProperties.combine(textRun.getProperties())); 532 } 533 } 534 } 535 } 536 } 537 538 return wmlTags; 539 } 540 541 //--------------------------------------------------------------------------- 542 private List<XMLTag> parseThTag(Th inThTag, Docx inDocx) 543 { 544 List<XMLTag> wmlTags = new ArrayList<>(5); 545 546 WmlTableCell th = new WmlTableCell(inDocx); 547 wmlTags.add(th); 548 549 WmlTextRunProperties textRunProperties = null; 550 boolean containsCss = false; 551 552 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inThTag); 553 // Th should default to bold 554 CSSDeclaration bold = new CSSDeclaration(CSSProperty.font_weight, "bold"); 555 if (CollectionUtil.hasValues(cssDeclarations)) 556 { 557 cssDeclarations.add(0, bold); 558 } 559 else 560 { 561 cssDeclarations = new ArrayList<>(4); 562 cssDeclarations.add(bold); 563 } 564 565 if (CollectionUtil.hasValues(cssDeclarations)) 566 { 567 containsCss = true; 568 convertCSSToTableCellProperties(cssDeclarations, th.getCellProperties()); 569 570 // Remove bgColor and border styles before setting text run properties 571 for (int i = 0; i < cssDeclarations.size(); i++) 572 { 573 CSSDeclaration cssDeclaration = cssDeclarations.get(i); 574 if (cssDeclaration.getProperty().equals(CSSProperty.background_color) 575 || cssDeclaration.getProperty().equals(CSSProperty.border)) 576 { 577 cssDeclarations.remove(i--); 578 } 579 } 580 581 textRunProperties = new WmlTextRunProperties(inDocx); 582 convertCSSToTextRunProperties(cssDeclarations, textRunProperties); 583 } 584 585 586 if (inThTag.hasContentOrSubtags()) 587 { 588 WmlParagraph p = th.getParagraph(); 589 590 for (Object content : inThTag.getContentPlusSubtagList()) 591 { 592 if (content instanceof String) 593 { 594 p.addTextRun((String) content); 595 } 596 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 597 { 598 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 599 } 600 else 601 { 602 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 603 if (CollectionUtil.hasValues(subtags)) 604 { 605 for (XMLTag subtag: subtags) 606 { 607 if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 608 { 609 if (! p.hasContentOrSubtags()) 610 { 611 th.removeSubtag(p); 612 } 613 614 th.addSubtag(subtag); 615 } 616 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 617 { 618 p.addSubtag(subtag); 619 } 620 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 621 { 622 WmlTextRun textRun = p.addTextRun(); 623 textRun.addSubtag(subtag); 624 } 625 else 626 { 627 p.addSubtag(subtag); 628 } 629 } 630 } 631 } 632 } 633 } 634 635 // Copy the text run properties on the cell down to any text runs since OfficeOpenXML is brain damaged 636 if (containsCss) 637 { 638 for (XMLTag wmlTag : wmlTags) 639 { 640 List<XMLTag> paragraphs = wmlTag.getSubtagsByName(WmlXML.P.getLocalName(), Recursion.ON); 641 if (CollectionUtil.hasValues(paragraphs)) 642 { 643 for (WmlParagraph paragraph : (List<WmlParagraph>) (Object) paragraphs) 644 { 645 convertCSSToParagraphProperties(cssDeclarations, paragraph.getProperties()); 646 } 647 } 648 649 List<XMLTag> textRuns = wmlTag.getSubtagsByName(WmlXML.R.getLocalName(), Recursion.ON); 650 if (CollectionUtil.hasValues(textRuns)) 651 { 652 for (WmlTextRun textRun : (List<WmlTextRun>) (Object) textRuns) 653 { 654 WmlTextRunProperties parentProperties = (WmlTextRunProperties) textRunProperties.clone(); 655 textRun.setProperties(parentProperties.combine(textRun.getProperties())); 656 } 657 } 658 } 659 } 660 661 return wmlTags; 662 } 663 664 665 //--------------------------------------------------------------------------- 666 private List<XMLTag> parsePreTag(Pre inPreTag, Docx inDocx) 667 { 668 List<XMLTag> wmlTags = new ArrayList<>(5); 669 670 WmlParagraph p = new WmlParagraph(inDocx); 671 wmlTags.add(p); 672 673 WmlStyle preStyle = inDocx.getStylesPart().getStyle("pre"); 674 if (null == preStyle) 675 { 676 preStyle = new WmlParagraphStyle("pre", inDocx); 677 preStyle.getTextRunProperties().setFont("Courier"); 678 inDocx.getStylesPart().addStyle(preStyle); 679 } 680 p.getProperties().setStyle(preStyle.getId()); 681 682 683 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inPreTag); 684 if (CollectionUtil.hasValues(cssDeclarations)) 685 { 686 convertCSSToTextRunProperties(cssDeclarations, p.getProperties().getRunProperties()); 687 } 688 689 if (inPreTag.hasContentOrSubtags()) 690 { 691 boolean containerClosed = false; 692 693 for (Object content : inPreTag.getContentPlusSubtagList()) 694 { 695 if (content instanceof String) 696 { 697 p.addTextRun((String) content); 698 } 699 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 700 { 701 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 702 } 703 else 704 { 705 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 706 if (CollectionUtil.hasValues(subtags)) 707 { 708 for (XMLTag subtag: subtags) 709 { 710 if (containerClosed) 711 { 712 p = new WmlParagraph(inDocx); 713 wmlTags.add(p); 714 containerClosed = false; 715 } 716 717 if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 718 { 719 p.addSubtag(subtag); 720 } 721 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 722 { 723 WmlTextRun textRun = p.addTextRun(); 724 textRun.addSubtag(subtag); 725 } 726 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 727 { 728 p.addSubtags(subtag.getSubtags()); 729 } 730 else 731 { 732 p.addSubtag(subtag); 733 } 734 } 735 736 } 737 } 738 } 739 740 // Make sure all of the text runs use the pre style 741 for (XMLNode xmlTag : p.getXMLNodeSubtags()) 742 { 743 if (xmlTag instanceof WmlTextRun) 744 { 745 ((WmlTextRun)xmlTag).getProperties().setStyle(preStyle.getId()); 746 } 747 } 748 } 749 750 return wmlTags; 751 } 752 753 //--------------------------------------------------------------------------- 754 private List<XMLTag> parseDivTag(Div inDivTag, Docx inDocx) 755 { 756 List<XMLTag> wmlTags = new ArrayList<>(5); 757 758 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inDivTag); 759 if (CollectionUtil.hasValues(cssDeclarations)) 760 { 761 for (CSSDeclaration cssDeclaration : cssDeclarations) 762 { 763 if (cssDeclaration.getProperty() == CSSProperty.display 764 && cssDeclaration.getValue().equals("none")) 765 { 766 return wmlTags; 767 } 768 } 769 } 770 771 WmlParagraph p = new WmlParagraph(inDocx); 772 wmlTags.add(p); 773 774 if (CollectionUtil.hasValues(cssDeclarations)) 775 { 776 convertCSSToTextRunProperties(cssDeclarations, p.getProperties().getRunProperties()); 777 } 778 779 if (inDivTag.hasContentOrSubtags()) 780 { 781 boolean containerClosed = false; 782 for (Object content : inDivTag.getContentPlusSubtagList()) 783 { 784 if (content instanceof String) 785 { 786 if (containerClosed) 787 { 788 p = new WmlParagraph(inDocx); 789 wmlTags.add(p); 790 containerClosed = false; 791 } 792 793 p.addTextRun((String) content); 794 } 795 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 796 { 797 if (containerClosed) 798 { 799 p = new WmlParagraph(inDocx); 800 wmlTags.add(p); 801 containerClosed = false; 802 } 803 804 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 805 } 806 else 807 { 808 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 809 if (CollectionUtil.hasValues(subtags)) 810 { 811 for (XMLTag subtag: subtags) 812 { 813 if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 814 { 815 if (containerClosed) 816 { 817 p = new WmlParagraph(inDocx); 818 wmlTags.add(p); 819 containerClosed = false; 820 } 821 822 p.addSubtag(subtag); 823 } 824 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 825 { 826 if (containerClosed) 827 { 828 p = new WmlParagraph(inDocx); 829 wmlTags.add(p); 830 containerClosed = false; 831 } 832 833 WmlTextRun textRun = p.addTextRun(); 834 textRun.addSubtag(subtag); 835 } 836 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 837 { 838 wmlTags.add(subtag); 839 p = (WmlParagraph) subtag; 840 } 841 else 842 { 843 wmlTags.add(subtag); 844 845 containerClosed = true; 846 } 847 } 848 } 849 } 850 } 851 } 852 853 return wmlTags; 854 } 855 856 857 //--------------------------------------------------------------------------- 858 private List<XMLTag> parseHtmlTag(HTML inHtmlTag, Docx inDocx) 859 { 860 List<XMLTag> wmlTags = new ArrayList<>(5); 861 862 if (inHtmlTag.hasContentOrSubtags()) 863 { 864 WmlParagraph p = new WmlParagraph(inDocx); 865 866 boolean containerClosed = false; 867 for (Object content : inHtmlTag.getContentPlusSubtagList()) 868 { 869 if (content instanceof String) 870 { 871 if (containerClosed) 872 { 873 p = new WmlParagraph(inDocx); 874 wmlTags.add(p); 875 containerClosed = false; 876 } 877 878 p.addTextRun((String) content); 879 } 880 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 881 { 882 if (containerClosed) 883 { 884 p = new WmlParagraph(inDocx); 885 wmlTags.add(p); 886 containerClosed = false; 887 } 888 889 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 890 } 891 else 892 { 893 if (p.hasContentOrSubtags()) 894 { 895 wmlTags.add(p); 896 containerClosed = true; 897 } 898 899 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 900 if (CollectionUtil.hasValues(subtags)) 901 { 902 for (XMLTag subtag: subtags) 903 { 904 if (WmlXML.R.getLocalName().equalsIgnoreCase(subtag.getTagName())) 905 { 906 if (containerClosed) 907 { 908 p = new WmlParagraph(inDocx); 909 wmlTags.add(p); 910 containerClosed = false; 911 } 912 913 p.addSubtag(subtag); 914 } 915 else if (WmlXML.BR.getLocalName().equalsIgnoreCase(subtag.getTagName())) 916 { 917 if (containerClosed) 918 { 919 p = new WmlParagraph(inDocx); 920 wmlTags.add(p); 921 containerClosed = false; 922 } 923 924 WmlTextRun textRun = p.addTextRun(); 925 textRun.addSubtag(subtag); 926 } 927 else if (WmlXML.P.getLocalName().equalsIgnoreCase(subtag.getTagName())) 928 { 929 wmlTags.add(subtag); 930 containerClosed = true; 931 } 932 else 933 { 934 wmlTags.add(subtag); 935 936 containerClosed = true; 937 } 938 } 939 } 940 } 941 } 942 } 943 944 return wmlTags; 945 } 946 947 //--------------------------------------------------------------------------- 948 private List<XMLTag> parseH1Tag(H1 inH1Tag, Docx inDocx) 949 { 950 return parseHeadingTag(inH1Tag, inDocx); 951 } 952 953 //--------------------------------------------------------------------------- 954 private List<XMLTag> parseH2Tag(H2 inH1Tag, Docx inDocx) 955 { 956 return parseHeadingTag(inH1Tag, inDocx); 957 } 958 959 //--------------------------------------------------------------------------- 960 private List<XMLTag> parseH3Tag(H3 inH1Tag, Docx inDocx) 961 { 962 return parseHeadingTag(inH1Tag, inDocx); 963 } 964 965 //--------------------------------------------------------------------------- 966 private List<XMLTag> parseHeadingTag(HTMLTag inHTMLTag, Docx inDocx) 967 { 968 List<XMLTag> wmlTags = new ArrayList<>(5); 969 970 971 WmlParagraph p = new WmlParagraph(inDocx); 972 wmlTags.add(p); 973 974 String styleId = null; 975 if (inHTMLTag instanceof H1) 976 { 977 styleId = WmlParagraphStyle.HEADING1_STYLE_ID; 978 } 979 else if (inHTMLTag instanceof H2) 980 { 981 styleId = WmlParagraphStyle.HEADING2_STYLE_ID; 982 } 983 // TODO: Add support for H3 and H4 984 985 if (styleId != null) 986 { 987 p.getProperties().setStyle(styleId); 988 } 989 990 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inHTMLTag); 991 if (CollectionUtil.hasValues(cssDeclarations)) 992 { 993 convertCSSToTextRunProperties(cssDeclarations, p.getProperties().getRunProperties()); 994 } 995 996 if (inHTMLTag.hasContentOrSubtags()) 997 { 998 boolean containerClosed = false; 999 for (Object content : inHTMLTag.getContentPlusSubtagList()) 1000 { 1001 if (content instanceof String) 1002 { 1003 if (containerClosed) 1004 { 1005 p = new WmlParagraph(inDocx); 1006 wmlTags.add(p); 1007 containerClosed = false; 1008 } 1009 1010 p.addTextRun((String) content); 1011 } 1012 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 1013 { 1014 if (containerClosed) 1015 { 1016 p = new WmlParagraph(inDocx); 1017 wmlTags.add(p); 1018 containerClosed = false; 1019 } 1020 1021 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 1022 } 1023 else 1024 { 1025 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 1026 if (CollectionUtil.hasValues(subtags)) 1027 { 1028 for (XMLTag subtag: subtags) 1029 { 1030 if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 1031 { 1032 if (containerClosed) 1033 { 1034 p = new WmlParagraph(inDocx); 1035 wmlTags.add(p); 1036 containerClosed = false; 1037 } 1038 1039 p.addSubtag(subtag); 1040 } 1041 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 1042 { 1043 if (containerClosed) 1044 { 1045 p = new WmlParagraph(inDocx); 1046 wmlTags.add(p); 1047 containerClosed = false; 1048 } 1049 1050 WmlTextRun textRun = p.addTextRun(); 1051 textRun.addSubtag(subtag); 1052 } 1053 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 1054 { 1055 wmlTags.add(subtag); 1056 p = (WmlParagraph) subtag; 1057 } 1058 else 1059 { 1060 wmlTags.add(subtag); 1061 containerClosed = true; 1062 } 1063 } 1064 } 1065 } 1066 } 1067 } 1068 1069 return wmlTags; 1070 } 1071 1072 //--------------------------------------------------------------------------- 1073 private List<XMLTag> parsePTag(P inPTag, Docx inDocx) 1074 { 1075 List<XMLTag> wmlTags = new ArrayList<>(5); 1076 1077 WmlParagraph p = new WmlParagraph(inDocx); 1078 wmlTags.add(p); 1079 1080 1081 WmlTextRunProperties runProperties = new WmlTextRunProperties(inDocx); 1082 1083 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inPTag); 1084 if (CollectionUtil.hasValues(cssDeclarations)) 1085 { 1086// convertCSSToTextRunProperties(cssDeclarations, p.getProperties().getRunProperties()); 1087 convertCSSToTextRunProperties(cssDeclarations, runProperties); 1088 } 1089 1090 if (inPTag.hasContentOrSubtags()) 1091 { 1092 boolean containerClosed = false; 1093 for (Object content : inPTag.getContentPlusSubtagList()) 1094 { 1095 if (content instanceof String) 1096 { 1097 if (containerClosed) 1098 { 1099 p = new WmlParagraph(inDocx); 1100 wmlTags.add(p); 1101 containerClosed = false; 1102 } 1103 1104 p.addTextRun((String) content).setProperties(runProperties); 1105 } 1106 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 1107 { 1108 if (containerClosed) 1109 { 1110 p = new WmlParagraph(inDocx); 1111 wmlTags.add(p); 1112 containerClosed = false; 1113 } 1114 1115 p.addTextRun(GZIP.uncompressToString((byte[]) content)).setProperties(runProperties); 1116 } 1117 else 1118 { 1119 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 1120 if (CollectionUtil.hasValues(subtags)) 1121 { 1122 for (XMLTag subtag: subtags) 1123 { 1124 if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 1125 { 1126 if (containerClosed) 1127 { 1128 p = new WmlParagraph(inDocx); 1129 wmlTags.add(p); 1130 containerClosed = false; 1131 } 1132 1133 p.addSubtag(subtag); 1134 } 1135 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 1136 { 1137 if (containerClosed) 1138 { 1139 p = new WmlParagraph(inDocx); 1140 wmlTags.add(p); 1141 containerClosed = false; 1142 } 1143 1144 WmlTextRun textRun = p.addTextRun(); 1145 textRun.setProperties(runProperties); 1146 textRun.addSubtag(subtag); 1147 } 1148 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 1149 { 1150 wmlTags.add(subtag); 1151 p = (WmlParagraph) subtag; 1152 } 1153 else 1154 { 1155 wmlTags.add(subtag); 1156 containerClosed = true; 1157 } 1158 } 1159 } 1160 } 1161 } 1162 } 1163 1164 return wmlTags; 1165 } 1166 1167 1168 //--------------------------------------------------------------------------- 1169 private List<XMLTag> parseSpanTag(Span inSpanTag, Docx inDocx) 1170 { 1171 return parseSpanTag(inSpanTag, null, inDocx); 1172 } 1173 1174 //--------------------------------------------------------------------------- 1175 private List<XMLTag> parseSpanTag(HTMLTag inSpanTag, CSSDeclaration inCSS, Docx inDocx) 1176 { 1177 List<XMLTag> wmlTags = new ArrayList<>(5); 1178 1179 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inSpanTag); 1180 if (CollectionUtil.hasValues(cssDeclarations)) 1181 { 1182 for (CSSDeclaration cssDeclaration : cssDeclarations) 1183 { 1184 if (cssDeclaration.getProperty() == CSSProperty.display 1185 && cssDeclaration.getValue().equals("none")) 1186 { 1187 return wmlTags; 1188 } 1189 } 1190 } 1191 1192 if (inCSS != null) 1193 { 1194 if (null == cssDeclarations) 1195 { 1196 cssDeclarations = new ArrayList<>(1); 1197 } 1198 1199 cssDeclarations.add(inCSS); 1200 } 1201 1202 1203 WmlTextRun textRun = new WmlTextRun(inDocx); 1204 wmlTags.add(textRun); 1205 1206 if (CollectionUtil.hasValues(cssDeclarations)) 1207 { 1208 convertCSSToTextRunProperties(cssDeclarations, textRun.getProperties()); 1209 } 1210 1211 if (inSpanTag.hasContentOrSubtags()) 1212 { 1213 boolean containerClosed = false; 1214 for (Object content : inSpanTag.getContentPlusSubtagList()) 1215 { 1216 if (content instanceof String) 1217 { 1218 if (containerClosed) 1219 { 1220 textRun = new WmlTextRun(inDocx); 1221 wmlTags.add(textRun); 1222 1223 if (CollectionUtil.hasValues(cssDeclarations)) 1224 { 1225 convertCSSToTextRunProperties(cssDeclarations, textRun.getProperties()); 1226 } 1227 } 1228 1229 textRun.addText((String)content); 1230 } 1231 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 1232 { 1233 if (containerClosed) 1234 { 1235 textRun = new WmlTextRun(inDocx); 1236 } 1237 1238 textRun.addText(GZIP.uncompressToString((byte[]) content)); 1239 } 1240 else 1241 { 1242 if (! textRun.hasContentOrSubtags()) 1243 { 1244 wmlTags.remove(textRun); 1245 } 1246 1247 containerClosed = true; 1248 1249 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 1250 if (CollectionUtil.hasValues(subtags)) 1251 { 1252 for (XMLTag subtag: subtags) 1253 { 1254 if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 1255 { 1256 // Word won't allow nesting of text runs 1257 containerClosed = true; 1258 WmlTextRun runSubtag = (WmlTextRun) subtag; 1259 if (textRun.hasProperties()) 1260 { 1261 WmlTextRunProperties parentProperties = (WmlTextRunProperties) textRun.getProperties().clone(); 1262 runSubtag.setProperties(parentProperties.combine(runSubtag.getProperties())); 1263 } 1264 wmlTags.add(runSubtag); 1265 } 1266 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 1267 { 1268 if (containerClosed 1269 || textRun.hasContentOrSubtags()) 1270 { 1271 containerClosed = true; 1272 textRun = new WmlTextRun(inDocx); 1273 wmlTags.add(textRun); 1274 } 1275 textRun.addSubtag(subtag); 1276 } 1277 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 1278 { 1279 wmlTags.addAll(subtag.getSubtags()); 1280 } 1281 else 1282 { 1283 wmlTags.add(subtag); 1284 } 1285 } 1286 } 1287 } 1288 } 1289 } 1290 1291 return wmlTags; 1292 } 1293 1294 //--------------------------------------------------------------------------- 1295 // The Link (A) tag itself doesn't map to anything but any data it contains should get parsed. 1296 private List<XMLTag> parseLinkTag(Link inLinkTag, Docx inDocx) 1297 { 1298 List<XMLTag> wmlTags = new ArrayList<>(5); 1299 1300 try 1301 { 1302 if (inLinkTag.hasContentOrSubtags()) 1303 { 1304 WmlHyperlink link = null; 1305 boolean containerClosed = true; 1306 for (Object content : inLinkTag.getContentPlusSubtagList()) 1307 { 1308 if (content instanceof String) 1309 { 1310 if (containerClosed) 1311 { 1312 link = new WmlHyperlink(new URL(inLinkTag.getURL()), inDocx); 1313 wmlTags.add(link); 1314 } 1315 1316 link.addTextRun((String) content); 1317 } 1318 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 1319 { 1320 if (containerClosed) 1321 { 1322 link = new WmlHyperlink(new URL(inLinkTag.getURL()), inDocx); 1323 wmlTags.add(link); 1324 } 1325 1326 link.addTextRun(GZIP.uncompressToString((byte[]) content)); 1327 } 1328 else 1329 { 1330 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 1331 if (CollectionUtil.hasValues(subtags)) 1332 { 1333 for (XMLTag subtag: subtags) 1334 { 1335 if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 1336 { 1337 if (containerClosed) 1338 { 1339 link = new WmlHyperlink(new URL(inLinkTag.getURL()), inDocx); 1340 wmlTags.add(link); 1341 } 1342 1343 link.addSubtag(subtag); 1344 } 1345 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 1346 { 1347 if (containerClosed) 1348 { 1349 link = new WmlHyperlink(new URL(inLinkTag.getURL()), inDocx); 1350 wmlTags.add(link); 1351 } 1352 1353 for (XMLNode runTag : subtag.getSubtagsByName(WmlXML.R)) 1354 { 1355 link.addSubtag(runTag); 1356 } 1357 } 1358 else 1359 { 1360 if (containerClosed) 1361 { 1362 link = new WmlHyperlink(new URL(inLinkTag.getURL()), inDocx); 1363 wmlTags.add(link); 1364 } 1365 1366 link.addSubtag(subtag); 1367 } 1368 } 1369 } 1370 } 1371 } 1372 } 1373 } 1374 catch (Exception e) 1375 { 1376 throw new RuntimeException(e); 1377 } 1378 1379 return wmlTags; 1380 } 1381 1382 //--------------------------------------------------------------------------- 1383 // The Nobr tag itself doesn't map to anything but any data it contains should get parsed. 1384 private List<XMLTag> parseNobrTag(Nobr inNobrTag, Docx inDocx) 1385 { 1386 List<XMLTag> wmlTags = new ArrayList<>(5); 1387 1388 if (inNobrTag.hasContentOrSubtags()) 1389 { 1390 WmlTextRun textRun = null; 1391 boolean containerClosed = true; 1392 for (Object content : inNobrTag.getContentPlusSubtagList()) 1393 { 1394 if (content instanceof String) 1395 { 1396 if (containerClosed) 1397 { 1398 textRun = new WmlTextRun(inDocx); 1399 wmlTags.add(textRun); 1400 } 1401 1402 textRun.addText((String)content); 1403 } 1404 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 1405 { 1406 if (containerClosed) 1407 { 1408 textRun = new WmlTextRun(inDocx); 1409 } 1410 1411 textRun.addText(GZIP.uncompressToString((byte[]) content)); 1412 } 1413 else 1414 { 1415 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 1416 if (CollectionUtil.hasValues(subtags)) 1417 { 1418 for (XMLTag subtag: subtags) 1419 { 1420 if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 1421 { 1422 // Word won't allow nesting of text runs 1423 containerClosed = true; 1424 WmlTextRun runSubtag = (WmlTextRun) subtag; 1425 if (textRun.hasProperties()) 1426 { 1427 WmlTextRunProperties parentProperties = (WmlTextRunProperties) textRun.getProperties().clone(); 1428 runSubtag.setProperties(parentProperties.combine(runSubtag.getProperties())); 1429 } 1430 wmlTags.add(runSubtag); 1431 } 1432 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 1433 { 1434 if (containerClosed 1435 || textRun.hasContentOrSubtags()) 1436 { 1437 containerClosed = true; 1438 textRun = new WmlTextRun(inDocx); 1439 wmlTags.add(textRun); 1440 } 1441 textRun.addSubtag(subtag); 1442 } 1443 else if (containerClosed) 1444 { 1445 textRun = new WmlTextRun(inDocx); 1446 wmlTags.add(textRun); 1447 textRun.addSubtag(subtag); 1448 } 1449 else 1450 { 1451 textRun.addSubtag(subtag); 1452 } 1453 } 1454 } 1455 } 1456 } 1457 } 1458 1459 return wmlTags; 1460 } 1461 1462 //--------------------------------------------------------------------------- 1463 private List<XMLTag> parseLiTag(Li inLiTag, Docx inDocx) 1464 { 1465 List<XMLTag> wmlTags = new ArrayList<>(5); 1466 1467 WmlParagraph p = new WmlParagraph(inDocx); 1468 wmlTags.add(p); 1469 1470 1471 List<CSSDeclaration> cssDeclarations = getCSSDeclarations(inLiTag); 1472 if (CollectionUtil.hasValues(cssDeclarations)) 1473 { 1474 convertCSSToTextRunProperties(cssDeclarations, p.getProperties().getRunProperties()); 1475 } 1476 1477 if (inLiTag.hasContentOrSubtags()) 1478 { 1479 boolean containerClosed = false; 1480 for (Object content : inLiTag.getContentPlusSubtagList()) 1481 { 1482 if (content instanceof String) 1483 { 1484 if (containerClosed) 1485 { 1486 p = new WmlParagraph(inDocx); 1487 wmlTags.add(p); 1488 containerClosed = false; 1489 } 1490 1491 p.addTextRun((String) content); 1492 } 1493 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 1494 { 1495 if (containerClosed) 1496 { 1497 p = new WmlParagraph(inDocx); 1498 wmlTags.add(p); 1499 containerClosed = false; 1500 } 1501 1502 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 1503 } 1504 else 1505 { 1506 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 1507 if (CollectionUtil.hasValues(subtags)) 1508 { 1509 for (XMLTag subtag: subtags) 1510 { 1511 if (subtag.getTagName().equalsIgnoreCase(WmlXML.R.getLocalName())) 1512 { 1513 if (containerClosed) 1514 { 1515 p = new WmlParagraph(inDocx); 1516 wmlTags.add(p); 1517 containerClosed = false; 1518 } 1519 1520 p.addSubtag(subtag); 1521 } 1522 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.BR.getLocalName())) 1523 { 1524 if (containerClosed) 1525 { 1526 p = new WmlParagraph(inDocx); 1527 wmlTags.add(p); 1528 containerClosed = false; 1529 } 1530 1531 WmlTextRun textRun = p.addTextRun(); 1532 textRun.addSubtag(subtag); 1533 } 1534 else if (subtag.getTagName().equalsIgnoreCase(WmlXML.P.getLocalName())) 1535 { 1536 wmlTags.add(subtag); 1537 p = (WmlParagraph) subtag; 1538 } 1539 else 1540 { 1541 wmlTags.add(subtag); 1542 containerClosed = true; 1543 } 1544 } 1545 } 1546 } 1547 } 1548 } 1549 1550 return wmlTags; 1551 } 1552 1553 //--------------------------------------------------------------------------- 1554 private List<XMLTag> parseUlTag(Ul inUlTag, Docx inDocx) 1555 { 1556 List<XMLTag> wmlTags = new ArrayList<>(5); 1557 1558 1559 WmlUnorderedList list = new WmlUnorderedList(inDocx); 1560 wmlTags.add(list); 1561 1562 if (inUlTag.hasContentOrSubtags()) 1563 { 1564 for (Object content : inUlTag.getContentPlusSubtagList()) 1565 { 1566 if (content instanceof String) 1567 { 1568 WmlParagraph p = new WmlParagraph(inDocx); 1569 wmlTags.add(p); 1570 p.addTextRun((String) content); 1571 } 1572 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 1573 { 1574 WmlParagraph p = new WmlParagraph(inDocx); 1575 wmlTags.add(p); 1576 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 1577 } 1578 else 1579 { 1580 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 1581 if (CollectionUtil.hasValues(subtags)) 1582 { 1583 for (XMLTag subtag: subtags) 1584 { 1585 if (subtag instanceof WmlParagraph) 1586 { 1587 list.addItem((WmlParagraph) subtag); 1588 } 1589 else if (subtag instanceof WmlList) 1590 { 1591 int lastLevel = list.getLastLevel(); 1592 for (WmlParagraph item : ((WmlList) subtag).getItems()) 1593 { 1594 list.addItem(item, lastLevel + 1 + item.getProperties().getNumberingProperties().getLevel()); 1595 } 1596 } 1597 else 1598 { 1599 list.addSubtag(subtag); 1600 } 1601 } 1602 } 1603 } 1604 } 1605 } 1606 1607 return wmlTags; 1608 } 1609 1610 //--------------------------------------------------------------------------- 1611 private List<XMLTag> parseOlTag(Ol inOlTag, Docx inDocx) 1612 { 1613 List<XMLTag> wmlTags = new ArrayList<>(5); 1614 1615 1616 WmlOrderedList list = new WmlOrderedList(inDocx); 1617 wmlTags.add(list); 1618 1619 if (inOlTag.hasContentOrSubtags()) 1620 { 1621 for (Object content : inOlTag.getContentPlusSubtagList()) 1622 { 1623 if (content instanceof String) 1624 { 1625 WmlParagraph p = new WmlParagraph(inDocx); 1626 wmlTags.add(p); 1627 p.addTextRun((String) content); 1628 } 1629 else if (content instanceof byte[]) // Compressed content is stored as a byte[] 1630 { 1631 WmlParagraph p = new WmlParagraph(inDocx); 1632 wmlTags.add(p); 1633 p.addTextRun(GZIP.uncompressToString((byte[]) content)); 1634 } 1635 else 1636 { 1637 List<XMLTag> subtags = convert((HTMLTag) content, inDocx); 1638 if (CollectionUtil.hasValues(subtags)) 1639 { 1640 for (XMLTag subtag: subtags) 1641 { 1642 if (subtag instanceof WmlParagraph) 1643 { 1644 list.addItem((WmlParagraph) subtag); 1645 } 1646 else if (subtag instanceof WmlList) 1647 { 1648 int lastLevel = list.getLastLevel(); 1649 for (WmlParagraph item : ((WmlList) subtag).getItems()) 1650 { 1651 list.addItem(item, lastLevel + 1 + item.getProperties().getNumberingProperties().getLevel()); 1652 } 1653 } 1654 else 1655 { 1656 list.addSubtag(subtag); 1657 } 1658 } 1659 } 1660 } 1661 } 1662 } 1663 1664 return wmlTags; 1665 } 1666 1667 //--------------------------------------------------------------------------- 1668 // Returns the in-line and CSS-defined styling for the specified tag. 1669 private List<CSSDeclaration> getCSSDeclarations(HTMLTag inHTMLTag) 1670 { 1671 List<CSSDeclaration> cssDeclarations = new ArrayList<>(20); 1672 if (mCSS != null) 1673 { 1674 List<CSSDeclaration> declarations = mCSS.getCSSDeclarationsForHTMLTag(inHTMLTag, CSSMediaType.print); 1675 if (CollectionUtil.hasValues(declarations)) 1676 { 1677 cssDeclarations.addAll(declarations); 1678 } 1679 } 1680 1681 String styleString = inHTMLTag.getStyle(); 1682 if (StringUtil.isSet(styleString)) 1683 { 1684 List<CSSDeclaration> styleCSSDeclarations = parseCssStyleString(styleString); 1685 if (CollectionUtil.hasValues(styleCSSDeclarations)) 1686 { 1687 cssDeclarations.addAll(styleCSSDeclarations); 1688 } 1689 } 1690 1691 return cssDeclarations; 1692 } 1693 1694 //--------------------------------------------------------------------------- 1695 private void convertCSSToParagraphProperties(List<CSSDeclaration> inCSSDeclarations, WmlParagraphProperties inParagraphProperties) 1696 { 1697 for (CSSDeclaration cssDeclaration : inCSSDeclarations) 1698 { 1699 if (cssDeclaration.getProperty() == CSSProperty.text_align) 1700 { 1701 WmlJustification justification = null; 1702 if (cssDeclaration.getValue().equalsIgnoreCase("center")) 1703 { 1704 justification = WmlJustification.center; 1705 } 1706 else if (cssDeclaration.getValue().equalsIgnoreCase("left")) 1707 { 1708 justification = WmlJustification.left; 1709 } 1710 else if (cssDeclaration.getValue().equalsIgnoreCase("right")) 1711 { 1712 justification = WmlJustification.right; 1713 } 1714 1715 if (justification != null) 1716 { 1717 inParagraphProperties.setJustification(justification); 1718 } 1719 } 1720 } 1721 } 1722 1723 //--------------------------------------------------------------------------- 1724 private void convertCSSToTextRunProperties(List<CSSDeclaration> inCSSDeclarations, WmlTextRunProperties inTextRunProperties) 1725 { 1726 for (CSSDeclaration cssDeclaration : inCSSDeclarations) 1727 { 1728 if (cssDeclaration.getProperty() == CSSProperty.color) 1729 { 1730 HTMLColor color = HTMLColor.valueOf(cssDeclaration.getValue()); 1731 if (color != null) 1732 { 1733 inTextRunProperties.setColor(color); 1734 } 1735 } 1736 else if (cssDeclaration.getProperty() == CSSProperty.background_color) 1737 { 1738 HTMLColor color = HTMLColor.valueOf(cssDeclaration.getValue()); 1739 if (color != null) 1740 { 1741 inTextRunProperties.getShading().setFill(color); 1742 } 1743 } 1744 else if (cssDeclaration.getProperty() == CSSProperty.font_weight 1745 && cssDeclaration.getValue().equalsIgnoreCase("bold")) 1746 { 1747 inTextRunProperties.setBold(); 1748 } 1749 else if (cssDeclaration.getProperty() == CSSProperty.font_style 1750 && cssDeclaration.getValue().equalsIgnoreCase("italic")) 1751 { 1752 inTextRunProperties.setItalics(); 1753 } 1754 else if (cssDeclaration.getProperty() == CSSProperty.font_size) 1755 { 1756 if (cssDeclaration.getValue().endsWith("pt")) 1757 { 1758 Matcher m = PTS_SIZE_PATTERN.matcher(cssDeclaration.getValue()); 1759 if (m.matches()) 1760 { 1761 inTextRunProperties.setSize(new Points(Integer.parseInt(m.group(1)))); 1762 } 1763 } 1764 else if (cssDeclaration.getValue().endsWith("px")) 1765 { 1766 Matcher m = PX_SIZE_PATTERN.matcher(cssDeclaration.getValue()); 1767 if (m.matches()) 1768 { 1769 inTextRunProperties.setSize(new Pixels(Integer.parseInt(m.group(1)))); 1770 } 1771 } 1772 // Don't know what to do with 'em' sizes 1773 } 1774 else if (cssDeclaration.getProperty() == CSSProperty.border) 1775 { 1776 inTextRunProperties.getBorder(cssDeclaration); 1777 } 1778 else if (cssDeclaration.getProperty() == CSSProperty.text_decoration 1779 && cssDeclaration.getValue().equalsIgnoreCase("underline")) 1780 { 1781 inTextRunProperties.setUnderline(); 1782 } 1783 else if (cssDeclaration.getProperty() == CSSProperty.text_decoration 1784 && cssDeclaration.getValue().equalsIgnoreCase("line-through")) 1785 { 1786 inTextRunProperties.setStrikeThrough(); 1787 } 1788 } 1789 } 1790 1791 //--------------------------------------------------------------------------- 1792 private void convertCSSToTableProperties(List<CSSDeclaration> inCSSDeclarations, WmlTableProperties inTableProperties) 1793 { 1794 for (CSSDeclaration cssDeclaration : inCSSDeclarations) 1795 { 1796 if (cssDeclaration.getProperty() == CSSProperty.margin) 1797 { 1798 inTableProperties.getTableCellMargins().addMargins(cssDeclaration); 1799 } 1800 else if (cssDeclaration.getProperty() == CSSProperty.width 1801 && cssDeclaration.getValue().endsWith("px")) 1802 { 1803 // Don't know what to do with 'em' sizes 1804 Matcher m = PX_SIZE_PATTERN.matcher(cssDeclaration.getValue()); 1805 if (m.matches()) 1806 { 1807 inTableProperties.setWidth(new Pixels(Integer.parseInt(m.group(1)))); 1808 } 1809 } 1810 } 1811 } 1812 1813 1814 //--------------------------------------------------------------------------- 1815 private void convertCSSToTableCellProperties(List<CSSDeclaration> inCSSDeclarations, WmlTableCellProperties inTableCellProperties) 1816 { 1817 for (CSSDeclaration cssDeclaration : inCSSDeclarations) 1818 { 1819 if (cssDeclaration.getProperty() == CSSProperty.background_color) 1820 { 1821 HTMLColor color = HTMLColor.valueOf(cssDeclaration.getValue()); 1822 if (color != null) 1823 { 1824 inTableCellProperties.getShading().setFill(color); 1825 } 1826 } 1827 else if (cssDeclaration.getProperty() == CSSProperty.vertical_align) 1828 { 1829 WmlVerticalJustification justification = null; 1830 if (cssDeclaration.getValue().equalsIgnoreCase("middle")) 1831 { 1832 justification = WmlVerticalJustification.center; 1833 } 1834 else if (cssDeclaration.getValue().equalsIgnoreCase("top")) 1835 { 1836 justification = WmlVerticalJustification.top; 1837 } 1838 else if (cssDeclaration.getValue().equalsIgnoreCase("bottom")) 1839 { 1840 justification = WmlVerticalJustification.bottom; 1841 } 1842 1843 if (justification != null) 1844 { 1845 inTableCellProperties.setVerticalJustification(justification); 1846 } 1847 } 1848 else if ((cssDeclaration.getProperty() == CSSProperty.border 1849 || cssDeclaration.getProperty() == CSSProperty.border_top 1850 || cssDeclaration.getProperty() == CSSProperty.border_bottom 1851 || cssDeclaration.getProperty() == CSSProperty.border_left 1852 || cssDeclaration.getProperty() == CSSProperty.border_right) 1853 && ! cssDeclaration.getValue().equalsIgnoreCase("none")) 1854 { 1855 inTableCellProperties.getBorders().addBorder(new WmlTableCellBorder(cssDeclaration)); 1856 } 1857 else if (cssDeclaration.getProperty() == CSSProperty.width 1858 && cssDeclaration.getValue().endsWith("px")) 1859 { 1860 // Don't know what to do with 'em' sizes 1861 Matcher m = PX_SIZE_PATTERN.matcher(cssDeclaration.getValue()); 1862 if (m.matches()) 1863 { 1864 inTableCellProperties.setWidth(new Pixels(Integer.parseInt(m.group(1)))); 1865 } 1866 } 1867 else if (cssDeclaration.getProperty() == CSSProperty.transform) 1868 { 1869 if (cssDeclaration.getValue().equals("rotate(-90deg)")) 1870 { 1871 inTableCellProperties.setTextDirection(WmlTextDirection.btLr); 1872 } 1873 } 1874 } 1875 } 1876 1877 //--------------------------------------------------------------------------- 1878 private void convertCSSToTableRowProperties(List<CSSDeclaration> inCSSDeclarations, WmlTableRowProperties inTableRowProperties) 1879 { 1880 for (CSSDeclaration cssDeclaration : inCSSDeclarations) 1881 { 1882 if (cssDeclaration.getProperty() == CSSProperty.height 1883 && cssDeclaration.getValue().endsWith("px")) 1884 { 1885 // Don't know what to do with 'em' sizes 1886 Matcher m = PX_SIZE_PATTERN.matcher(cssDeclaration.getValue()); 1887 if (m.matches()) 1888 { 1889 inTableRowProperties.setExactHeight(new Pixels(Integer.parseInt(m.group(1)))); 1890 } 1891 } 1892 } 1893 } 1894 1895 //--------------------------------------------------------------------------- 1896 private List<CSSDeclaration> parseCssStyleString(String inCssStyleString) 1897 { 1898 List<CSSDeclaration> cssDeclarations = new ArrayList<>(20); 1899 1900 String[] pieces = inCssStyleString.split(";"); 1901 for (String piece : pieces) 1902 { 1903 int colonIdx = piece.indexOf(":"); 1904 CSSProperty property = CSSProperty.valueOf(piece.substring(0, colonIdx).trim()); 1905 if (property != null) 1906 { 1907 String value = piece.substring(colonIdx + 1).trim(); 1908 cssDeclarations.add(new CSSDeclaration(property, value)); 1909 } 1910 } 1911 1912 return cssDeclarations; 1913 } 1914 1915}