1 package org.apache.maven.archetype.common.util; 2 3 /* 4 * Copyright (C) 2000-2004 Jason Hunter & Brett McLaughlin. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions, and the following disclaimer. 13 * 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions, and the disclaimer that follows 16 * these conditions in the documentation and/or other materials 17 * provided with the distribution. 18 * 19 * 3. The name "JDOM" must not be used to endorse or promote products 20 * derived from this software without prior written permission. For 21 * written permission, please contact <request_AT_jdom_DOT_org>. 22 * 23 * 4. Products derived from this software may not be called "JDOM", nor 24 * may "JDOM" appear in their name, without prior written permission 25 * from the JDOM Project Management <request_AT_jdom_DOT_org>. 26 * 27 * In addition, we request (but do not require) that you include in the 28 * end-user documentation provided with the redistribution and/or in the 29 * software itself an acknowledgement equivalent to the following: 30 * "This product includes software developed by the 31 * JDOM Project (http://www.jdom.org/)." 32 * Alternatively, the acknowledgment may be graphical using the logos 33 * available at http://www.jdom.org/images/logos. 34 * 35 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED 36 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 37 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 38 * DISCLAIMED. IN NO EVENT SHALL THE JDOM AUTHORS OR THE PROJECT 39 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 40 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 41 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF 42 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND 43 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 44 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 45 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 46 * SUCH DAMAGE. 47 * 48 * This software consists of voluntary contributions made by many 49 * individuals on behalf of the JDOM Project and was originally 50 * created by Jason Hunter <jhunter_AT_jdom_DOT_org> and 51 * Brett McLaughlin <brett_AT_jdom_DOT_org>. For more information 52 * on the JDOM Project, please see <http://www.jdom.org/>. 53 */ 54 55 import org.jdom.output.EscapeStrategy; 56 57 import java.lang.reflect.Method; 58 59 /** 60 * Class to encapsulate XMLOutputter format options. 61 * Typical users can use the standard format configurations obtained by 62 * {@link #getRawFormat} (no whitespace changes), 63 * {@link #getPrettyFormat} (whitespace beautification), and 64 * {@link #getCompactFormat} (whitespace normalization). 65 * <p/> 66 * Several modes are available to effect the way textual content is printed. 67 * See the documentation for {@link TextMode} for details. 68 * 69 * @author Jason Hunter 70 * @version $Revision: 1006414 $, $Date: 2017-02-08 16:24:44 +0000 (Wed, 08 Feb 2017) $ 71 */ 72 public class Format 73 implements Cloneable 74 { 75 76 /** 77 * Returns a new Format object that performs no whitespace changes, uses 78 * the UTF-8 encoding, doesn't expand empty elements, includes the 79 * declaration and encoding, and uses the default entity escape strategy. 80 * Tweaks can be made to the returned Format instance without affecting 81 * other instances. 82 * 83 * @return a Format with no whitespace changes 84 */ 85 public static Format getRawFormat() 86 { 87 return new Format(); 88 } 89 90 /** 91 * Returns a new Format object that performs whitespace beautification with 92 * 2-space indents, uses the UTF-8 encoding, doesn't expand empty elements, 93 * includes the declaration and encoding, and uses the default entity 94 * escape strategy. 95 * Tweaks can be made to the returned Format instance without affecting 96 * other instances. 97 * 98 * @return a Format with whitespace beautification 99 */ 100 public static Format getPrettyFormat() 101 { 102 Format f = new Format(); 103 f.setIndent( STANDARD_INDENT ); 104 f.setTextMode( TextMode.TRIM ); 105 return f; 106 } 107 108 /** 109 * Returns a new Format object that performs whitespace normalization, uses 110 * the UTF-8 encoding, doesn't expand empty elements, includes the 111 * declaration and encoding, and uses the default entity escape strategy. 112 * Tweaks can be made to the returned Format instance without affecting 113 * other instances. 114 * 115 * @return a Format with whitespace normalization 116 */ 117 public static Format getCompactFormat() 118 { 119 Format f = new Format(); 120 f.setTextMode( TextMode.NORMALIZE ); 121 return f; 122 } 123 124 /** standard value to indent by, if we are indenting */ 125 private static final String STANDARD_INDENT = " "; 126 127 /** standard string with which to end a line */ 128 private static final String STANDARD_LINE_SEPARATOR = "\r\n"; 129 130 /** standard encoding */ 131 private static final String STANDARD_ENCODING = "UTF-8"; 132 133 134 /** The default indent is no spaces (as original document) */ 135 String indent = null; 136 137 /** New line separator */ 138 String lineSeparator = STANDARD_LINE_SEPARATOR; 139 140 /** The encoding format */ 141 String encoding = STANDARD_ENCODING; 142 143 /** 144 * Whether or not to output the XML declaration 145 * - default is <code>false</code> 146 */ 147 boolean omitDeclaration = false; 148 149 /** 150 * Whether or not to output the encoding in the XML declaration 151 * - default is <code>false</code> 152 */ 153 boolean omitEncoding = false; 154 155 /** 156 * Whether or not to expand empty elements to 157 * <tagName></tagName> - default is <code>false</code> 158 */ 159 boolean expandEmptyElements = false; 160 161 /** 162 * Whether TrAX output escaping disabling/enabling PIs are ignored 163 * or processed - default is <code>false</code> 164 */ 165 boolean ignoreTrAXEscapingPIs = false; 166 167 /** text handling mode */ 168 TextMode mode = TextMode.PRESERVE; 169 170 /** entity escape logic */ 171 EscapeStrategy escapeStrategy = new DefaultEscapeStrategy( encoding ); 172 173 /** Creates a new Format instance with default (raw) behavior. */ 174 private Format() 175 { 176 } 177 178 /** 179 * Sets the {@link EscapeStrategy} to use for character escaping. 180 * 181 * @param strategy the EscapeStrategy to use 182 * @return a pointer to this Format for chaining 183 */ 184 public Format setEscapeStrategy( EscapeStrategy strategy ) 185 { 186 escapeStrategy = strategy; 187 return this; 188 } 189 190 /** 191 * Returns the current escape strategy 192 * 193 * @return the current escape strategy 194 */ 195 public EscapeStrategy getEscapeStrategy() 196 { 197 return escapeStrategy; 198 } 199 200 /** 201 * This will set the newline separator (<code>lineSeparator</code>). 202 * The default is <code>\r\n</code>. Note that if the "newlines" 203 * property is false, this value is irrelevant. To make it output 204 * the system default line ending string, call 205 * <code>setLineSeparator(System.getProperty("line.separator"))</code> 206 * <p/> 207 * <p/> 208 * To output "UNIX-style" documents, call 209 * <code>setLineSeparator("\n")</code>. To output "Mac-style" 210 * documents, call <code>setLineSeparator("\r")</code>. DOS-style 211 * documents use CR-LF ("\r\n"), which is the default. 212 * </p> 213 * <p/> 214 * <p/> 215 * Note that this only applies to newlines generated by the 216 * outputter. If you parse an XML document that contains newlines 217 * embedded inside a text node, and you do not set TextMode.NORMALIZE, 218 * then the newlines will be output 219 * verbatim, as "\n" which is how parsers normalize them. 220 * </p> 221 * 222 * @param separator <code>String</code> line separator to use. 223 * @return a pointer to this Format for chaining 224 * @see #setTextMode 225 */ 226 public Format setLineSeparator( String separator ) 227 { 228 this.lineSeparator = separator; 229 return this; 230 } 231 232 /** 233 * Returns the current line separator. 234 * 235 * @return the current line separator 236 */ 237 public String getLineSeparator() 238 { 239 return lineSeparator; 240 } 241 242 /** 243 * This will set whether the XML declaration 244 * (<code><?xml version="1.0" 245 * encoding="UTF-8"?></code>) 246 * includes the encoding of the document. It is common to omit 247 * this in uses such as WML and other wireless device protocols. 248 * 249 * @param omitEncoding <code>boolean</code> indicating whether or not 250 * the XML declaration should indicate the document encoding. 251 * @return a pointer to this Format for chaining 252 */ 253 public Format setOmitEncoding( boolean omitEncoding ) 254 { 255 this.omitEncoding = omitEncoding; 256 return this; 257 } 258 259 /** 260 * Returns whether the XML declaration encoding will be omitted. 261 * 262 * @return whether the XML declaration encoding will be omitted 263 */ 264 public boolean getOmitEncoding() 265 { 266 return omitEncoding; 267 } 268 269 /** 270 * This will set whether the XML declaration 271 * (<code><?xml version="1.0"?gt;</code>) 272 * will be omitted or not. It is common to omit this in uses such 273 * as SOAP and XML-RPC calls. 274 * 275 * @param omitDeclaration <code>boolean</code> indicating whether or not 276 * the XML declaration should be omitted. 277 * @return a pointer to this Format for chaining 278 */ 279 public Format setOmitDeclaration( boolean omitDeclaration ) 280 { 281 this.omitDeclaration = omitDeclaration; 282 return this; 283 } 284 285 /** 286 * Returns whether the XML declaration will be omitted. 287 * 288 * @return whether the XML declaration will be omitted 289 */ 290 public boolean getOmitDeclaration() 291 { 292 return omitDeclaration; 293 } 294 295 /** 296 * This will set whether empty elements are expanded from 297 * <code><tagName/></code> to 298 * <code><tagName></tagName></code>. 299 * 300 * @param expandEmptyElements <code>boolean</code> indicating whether or not 301 * empty elements should be expanded. 302 * @return a pointer to this Format for chaining 303 */ 304 public Format setExpandEmptyElements( boolean expandEmptyElements ) 305 { 306 this.expandEmptyElements = expandEmptyElements; 307 return this; 308 } 309 310 /** 311 * Returns whether empty elements are expanded. 312 * 313 * @return whether empty elements are expanded 314 */ 315 public boolean getExpandEmptyElements() 316 { 317 return expandEmptyElements; 318 } 319 320 /** 321 * This will set whether JAXP TrAX processing instructions for 322 * disabling/enabling output escaping are ignored. Disabling 323 * output escaping allows using XML text as element content and 324 * outputing it verbatim, i.e. as element children would be. 325 * <p/> 326 * When processed, these processing instructions are removed from 327 * the generated XML text and control whether the element text 328 * content is output verbatim or with escaping of the pre-defined 329 * entities in XML 1.0. The text to be output verbatim shall be 330 * surrounded by the 331 * <code><?javax.xml.transform.disable-output-escaping ?></code> 332 * and <code><?javax.xml.transform.enable-output-escaping ?></code> 333 * PIs.</p> 334 * <p/> 335 * When ignored, the processing instructions are present in the 336 * generated XML text and the pre-defined entities in XML 1.0 are 337 * escaped. 338 * <p/> 339 * Default: <code>false</code>.</p> 340 * 341 * @param ignoreTrAXEscapingPIs <code>boolean</code> indicating 342 * whether or not TrAX ouput escaping PIs are ignored. 343 * @see javax.xml.transform.Result#PI_ENABLE_OUTPUT_ESCAPING 344 * @see javax.xml.transform.Result#PI_DISABLE_OUTPUT_ESCAPING 345 */ 346 public void setIgnoreTrAXEscapingPIs( boolean ignoreTrAXEscapingPIs ) 347 { 348 this.ignoreTrAXEscapingPIs = ignoreTrAXEscapingPIs; 349 } 350 351 /** 352 * Returns whether JAXP TrAX processing instructions for 353 * disabling/enabling output escaping are ignored. 354 * 355 * @return whether or not TrAX ouput escaping PIs are ignored. 356 */ 357 public boolean getIgnoreTrAXEscapingPIs() 358 { 359 return ignoreTrAXEscapingPIs; 360 } 361 362 /** 363 * This sets the text output style. Options are available as static 364 * {@link TextMode} instances. The default is {@link TextMode#PRESERVE}. 365 * 366 * @return a pointer to this Format for chaining 367 */ 368 public Format setTextMode( Format.TextMode mode ) 369 { 370 this.mode = mode; 371 return this; 372 } 373 374 /** 375 * Returns the current text output style. 376 * 377 * @return the current text output style 378 */ 379 public Format.TextMode getTextMode() 380 { 381 return mode; 382 } 383 384 /** 385 * This will set the indent <code>String</code> to use; this 386 * is usually a <code>String</code> of empty spaces. If you pass 387 * null, or the empty string (""), then no indentation will 388 * happen. Default: none (null) 389 * 390 * @param indent <code>String</code> to use for indentation. 391 * @return a pointer to this Format for chaining 392 */ 393 public Format setIndent( String indent ) 394 { 395 // if passed the empty string, change it to null, for marginal 396 // performance gains later (can compare to null first instead 397 // of calling equals()) 398 if ( "".equals( indent ) ) 399 { 400 indent = null; 401 } 402 this.indent = indent; 403 return this; 404 } 405 406 /** 407 * Returns the indent string in use. 408 * 409 * @return the indent string in use 410 */ 411 public String getIndent() 412 { 413 return indent; 414 } 415 416 /** 417 * Sets the output encoding. The name should be an accepted XML 418 * encoding. 419 * 420 * @param encoding the encoding format. Use XML-style names like 421 * "UTF-8" or "ISO-8859-1" or "US-ASCII" 422 * @return a pointer to this Format for chaining 423 */ 424 public Format setEncoding( String encoding ) 425 { 426 this.encoding = encoding; 427 escapeStrategy = new DefaultEscapeStrategy( encoding ); 428 return this; 429 } 430 431 /** 432 * Returns the configured output encoding. 433 * 434 * @return the output encoding 435 */ 436 public String getEncoding() 437 { 438 return encoding; 439 } 440 441 protected Object clone() 442 { 443 Format format = null; 444 445 try 446 { 447 format = (Format) super.clone(); 448 } 449 catch ( CloneNotSupportedException ce ) 450 { 451 } 452 453 return format; 454 } 455 456 457 /** 458 * Handle common charsets quickly and easily. Use reflection 459 * to query the JDK 1.4 CharsetEncoder class for unknown charsets. 460 * If JDK 1.4 isn't around, default to no special encoding. 461 */ 462 class DefaultEscapeStrategy 463 implements EscapeStrategy 464 { 465 private int bits; 466 Object encoder; 467 Method canEncode; 468 469 public DefaultEscapeStrategy( String encoding ) 470 { 471 if ( "UTF-8".equalsIgnoreCase( encoding ) 472 || "UTF-16".equalsIgnoreCase( encoding ) ) 473 { 474 bits = 16; 475 } 476 else if ( "ISO-8859-1".equalsIgnoreCase( encoding ) 477 || "Latin1".equalsIgnoreCase( encoding ) ) 478 { 479 bits = 8; 480 } 481 else if ( "US-ASCII".equalsIgnoreCase( encoding ) 482 || "ASCII".equalsIgnoreCase( encoding ) ) 483 { 484 bits = 7; 485 } 486 else 487 { 488 bits = 0; 489 //encoder = Charset.forName(encoding).newEncoder(); 490 try 491 { 492 Class<?> charsetClass = Class.forName( "java.nio.charset.Charset" ); 493 Class<?> encoderClass = Class.forName( "java.nio.charset.CharsetEncoder" ); 494 Method forName = charsetClass.getMethod( "forName", new Class[] { String.class } ); 495 Object charsetObj = forName.invoke( null, new Object[] { encoding } ); 496 Method newEncoder = charsetClass.getMethod( "newEncoder" ); 497 encoder = newEncoder.invoke( charsetObj ); 498 canEncode = encoderClass.getMethod( "canEncode", new Class[] { char.class } ); 499 } 500 catch ( Exception ignored ) 501 { 502 } 503 } 504 } 505 506 public boolean shouldEscape( char ch ) 507 { 508 if ( bits == 16 ) 509 { 510 return false; 511 } 512 if ( bits == 8 ) 513 { 514 return ( (int) ch > 255 ); 515 } 516 if ( bits == 7 ) 517 { 518 return ( (int) ch > 127 ); 519 } 520 else 521 { 522 if ( canEncode != null && encoder != null ) 523 { 524 try 525 { 526 Boolean val = (Boolean) canEncode.invoke( encoder, new Object[] { Character.valueOf( ch ) } ); 527 return !val.booleanValue(); 528 } 529 catch ( Exception ignored ) 530 { 531 } 532 } 533 // Return false if we don't know. This risks not escaping 534 // things which should be escaped, but also means people won't 535 // start getting loads of unnecessary escapes. 536 return false; 537 } 538 } 539 } 540 541 542 /** 543 * Class to signify how text should be handled on output. The following 544 * table provides details. 545 * <p/> 546 * <table> 547 * <tr> 548 * <th align="left"> 549 * Text Mode 550 * </th> 551 * <th> 552 * Resulting behavior. 553 * </th> 554 * </tr> 555 * <p/> 556 * <tr valign="top"> 557 * <td> 558 * <i>PRESERVE (Default)</i> 559 * </td> 560 * <td> 561 * All content is printed in the format it was created, no whitespace 562 * or line separators are are added or removed. 563 * </td> 564 * </tr> 565 * <p/> 566 * <tr valign="top"> 567 * <td> 568 * TRIM_FULL_WHITE 569 * </td> 570 * <td> 571 * Content between tags consisting of all whitespace is not printed. 572 * If the content contains even one non-whitespace character, it is 573 * printed verbatim, whitespace and all. 574 * </td> 575 * </tr> 576 * <p/> 577 * <tr valign="top"> 578 * <td> 579 * TRIM 580 * </td> 581 * <td> 582 * Same as TrimAllWhite, plus leading/trailing whitespace are 583 * trimmed. 584 * </td> 585 * </tr> 586 * <p/> 587 * <tr valign="top"> 588 * <td> 589 * NORMALIZE 590 * </td> 591 * <td> 592 * Same as TextTrim, plus addition interior whitespace is compressed 593 * to a single space. 594 * </td> 595 * </tr> 596 * </table> 597 * <p/> 598 * In most cases textual content is aligned with the surrounding tags 599 * (after the appropriate text mode is applied). In the case where the only 600 * content between the start and end tags is textual, the start tag, text, 601 * and end tag are all printed on the same line. If the document being 602 * output already has whitespace, it's wise to turn on TRIM mode so the 603 * pre-existing whitespace can be trimmed before adding new whitespace. 604 * <p/> 605 * When a element has a xml:space attribute with the value of "preserve", 606 * all formating is turned off and reverts back to the default until the 607 * element and its contents have been printed. If a nested element contains 608 * another xml:space with the value "default" formatting is turned back on 609 * for the child element and then off for the remainder of the parent 610 * element. 611 */ 612 public static class TextMode 613 { 614 /** Mode for literal text preservation. */ 615 public static final TextMode PRESERVE = new TextMode( "PRESERVE" ); 616 617 /** Mode for text trimming (left and right trim). */ 618 public static final TextMode TRIM = new TextMode( "TRIM" ); 619 620 /** 621 * Mode for text normalization (left and right trim plus internal 622 * whitespace is normalized to a single space. 623 * 624 * @see org.jdom.Element#getTextNormalize 625 */ 626 public static final TextMode NORMALIZE = new TextMode( "NORMALIZE" ); 627 628 /** 629 * Mode for text trimming of content consisting of nothing but 630 * whitespace but otherwise not changing output. 631 */ 632 public static final TextMode TRIM_FULL_WHITE = 633 new TextMode( "TRIM_FULL_WHITE" ); 634 635 private final String name; 636 637 private TextMode( String name ) 638 { 639 this.name = name; 640 } 641 642 public String toString() 643 { 644 return name; 645 } 646 } 647 } 648