////////////////////////////////////////////////////////////////////////////////
//
// Licensed to the Apache Software Foundation (ASF) under one or more
// contributor license agreements. See the NOTICE file distributed with
// this work for additional information regarding copyright ownership.
// The ASF licenses this file to You under the Apache License, Version 2.0
// (the "License"); you may not use this file except in compliance with
// the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
////////////////////////////////////////////////////////////////////////////////
package flashx.textLayout.conversion
{
import flash.text.engine.Kerning;
import flash.utils.Dictionary;
import flashx.textLayout.debug.assert;
import flashx.textLayout.elements.BreakElement;
import flashx.textLayout.elements.FlowGroupElement;
import flashx.textLayout.elements.FlowLeafElement;
import flashx.textLayout.elements.GlobalSettings;
import flashx.textLayout.elements.IConfiguration;
import flashx.textLayout.elements.InlineGraphicElement;
import flashx.textLayout.elements.LinkElement;
import flashx.textLayout.elements.ParagraphElement;
import flashx.textLayout.elements.SpanElement;
import flashx.textLayout.elements.TabElement;
import flashx.textLayout.elements.TextFlow;
import flashx.textLayout.formats.ITextLayoutFormat;
import flashx.textLayout.formats.LeadingModel;
import flashx.textLayout.formats.TextLayoutFormat;
import flashx.textLayout.formats.TextLayoutFormatValueHolder;
import flashx.textLayout.property.Property;
import flashx.textLayout.property.StringProperty;
import flashx.textLayout.tlf_internal;
use namespace tlf_internal;
[ExcludeClass]
/**
* @private
* HtmlImporter converts from HTML to TextLayout data structures
*/
internal class HtmlImporter extends BaseTextLayoutImporter
{
// TLF formats to which <font> attributes map directly (consumed by _fontImporter, see createConfig)
static internal var _fontDescription:Object = {
color:TextLayoutFormat.colorProperty,
trackingRight:TextLayoutFormat.trackingRightProperty,
fontFamily:TextLayoutFormat.fontFamilyProperty
};
// <font> attributes that require custom logic for mapping to TLF formats (see parseFontAttributes)
static internal const _fontMiscDescription:Object = {
size : new StringProperty("size", null, false, null),
kerning : new StringProperty("kerning", null, false, null)
};
// TLF formats to which <textformat> attributes map directly (consumed by _textFormatImporter)
static internal var _textFormatDescription:Object = {
paragraphStartIndent:TextLayoutFormat.paragraphStartIndentProperty,
paragraphEndIndent:TextLayoutFormat.paragraphEndIndentProperty,
textIndent:TextLayoutFormat.textIndentProperty,
lineHeight:TextLayoutFormat.lineHeightProperty,
tabStops:TextLayoutFormat.tabStopsProperty
};
// <textformat> attributes that require custom logic for mapping to TLF formats (see parseTextFormat)
static internal const _textFormatMiscDescription:Object = {
blockIndent : new StringProperty("blockIndent", null, false, null)
};
// Paragraph attributes read by _paragraphFormatImporter in createParagraphFromXML
static internal var _paragraphFormatDescription:Object = {
textAlign:TextLayoutFormat.textAlignProperty
};
// Link attributes; href and target are kept in separate descriptions because they are
// handed to separately configured importers in createConfig
static internal const _linkHrefDescription:Object = {
href : new StringProperty("href", null, false, null)
};
static internal const _linkTargetDescription:Object = {
target : new StringProperty("target", null, false, null)
};
// Image attributes validated with the InlineGraphicElement property definitions
static internal const _imageDescription:Object = {
height : InlineGraphicElement.heightPropertyDefinition,
width : InlineGraphicElement.widthPropertyDefinition};
// Separate description because id value is case-sensitive unlike others
static internal const _imageMiscDescription:Object = {
src : new StringProperty("src", null, false, null),
id : new StringProperty("id", null, false, null)};
static internal const _classDescription:Object =
{
// A property named 'class' confuses the compiler.
// class : new StringProperty("class", null, false, null)
// So, the 'class' entry is added at run time, in createConfig (called from the constructor)
};
// For some reason, the following can't be initialized here
// They are created once, on the first call to createConfig
static private var _fontImporter:FontImporter;
static private var _fontMiscImporter:CaseInsensitiveTLFFormatImporter;
static private var _textFormatImporter:TextFormatImporter;
static private var _textFormatMiscImporter:CaseInsensitiveTLFFormatImporter;
static private var _paragraphFormatImporter:HtmlCustomParaFormatImporter;
static private var _linkHrefImporter:CaseInsensitiveTLFFormatImporter;
static private var _linkTargetImporter:CaseInsensitiveTLFFormatImporter;
static private var _ilgFormatImporter:CaseInsensitiveTLFFormatImporter;
static private var _ilgMiscFormatImporter:CaseInsensitiveTLFFormatImporter;
static private var _classImporter:CaseInsensitiveTLFFormatImporter;
// Formats specified by formatting elements in the ancestry of the element being parsed currently
// NOTE(review): these holders are static, so they are shared by every HtmlImporter instance
static private var _activeFormat:TextLayoutFormatValueHolder = new TextLayoutFormatValueHolder(); // to be applied to all flow elements
static private var _activeParaFormat:TextLayoutFormatValueHolder = new TextLayoutFormatValueHolder(); // to be applied to paras only
// Format substituted for _activeFormat while an implied paragraph is created (see createImpliedParagraph);
// null unless a character-formatting element is currently being parsed
static private var _activeImpliedParaFormat:TextLayoutFormatValueHolder = null;
// The basis for relative font size calculation
static private var _baseFontSize:Number;
/** Constructor
*
* Forwards the configuration to the base importer together with the element table built by createConfig().
*
* @param textFlowConfiguration configuration handed to the BaseTextLayoutImporter constructor
*/
public function HtmlImporter(textFlowConfiguration:IConfiguration)
{
super(textFlowConfiguration, null, createConfig());
}
/** Builds the table that maps (lower case) element names to their parse functions.
 * On the first call it also creates the shared, static attribute importers.
 *
 * @return a new ImportExportConfiguration describing every element this importer recognizes
 */
private static function createConfig():ImportExportConfiguration
{
	var ieConfig:ImportExportConfiguration = new ImportExportConfiguration();

	// elements that produce flow elements
	ieConfig.addIEInfo("br", BreakElement, BaseTextLayoutImporter.parseBreak, null, false);
	ieConfig.addIEInfo("p", ParagraphElement, HtmlImporter.parsePara, null, true);
	ieConfig.addIEInfo("span", SpanElement, HtmlImporter.parseSpan, null, false);
	ieConfig.addIEInfo("a", LinkElement, HtmlImporter.parseLink, null, false);
	ieConfig.addIEInfo("img", InlineGraphicElement, HtmlImporter.parseInlineGraphic, null, false);

	// elements that only contribute formatting
	ieConfig.addIEInfo("font", null, HtmlImporter.parseFont, null, false);
	ieConfig.addIEInfo("textformat", null, HtmlImporter.parseTextFormat, null, false);
	ieConfig.addIEInfo("u", null, HtmlImporter.parseUnderline, null, false);
	ieConfig.addIEInfo("i", null, HtmlImporter.parseItalic, null, false);
	ieConfig.addIEInfo("b", null, HtmlImporter.parseBold, null, false);

	// The 'class' entry doubles as the "already initialized" marker for the static importers
	if (_classDescription["class"] !== undefined)
		return ieConfig;

	// First call: create what could not be created in the static initializers
	_classDescription["class"] = new StringProperty("class", null, false, null);
	_paragraphFormatImporter = new HtmlCustomParaFormatImporter(TextLayoutFormat, _paragraphFormatDescription);
	_textFormatImporter = new TextFormatImporter(TextLayoutFormat, _textFormatDescription);
	_fontImporter = new FontImporter(TextLayoutFormat, _fontDescription);
	_fontMiscImporter = new CaseInsensitiveTLFFormatImporter(Dictionary, _fontMiscDescription);
	_textFormatMiscImporter = new CaseInsensitiveTLFFormatImporter(Dictionary, _textFormatMiscDescription);
	_linkHrefImporter = new CaseInsensitiveTLFFormatImporter(Dictionary,_linkHrefDescription,false);
	_linkTargetImporter = new CaseInsensitiveTLFFormatImporter(Dictionary,_linkTargetDescription);
	_ilgFormatImporter = new CaseInsensitiveTLFFormatImporter(Dictionary,_imageDescription);
	_ilgMiscFormatImporter = new CaseInsensitiveTLFFormatImporter(Dictionary,_imageMiscDescription, false);
	_classImporter = new CaseInsensitiveTLFFormatImporter(Dictionary,_classDescription);

	return ieConfig;
}
/** Parse and convert input data
 *
 * @param source - the HTML string
 * @return the imported TextFlow, or null when toXML produced no XML
 */
protected override function importFromString(source:String):TextFlow
{
	// source may not be well-formed XML, so it goes through toXML
	// instead of being handed to the XML constructor directly
	var parsedSource:XML = toXML(source);
	if (parsedSource)
		return importFromXML(parsedSource);
	return null;
}
/** Parse and convert input XML data
 *
 * @param xmlSource root element of the XML to import
 * @return the TextFlow built from xmlSource, normalized and with white space collapse applied
 */
protected override function importFromXML(xmlSource:XML):TextFlow
{
	var textFlow:TextFlow = new TextFlow(_textFlowConfiguration);

	// Use font size specified in _textFlowConfiguration.textFlowInitialFormat as the base font size
	// If not specified, use 12
	_baseFontSize = textFlow.fontSize === undefined ? 12 : textFlow.fontSize;

	// Unlike other markup formats, the HTML format for TLF does not have a fixed root XML element.
	// <html> and <body> are optional, and flow elements may or may not be encapsulated in formatting
	// elements like <i> or <b>. Use parseObject to handle any (expected) root element.
	parseObject(xmlSource.name().localName, xmlSource, textFlow);

	// If the last para is implied, there is nothing following it that'll trigger a reset.
	// For most importers, this is fine (clear will eventually reset it), but the HTML importer has
	// some special behavior associated with the reset (replacing BreakElements with para splits).
	// Explicitly do so now (must happen before normalization)
	resetImpliedPara();

	CONFIG::debug { textFlow.debugCheckNormalizeAll() ; }
	textFlow.normalize();
	textFlow.applyWhiteSpaceCollapse(null);
	return textFlow;
}
/** Clears the core styles of both active format holders, then lets the base class clear its own state.
 * (_baseFontSize is not touched here; importFromXML assigns it at the start of every import.)
 */
protected override function clear():void
{
	_activeFormat.coreStyles = null;
	_activeParaFormat.coreStyles = null;
	super.clear();
}
/** Creates an implied paragraph via the base class, temporarily substituting
 * _activeImpliedParaFormat (when set) for _activeFormat while it is created.
 *
 * @return the paragraph created by the base implementation
 */
tlf_internal override function createImpliedParagraph():ParagraphElement
{
	var formatToRestore:TextLayoutFormatValueHolder = _activeFormat;
	var impliedPara:ParagraphElement;

	if (_activeImpliedParaFormat)
		_activeFormat = _activeImpliedParaFormat;

	try
	{
		impliedPara = super.createImpliedParagraph();
	}
	finally
	{
		// always put the regular active format back, even if creation threw
		_activeFormat = formatToRestore;
	}
	return impliedPara;
}
/** Creates a ParagraphElement for a <p> element, without parsing its children.
 * The paragraph format is assembled, in this order, from the element's own attributes,
 * the active paragraph format, the active format, and a <font> element that is the only child.
 *
 * @param xmlToParse the <p> element
 * @return the new, still empty, paragraph
 */
public override function createParagraphFromXML(xmlToParse:XML):ParagraphElement
{
	var newPara:ParagraphElement = new ParagraphElement();

	// Attributes of the element itself
	parseAttributes(xmlToParse, [_paragraphFormatImporter, _classImporter]);
	var paraFormat:TextLayoutFormat = new TextLayoutFormat(_paragraphFormatImporter.result as ITextLayoutFormat);

	// Formats inherited from enclosing formatting elements
	if (_activeParaFormat)
		paraFormat.apply(_activeParaFormat);
	if (_activeFormat)
		paraFormat.apply(_activeFormat);

	// A <font> that is the only child of a <p> formats the paragraph itself;
	// with siblings, it only formats the elements nested within the <font>.
	// Only the former case is handled here.
	var onlyFontChild:XML = getSingleFontChild (xmlToParse);
	if (onlyFontChild)
		paraFormat.apply(parseFontAttributes(onlyFontChild));

	// An explicit line height switches the paragraph to the TextField-like leading model
	if (paraFormat.lineHeight !== undefined)
		paraFormat.leadingModel = LeadingModel.APPROXIMATE_TEXT_FIELD;

	newPara.format = paraFormat;

	// The 'class' attribute, if present, becomes the styleName
	newPara.styleName = _classImporter.getFormatValue("class");

	return newPara;
}
/** Parse the supplied XML into a paragraph. Parse the <p> element and its children.
 *
 * @param importFilter	parser object
 * @param xmlToParse	content to parse
 * @param parent 		the parent for the new content
 */
static public function parsePara(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	var newPara:ParagraphElement = (importFilter as HtmlImporter).createParagraphFromXML(xmlToParse);

	if (importFilter.addChild(parent, newPara))
	{
		// A <font> that is the only child was already consumed by createParagraphFromXML,
		// so in that case descend straight into *its* children.
		var childSource:XML = getSingleFontChild (xmlToParse);
		if (!childSource)
			childSource = xmlToParse;
		parseChildrenUnderNewActiveFormat (importFilter, childSource, newPara, _activeFormat, null);

		// An empty paragraph still gets a span
		if (newPara.numChildren == 0)
			newPara.addChild(new SpanElement());
	}

	// Break elements become paragraph splits. This has to be done before
	// normalization, else BreakElements may merge or become spans.
	replaceBreakElementsWithParaSplits(newPara);
}
/** Hook overridden so that implied paragraphs get the same break handling as explicit ones (see parsePara).
*
* @param para the implied paragraph being reset
*/
protected override function onResetImpliedPara(para:ParagraphElement):void
{
// Replacing break elements with paragraph splits, even for implied paras
replaceBreakElementsWithParaSplits (para);
}
/** If the provided xml has a single child that is a <font> element, get it
 *
 * @param xmlToParse the element whose children are inspected
 * @return the only child when it is an element named "font" (any case); null otherwise
 */
static private function getSingleFontChild (xmlToParse:XML):XML
{
	var children:XMLList = xmlToParse.children();
	if (children.length() != 1)
		return null;

	var child:XML = children[0];

	// Text and comment nodes have no name (name() returns null), so only elements
	// may be asked for their local name; parseSpan guards against the same case.
	if (child.nodeKind() != "element")
		return null;

	return child.name().localName.toLowerCase() == "font" ? child : null;
}
/** Creates a LinkElement for an <a> element, without parsing its children.
 *
 * @param xmlToParse the <a> element
 * @return the new link, carrying href, target and the currently active format
 */
private function createLinkFromXML(xmlToParse:XML):LinkElement
{
	var newLink:LinkElement = new LinkElement();

	parseAttributes(xmlToParse, [ _linkHrefImporter, _linkTargetImporter ]);
	newLink.href = _linkHrefImporter.getFormatValue("href");
	newLink.target = _linkTargetImporter.getFormatValue("target");

	// TextField and TLF disagree on the default target:
	// "_self" vs. null (equivalent to "_blank"). Use the TextField default.
	if (!newLink.target)
		newLink.target = "_self";

	// Formatting inherited from enclosing formatting elements
	newLink.format = _activeFormat;

	return newLink;
}
/** Parse the supplied XML into a LinkElement. Parse the <a> element and its children.
 *
 * @param importFilter	parser object
 * @param xmlToParse	content to parse
 * @param parent 		the parent for the new content
 */
static public function parseLink(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	var newLink:LinkElement = HtmlImporter(importFilter).createLinkFromXML(xmlToParse);

	// Nothing more to do if the link could not be added to its parent
	if (!importFilter.addChild(parent, newLink))
		return;

	parseChildrenUnderNewActiveFormat (importFilter, xmlToParse, newLink, _activeFormat, null);

	// An empty link still gets a span
	if (newLink.numChildren == 0)
		newLink.addChild(new SpanElement());
}
/** Static method for constructing a span from XML. Parse the <span> ... </span> tag.
 * Insert the new content into its parent
 * Note: Differs from BaseTextLayoutImporter.parseSpan in that it allows nested elements.
 *
 * @param importFilter	parser object
 * @param xmlToParse	content to parse
 * @param parent 		the parent for the new content
 */
static public function parseSpan(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	var firstSpan:SpanElement = new SpanElement();

	// The 'class' attribute, if present, becomes the styleName
	importFilter.parseAttributes(xmlToParse, [_classImporter]);
	firstSpan.styleName = _classImporter.getFormatValue("class");

	// Formatting inherited from enclosing formatting elements
	firstSpan.format = _activeFormat;

	var spanContent:XMLList = xmlToParse[0].children();
	if (spanContent.length() == 0)
	{
		// Keep an empty span: it may carry formatting. The normalizer may still
		// strip it out if it is not the last child, but that's the normalizer's business.
		importFilter.addChild(parent, firstSpan);
		return;
	}

	for each (var node:XML in spanContent)
	{
		var nodeName:String = node.name() ? node.name().localName : null;

		if (nodeName != null)
		{
			// A nested element: SpanElements can't nest in the TLF object model, so the
			// element is parsed as a sibling of the spans created for the text nodes,
			// directly under parent. Formatting associated with this span's class
			// therefore does not reach the nested element's content.
			importFilter.parseObject(nodeName, node, parent);
			continue;
		}

		// A text node: reuse firstSpan until it has been added to a parent, then
		// create further spans that copy its style properties
		var textSpan:SpanElement;
		if (firstSpan.parent == null)
		{
			textSpan = firstSpan;
		}
		else
		{
			textSpan = new SpanElement();
			copyAllStyleProps(textSpan,firstSpan);
		}
		textSpan.text = node.toString();
		importFilter.addChild(parent, textSpan);
	}
}
/** Creates an InlineGraphicElement for an <img> element.
 *
 * @param xmlToParse the <img> element
 * @return the new inline graphic, carrying source, height, width, id and the currently active format
 */
private function createInlineGraphicFromXML(xmlToParse:XML):InlineGraphicElement
{
	var imgElem:InlineGraphicElement = new InlineGraphicElement();

	var formatImporters:Array = [_ilgFormatImporter, _ilgMiscFormatImporter];
	parseAttributes(xmlToParse,formatImporters);

	var source:String = _ilgMiscFormatImporter.getFormatValue("src");
	imgElem.source = source;

	// if not defined then let InlineGraphic set its own default
	// Each dimension is validated with its own property definition
	// (width was previously run through the height definition)
	imgElem.height = InlineGraphicElement.heightPropertyDefinition.setHelper(imgElem.height,_ilgFormatImporter.getFormatValue("height"));
	imgElem.width = InlineGraphicElement.widthPropertyDefinition.setHelper(imgElem.width,_ilgFormatImporter.getFormatValue("width"));

	/* Not currently supported
	var floatVal:String = _ilgFormatImporter.getFormatValue("align");
	// Handle difference in defaults between TextField and TLF
	// float "left" vs. "none"
	imgElem.float = floatVal ? floatVal : Float.LEFT;
	*/

	var id:String = _ilgMiscFormatImporter.getFormatValue("id");
	imgElem.id = id;

	// Apply active format
	imgElem.format = _activeFormat;

	return imgElem;
}
/** Parse the supplied XML into an InlineGraphicElement. Parse the <img> element.
 *
 * @param importFilter	parser object
 * @param xmlToParse	content to parse
 * @param parent 		the parent for the new content
 */
static public function parseInlineGraphic(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	// Build the graphic from the element's attributes and hand it straight to the parent
	importFilter.addChild(parent, HtmlImporter(importFilter).createInlineGraphicFromXML(xmlToParse));
}
/** Overridden to never create a TabElement.
*
* @param xmlToParse ignored
* @return always null
*/
public override function createTabFromXML(xmlToParse:XML):TabElement
{
return null; // no tabs in HTML
}
/** Parse the attributes of the <font> formatting element and returns the corresponding TLF format
 *
 * @param xmlToParse the <font> element
 * @return a format holding the directly mapped attributes plus kerning and fontSize derived from
 *         the 'kerning' and 'size' attributes when these are present and valid
 */
private function parseFontAttributes(xmlToParse:XML):ITextLayoutFormat
{
	var formatImporters:Array = [_fontImporter, _fontMiscImporter];
	parseAttributes(xmlToParse, formatImporters);

	var newFormat:TextLayoutFormatValueHolder = new TextLayoutFormatValueHolder(_fontImporter.result as ITextLayoutFormat);

	// kerning: 0 turns kerning off, anything else turns it on
	var kerning:String = _fontMiscImporter.getFormatValue("kerning");
	if (kerning)
	{
		var kerningVal:Number = Number(kerning);
		newFormat.kerning = kerningVal == 0 ? Kerning.OFF : Kerning.AUTO;
	}

	var size:String = _fontMiscImporter.getFormatValue("size");
	if (size)
	{
		var sizeVal:Number = TextLayoutFormat.fontSizeProperty.setHelper(NaN, size);
		if (!isNaN(sizeVal))
		{
			// Optional leading whitespace followed by + or - implies a size relative to the base font size.
			// The pattern is anchored so that a sign later in the value (e.g. an exponent) doesn't count.
			if (size.search(/^\s*(-|\+)/) != -1)
				sizeVal += _baseFontSize;
			newFormat.fontSize = sizeVal;
		}
	}

	return newFormat;
}
/** Parse the <font> formatting element
 * Calculates the new format to apply to _activeFormat and continues parsing down the hierarchy
 *
 * @param importFilter	parser object
 * @param xmlToParse	the <font> element
 * @param parent 		the parent for content nested in the element
 */
static public function parseFont(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	parseChildrenUnderNewActiveFormatWithImpliedParaFormat(
		importFilter,
		xmlToParse,
		parent,
		(importFilter as HtmlImporter).parseFontAttributes (xmlToParse));
}
/** Parse the <textformat> formatting element
 * Calculates the new format to apply to _activeParaFormat and continues parsing down the hierarchy
 *
 * @param importFilter	parser object
 * @param xmlToParse	the <textformat> element
 * @param parent 		the parent for content nested in the element
 */
static public function parseTextFormat(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	importFilter.parseAttributes(xmlToParse, [_textFormatImporter, _textFormatMiscImporter]);

	var textFormat:TextLayoutFormatValueHolder = new TextLayoutFormatValueHolder(_textFormatImporter.result as ITextLayoutFormat);

	// blockIndent has no TLF counterpart: it is added to the paragraph start indent
	var blockIndentAttr:String = _textFormatMiscImporter.getFormatValue("blockIndent");
	if (blockIndentAttr)
	{
		// TODO: Nested <textformat>?
		var indent:Number = TextLayoutFormat.paragraphStartIndentProperty.setHelper(NaN, blockIndentAttr);
		if (!isNaN(indent))
		{
			if (textFormat.paragraphStartIndent === undefined)
				textFormat.paragraphStartIndent = indent;
			else
				textFormat.paragraphStartIndent = textFormat.paragraphStartIndent + indent;
		}
	}

	parseChildrenUnderNewActiveFormat (importFilter, xmlToParse, parent, _activeParaFormat, textFormat, true);
}
/** Parse the <b> formatting element
 * Calculates the new format to apply to _activeFormat and continues parsing down the hierarchy
 *
 * @param importFilter	parser object
 * @param xmlToParse	the <b> element
 * @param parent 		the parent for content nested in the element
 */
static public function parseBold(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	var boldFormat:TextLayoutFormatValueHolder = new TextLayoutFormatValueHolder();
	boldFormat.fontWeight = flash.text.engine.FontWeight.BOLD;

	parseChildrenUnderNewActiveFormatWithImpliedParaFormat (importFilter, xmlToParse, parent, boldFormat);
}
/** Parse the <i> formatting element
 * Calculates the new format to apply to _activeFormat and continues parsing down the hierarchy
 *
 * @param importFilter	parser object
 * @param xmlToParse	the <i> element
 * @param parent 		the parent for content nested in the element
 */
static public function parseItalic(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	var italicFormat:TextLayoutFormatValueHolder = new TextLayoutFormatValueHolder();
	italicFormat.fontStyle = flash.text.engine.FontPosture.ITALIC;

	parseChildrenUnderNewActiveFormatWithImpliedParaFormat (importFilter, xmlToParse, parent, italicFormat);
}
/** Parse the <u> formatting element
 * Calculates the new format to apply to _activeFormat and continues parsing down the hierarchy
 *
 * @param importFilter	parser object
 * @param xmlToParse	the <u> element
 * @param parent 		the parent for content nested in the element
 */
static public function parseUnderline(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement):void
{
	var underlineFormat:TextLayoutFormatValueHolder = new TextLayoutFormatValueHolder();
	underlineFormat.textDecoration = flashx.textLayout.formats.TextDecoration.UNDERLINE;

	parseChildrenUnderNewActiveFormatWithImpliedParaFormat(importFilter, xmlToParse, parent, underlineFormat);
}
/** Parses the children of a character-formatting element under the new active format.
 * Unless one is already in effect, a snapshot of _activeFormat taken before newFormat is applied
 * becomes the implied-paragraph format for the duration of the parse (see createImpliedParagraph).
 *
 * @param importFilter	parser object
 * @param xmlToParse	the formatting element
 * @param parent 		the parent for the parsed children
 * @param newFormat 	the format to apply to _activeFormat while the children are being parsed
 */
static private function parseChildrenUnderNewActiveFormatWithImpliedParaFormat(importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement, newFormat:ITextLayoutFormat):void
{
	var savedImpliedParaFormat:TextLayoutFormatValueHolder = _activeImpliedParaFormat;

	// Only the outermost formatting element takes the snapshot
	if (savedImpliedParaFormat == null)
		_activeImpliedParaFormat = new TextLayoutFormatValueHolder(_activeFormat);

	try
	{
		parseChildrenUnderNewActiveFormat(importFilter, xmlToParse, parent, _activeFormat, newFormat, true);
	}
	finally
	{
		_activeImpliedParaFormat = savedImpliedParaFormat;
	}
}
/** Updates the current active format and base font size as specified, parses children, and restores the active format and base font size
 * There are two different use cases for this method:
 * - Parsing children of a formatting XML element like <font> or <textformat>. In this case, the TLF format corresponding to the
 *   formatting element (newFormat) is applied to the currently active format (_activeFormat or _activeParaFormat, as chosen by the caller).
 *   Children of the formatting element are parsed under this new active format.
 * - Parsing children of a flow XML element like <p> or <a>. In this case, newFormat is null and the currently active format (_activeFormat) is reset.
 *   Children of the flow element are parsed under this newly reset format. This is to avoid redundancy (the format is already applied to the flow element).
 *
 * @param importFilter	parser object
 * @param xmlToParse	content to parse
 * @param parent 		the parent for the parsed children
 * @param currFormat	the active format (_activeFormat or _activeParaFormat)
 * @param newFormat		the format to apply to currFormat while the children are being parsed. If null, currFormat is to be reset.
 * @param chainedParent whether parent actually corresponds to xmlToParse or has been chained (such as when xmlToParse is a formatting element). See BaseTextLayoutImporter.parseFlowGroupElementChildren
 */
static private function parseChildrenUnderNewActiveFormat (importFilter:BaseTextLayoutImporter, xmlToParse:XML, parent:FlowGroupElement, currFormat:TextLayoutFormatValueHolder, newFormat:ITextLayoutFormat, chainedParent:Boolean=false):void
{
	// Snapshot of the state to restore afterwards
	var savedBaseFontSize:Number = _baseFontSize;
	var savedCoreStyles:Object = Property.shallowCopy(currFormat.coreStyles);

	if (!newFormat)
	{
		// Flow element: start its children from a clean format; base font size stays as is
		currFormat.coreStyles = null;
	}
	else
	{
		// Formatting element: a font size in the new format becomes the base for relative sizes
		if (newFormat.fontSize !== undefined)
			_baseFontSize = newFormat.fontSize;

		currFormat.apply(newFormat);
	}

	try
	{
		importFilter.parseFlowGroupElementChildren(xmlToParse, parent, null, chainedParent);
	}
	finally
	{
		_baseFontSize = savedBaseFontSize;
		currFormat.coreStyles = savedCoreStyles;
	}
}
/** Overridden to do nothing: attributes this importer does not recognize are silently ignored.
*
* @param elementName name of the element carrying the attribute (unused)
* @param propertyName name of the unrecognized attribute (unused)
*/
protected override function handleUnknownAttribute(elementName:String, propertyName:String):void
{
// A toss-up: report error or ignore? Ignore for now
// If we do end up reporting error, we should add exceptions for documented attributes that we don't handle
// (for example the align attribute, which createInlineGraphicFromXML leaves unsupported for images)
}
/** Overridden so that an unrecognized element is skipped but its children are still parsed into parent.
*
* @param name name of the unrecognized element (unused)
* @param xmlToParse the unrecognized element
* @param parent the parent that receives the element's parsed children
*/
protected override function handleUnknownElement(name:String, xmlToParse:XML, parent:FlowGroupElement):void
{
// Not an error (it may be a styling element that this importer does not handle); continue parsing children
// The last argument (true) marks parent as chained, i.e. not corresponding to xmlToParse itself
parseFlowGroupElementChildren (xmlToParse, parent, null, true);
}
/** Overridden to lower-case the element name before delegating to the base class.
* The element table built in createConfig registers lower case names only.
*
* @param name element name, in any case
* @param xmlToParse the element to parse
* @param parent the parent for the parsed content
* @param exceptionElements passed through to the base implementation unchanged
*/
tlf_internal override function parseObject(name:String, xmlToParse:XML, parent:FlowGroupElement, exceptionElements:Object=null):void
{
// override to allow upper case tag names
super.parseObject(name.toLowerCase(), xmlToParse, parent, exceptionElements);
}
/** Overridden to accept elements regardless of their namespace.
*
* @param xmlToParse the element being checked (unused)
* @return always true
*/
protected override function checkNamespace(xmlToParse:XML):Boolean
{
/* Ignore namespace */
return true;
}
/** Splits the paragraph wherever a break element occurs and removes the latter
* This is to replicate TextField handling of <br/>: splits the containing paragraph (implied or otherwise)
* The <br/> itself doesn't survive.
*
* @param para the paragraph to scan; if it contains no BreakElement it is left untouched
*/
static private function replaceBreakElementsWithParaSplits(para:ParagraphElement):void
{
// performance: when splitting the paragraph into multiple paragraphs take it out of the TextFlow
// paraArray stays undefined until the first BreakElement is found; it then collects the original
// paragraph followed by every paragraph produced by a split
var paraArray:Array;
var paraIndex:int;
var paraParent:FlowGroupElement;
// Find each BreakElement and split into a new paragraph
var elem:FlowLeafElement = para.getFirstLeaf();
while (elem)
{
if (!(elem is BreakElement))
{
elem = elem.getNextLeaf(para);
continue;
}
if (!paraArray)
{
// First break found: remember where the paragraph lived and detach it from its parent
paraArray = [ para ];
paraParent = para.parent;
paraIndex = paraParent.getChildIndex(para);
paraParent.removeChildAt(paraIndex);
}
// Split the para right after the BreakElement
//CONFIG::debug { assert(elem.textLength == 1,"Bad TextLength in BreakElement"); }
// NOTE(review): the assert below presumably holds because para is detached from its parent at this point - confirm
CONFIG::debug {assert( para.getAbsoluteStart() == 0,"Bad paragraph in replaceBreakElementsWithParaSplits"); }
// From here on para refers to the paragraph returned by the split
para = para.splitAtPosition(elem.getAbsoluteStart()+elem.textLength) as ParagraphElement;
paraArray.push(para);
// Remove the BreakElement
elem.parent.removeChild(elem);
// point elem to the first leaf of the new paragraph
elem = para.getFirstLeaf();
}
// Put all resulting paragraphs back at the position the original paragraph was removed from
if (paraArray)
paraParent.replaceChildren(paraIndex,paraIndex,paraArray);
}
/** HTML parsing code
* Uses regular expressions for recognizing constructs like comments, tags etc.
* and a hand-coded parser to recognize the document structure and convert to well-formed xml
* TODO-1/16/2009:List caveats
*/
/** Regex for stuff to be stripped: a comment, processing instruction, or a declaration
 *
 * <!--.*?--> - comment
 *   <!-- - start comment
 *   .*? - anything (including newline characters, thanks to the s flag); the ? prevents a greedy match (which could match a --> later in the string)
 *   --> - end comment
 *
 * <\?(".*?"|'.*?'|[^>"']+)*> - processing instruction
 *   <\? - start processing instruction
 *   (".*?"|'.*?'|[^>"']+)* - 0 or more of the following (interleaved in any order)
 *     ".*?" - anything (including >) so long as it is within double quotes; the ? prevents a greedy match (which could match everything until a later " in the string)
 *     '.*?' - anything (including >) so long as it is within single quotes; the ? prevents a greedy match (which could match everything until a later ' in the string)
 *     [^>"']+ - one or more characters other than > (because > ends the processing instruction), " (handled above), ' (handled above)
 *   > - end processing instruction
 *
 * <!(".*?"|'.*?'|[^>"']+)*> - declaration;
 *   TODO-1/15/2009:not sure if a declaration can contain > within quotes. Assuming it can, the regex
 *   is exactly like processing instruction above except it uses a ! instead of a ?
 */
private static var stripRegex:RegExp = /<!--.*?-->|<\?(".*?"|'.*?'|[^>"']+)*>|<!(".*?"|'.*?'|[^>"']+)*>/sg;
/** Regular expression for an HTML tag
* < - open
*
* (\/?) - start modifier; 0 or 1 occurrence of /
*
* (\w+) - tag name; 1 or more name characters
*
* ((?:\s+\w+(?:\s*=\s*(?:".*?"|'.*?'|[\w\.]+))?)*) - attributes; 0 or more of the following
* (?:\s+\w+(?:\s*=\s*(?:".*?"|'.*?'|[\w\.]+))?) - attribute; 1 or more space, followed by 1 or more name characters optionally followed by
* \s*=\s*(?:".*?"|'.*?'|[\w\.]+) - attribute value assignment; optional space followed by = followed by more optional space followed by one of
* ".*?" - quoted attribute value (using double quotes); the ? prevents a greedy match (which could match everything until a later " in the string)
* '.*?' - quoted attribute value (using single quotes); the ? prevents a greedy match (which could match everything until a later ' in the string)
* [\w\.]+ - unquoted attribute value; can only contain name characters or a period
* Note: ?: specifies a non-capturing group (i.e., match won't be recorded or used as a numbered back-reference)
*
* \s* - optional space
*
* (\/?) - end modifier (0 or 1 occurrence of /)
*
* > - close
*
* Capturing groups, in order: start modifier, tag name, attributes, end modifier.
* The g flag makes exec() resume from lastIndex, which toXMLInternal resets to 0 before scanning.
*/
private static var tagRegex:RegExp = /<(\/?)(\w+)((?:\s+\w+(?:\s*=\s*(?:".*?"|'.*?'|[\w\.]+))?)*)\s*(\/?)>/sg;
/** Regular expression for an attribute. Except for grouping differences, this regex is the same as the one that appears in tagRegex
* Capturing groups, in order: attribute name, attribute value (including its quotes, if any; not set for an attribute without a value)
*/
private static var attrRegex:RegExp = /\s+(\w+)(?:\s*=\s*(".*?"|'.*?'|[\w\.]+))?/sg;
/** Wrapper for core HTML parsing code that manages XML settings during the process
 *
 * The global XML settings are saved, changed so that processing instructions and white space
 * are not ignored, and restored once the conversion is done, whether or not it succeeded.
 *
 * @param source the HTML string
 * @return the XML produced by toXMLInternal
 */
private function toXML(source:String):XML
{
	var savedXMLSettings:Object = XML.settings();
	var converted:XML;

	try
	{
		XML.ignoreWhitespace = false;
		XML.ignoreProcessingInstructions = false;
		converted = toXMLInternal(source);
	}
	finally
	{
		XML.setSettings(savedXMLSettings);
	}

	return converted;
}
/** Convert HTML string to well-formed xml, accounting for the following HTML oddities
*
* 1) Start tags are optional for some elements.
* Optional start tag not specified
* TextField dialect: This is true for all elements.
*
* 2) End tags are optional for some elements. Elements with missing end tags may be implicitly closed by
* a) start-tag for a peer element
* p element without end tag; closed by next p start tag
*
closes previous p element with missing end tag
*
* b) end-tag for an ancestor element
* p element without end tag; closed by next end tag of an ancestor
* TextField dialect: This is true for all elements.
*
* 3) End tags are forbidden for some elements
*
and
are valid, but
is not
* TextField dialect: Does not apply.
*
* 4) Element and attribute names may use any case
*
*
* 5) Attribute values may be unquoted
*
*
* 6) Boolean attributed may assume a minimized form
*
is equivalent to
*
*/
private function toXMLInternal(source:String):XML
{
    // Converts leniently written HTML markup into a well-formed XML hierarchy.
    //   source  - the markup string to convert
    //   returns - the synthetic root element; every parsed tag and text run is attached beneath it
    // Malformed end tags are passed to reportError; text runs are added through appendTextChild.

    // Strip out comments, processing instructions and declarations
    source = source.replace(stripRegex, "");
    // Parse the source, looking for tags and interleaved text content, creating an XML hierarchy in the process.
    // At any given time, there is a chain of 'open' elements corresponding to unclosed tags, the innermost of which is
    // tracked by the currElem. Content (element or text) parsed next is added as a child of currElem.
    // Root of the XML hierarchy (set to <html/> because the html start tag is optional)
    // Note that source may contain an html start tag, in which case we'll end up with two such elements
    // This is not quite correct, but handled by the importer
    var root:XML = <html/>;
    var currElem:XML = root;
    // Restart the (stateful) tag regex from the beginning of the string
    var lastIndex:int = tagRegex.lastIndex = 0;
    var openElemName:String;
    do
    {
        var result:Object = tagRegex.exec(source);
        if (!result)
        {
            // No more tags: add text (starting at search index) as a child of the innermost open element and break out
            appendTextChild (currElem, source.substring(lastIndex));
            break;
        }
        if (result.index != lastIndex)
        {
            // Add text between tags as a child of the innermost open element
            appendTextChild (currElem, source.substring(lastIndex, result.index));
        }
        var tag:String = result[0]; // entire tag
        var hasStartModifier:Boolean = (result[1] == "\/"); // modifier after < (/ for end tag)
        var name:String = result[2].toLowerCase(); // name; use lower case
        var attrs:String = result[3]; // attributes; including whitespace
        var hasEndModifier:Boolean = (result[4] == "\/"); // modifier before > (/ for composite start and end tag)
        if (!hasStartModifier) // start tag
        {
            // Special case for implicit closing of a p element by the next p start tag
            // TODO-12/23/2008: this will need to be handled more generically
            if (name == "p" && currElem.name().localName == "p")
                currElem = currElem.parent();
            // Create an XML element by constructing a tag that can be fed to the XML constructor. Specifically, ensure
            // - it is a composite tag (start and end tag together) using the terminating slash shorthand
            // - element and attribute names are lower case (this is not required, but doesn't hurt)
            // - attribute values are quoted
            // - boolean attributes are fully specified (e.g., selected="selected" rather than selected)
            tag = "<" + name;
            do
            {
                var innerResult:Object = attrRegex.exec(attrs);
                if (!innerResult)
                    break;
                var attrName:String = innerResult[1].toLowerCase();
                tag += " " + attrName + "=";
                var val:String = innerResult[2] ? innerResult[2] : attrName /* boolean attribute with implied value equal to attribute name */;
                var startChar:String = val.charAt(0);
                // Values that already start with a quote character are used verbatim; everything else gets double quotes
                tag += ((startChar == "'" || startChar == "\"") ? val : ("\"" + val + "\""));
            } while (true);
            tag += "\/>";
            // Add the corresponding element as a child of the innermost open element
            currElem.appendChild(new XML(tag));
            // The new element becomes the innermost open element unless it is already closed because
            // - this is a composite start and end tag (i.e., has an end modifier)
            // - the start tag itself implies closure
            if (!hasEndModifier && !doesStartTagCloseElement(name))
                currElem = currElem.children()[currElem.children().length()-1];
        }
        else // end tag
        {
            if (hasEndModifier || attrs.length)
            {
                // An end tag may carry neither a trailing slash nor attributes
                reportError(GlobalSettings.resourceStringFunction("malformedTag",[tag]));
            }
            else
            {
                /*
                // Does not apply to TextField dialect
                if (isEndTagForbidden(name))
                {
                xxxreportError("End tag is not allowed for element " + name); NOTE : MAKE A LOCALIZABLE ERROR IF THIS COMES BACK
                return null;
                }*/
                // Move up the chain of open elements looking for a matching name
                // The matching element is closed and its parent becomes the innermost open element
                // Report error if matching element is not found and it requires a start tag
                // All intermediate open elements are also closed provided they don't require end tags
                // Report error if an intermediate element requires end tags
                var openElem:XML = currElem;
                do
                {
                    openElemName = openElem.name().localName;
                    openElem = openElem.parent();
                    if (openElemName == name)
                    {
                        currElem = openElem;
                        break;
                    }
                    /*
                    // Does not apply to TextField dialect
                    else if (isEndTagRequired(openElemName))
                    {
                    xxxreportError("Missing end tag for element " + openElemName);
                    return null;
                    }*/
                    if (!openElem)
                    {
                        // Ran past the root without a match: currElem is left untouched, so the end tag is ignored
                        // Does not apply to TextField dialect
                        /*if (isStartTagRequired(name))
                        {
                        xxxreportError("Unexpected end tag " + name);
                        return null;
                        }*/
                        break;
                    }
                }
                while (true);
            }
        }
        lastIndex = tagRegex.lastIndex;
        if (lastIndex == source.length)
            break; // string completely parsed
    } while (currElem); // null currElem means <html/> has been closed, so ignore everything else
    // No more string to parse, specifically, no more end tags.
    // Validate that remaining open elements do not require end tags.
    // Does not apply to TextField dialect
    /* while (currElem)
    {
    openElemName = currElem.name().localName;
    if (isEndTagRequired(openElemName))
    {
    xxxreportError("Missing end tag for element " + openElemName);
    return null;
    }
    currElem = currElem.parent();
    }*/
    return root;
}
/** TODO-1/16/2009-Evaluate if following code may be better implemented using dictionaries queried at runtime
*/
/*
// TextField dialect: Not used
private function isStartTagRequired (tagName:String):Boolean
{
switch (tagName)
{
case "a":
case "b":
case "br":
case "font":
case "i":
case "img":
case "p":
case "span":
case "textformat":
case "u":
return true;
default:
// html, head, body, and unrecognized elements (which are handled leniently)
return false;
}
}
private function isEndTagRequired (tagName:String):Boolean
{
switch (tagName)
{
case "a":
case "b":
case "font":
case "i":
case "span":
case "textformat":
case "u":
return true;
default:
// html, head, body, p, br, image and unrecognized elements (which are handled leniently)
return false;
}
}
private function isEndTagForbidden (tagName:String):Boolean
{
switch (tagName)
{
case "br":
case "img":
return true;
default:
return false;
}
}*/
private function doesStartTagCloseElement (tagName:String):Boolean
{
    // Only br and img are treated as closed by their own start tag;
    // every other element name stays open until it is closed some other way.
    return tagName == "br" || tagName == "img";
}
// Matches any character other than tab, line feed, carriage return and space.
// Only referenced by the disabled whitespace-only filter in appendTextChild.
private static const anyPrintChar:RegExp = /[^\u0009\u000a\u000d\u0020]/g;
/** Adds text beneath the specified XML element.
 * If parent is not a span, the text is wrapped in a new span element and that span is appended.
 * If parent is a span, the text is parsed inside a throw-away wrapper and the wrapper's first child is appended.
 * The whitespace-only filter is currently disabled, so whitespace-only text is passed through as well.
 * Text that the XML constructor rejects is reported through reportError.
 */
private function appendTextChild(parent:XML, text:String):void
{
    // No whitespace collapse
    // if (text.match(anyPrintChar).length != 0)

    var unwrapResult:Boolean = false;
    var wrapperName:String = "span";
    if (parent.localName() == "span")
    {
        // Parent already is a span: use a scratch wrapper and keep only its content
        unwrapResult = true;
        wrapperName = "dummy";
    }
    // Building a markup string (rather than appending the raw text to an XML literal)
    // lets the XML constructor handle character entities
    var markup:String = "<" + wrapperName + ">" + text + "<\/" + wrapperName + ">";
    try
    {
        var wrapped:XML = new XML(markup);
        if (unwrapResult)
            parent.appendChild(wrapped.children()[0]);
        else
            parent.appendChild(wrapped);
    }
    catch (e:*)
    {
        // Report content that cannot be parsed as XML (e.g., an unescaped markup character)
        reportError(GlobalSettings.resourceStringFunction("malformedMarkup",[text]));
    }
}
}
}
import flashx.textLayout.conversion.TLFormatImporter;
/** Format importer that matches keys case-insensitively (as required by TEXT_FIELD_HTML_FORMAT).
 * Description keys and incoming keys are lower-cased; incoming values are lower-cased only
 * when the flag passed to the constructor says so.
 */
class CaseInsensitiveTLFFormatImporter extends TLFormatImporter
{
    /** Whether importOneFormat lower-cases values before handing them to the base class */
    private var _convertValuesToLowerCase:Boolean;

    /**
     * @param classType passed through to the base class
     * @param description property description; its keys are lower-cased before being passed to the base class
     * @param convertValuesToLowerCase if true (default), imported values are lower-cased too
     */
    public function CaseInsensitiveTLFFormatImporter(classType:Class,description:Object, convertValuesToLowerCase:Boolean=true)
    {
        _convertValuesToLowerCase = convertValuesToLowerCase;
        // Re-key the description so that lookups with lower-cased keys succeed
        var rekeyed:Object = {};
        for (var descKey:Object in description)
            rekeyed[descKey.toLowerCase()] = description[descKey];
        super(classType, rekeyed);
    }

    /** Lower-cases the key (and, if configured, the value) and delegates to the base class. */
    public override function importOneFormat(key:String,val:String):Boolean
    {
        var lowerKey:String = key.toLowerCase();
        var effectiveVal:String = val;
        if (_convertValuesToLowerCase)
            effectiveVal = val.toLowerCase();
        return super.importOneFormat(lowerKey, effectiveVal);
    }

    /** Looks up key (case-insensitively) in result; yields undefined when result is not set. */
    public function getFormatValue (key:String):*
    {
        if (!result)
            return undefined;
        return result[key.toLowerCase()];
    }
}
/** Format importer for paragraph attributes: maps the "align" attribute to textAlign. */
class HtmlCustomParaFormatImporter extends TLFormatImporter
{
    public function HtmlCustomParaFormatImporter(classType:Class,description:Object)
    {
        super(classType,description);
    }

    /** Lower-cases key and value, renames "align" to "textAlign", then delegates to the base class. */
    public override function importOneFormat(key:String,val:String):Boolean
    {
        var lowerKey:String = key.toLowerCase();
        var mappedKey:String = (lowerKey == "align") ? "textAlign" : lowerKey;
        // Convert val to lower case because TLF won't accept, say, "RIGHT"
        var lowerVal:String = val.toLowerCase();
        return super.importOneFormat(mappedKey, lowerVal);
    }
}
/** Format importer that renames textformat-style attribute names to their TLF format names. */
class TextFormatImporter extends TLFormatImporter
{
    public function TextFormatImporter(classType:Class,description:Object)
    {
        super(classType,description);
    }

    /** Lower-cases the key, maps recognized names to TLF format names, and delegates to the base class.
     * Unrecognized keys are forwarded in lower case with the value untouched.
     */
    public override function importOneFormat(key:String,val:String):Boolean
    {
        var lowerKey:String = key.toLowerCase();
        var mappedKey:String = lowerKey;
        var mappedVal:String = val;
        switch (lowerKey)
        {
            case "leftmargin":
                // assumed to be left-to-right text since we don't handle DIR attribute
                mappedKey = "paragraphStartIndent";
                break;
            case "rightmargin":
                // assumed to be left-to-right text since we don't handle DIR attribute
                mappedKey = "paragraphEndIndent";
                break;
            case "indent":
                mappedKey = "textIndent";
                break;
            case "leading":
                mappedKey = "lineHeight";
                break;
            case "tabstops":
                mappedKey = "tabStops";
                // Comma-delimited in TextField HTML format, space delimited in TLF
                mappedVal = val.replace(/,/g, ' ');
                break;
        }
        // No case conversion of the value required; values for these formats in TLF are case-insensitive
        return super.importOneFormat(mappedKey, mappedVal);
    }
}
/** Format importer that renames font attribute names to their TLF format names. */
class FontImporter extends TLFormatImporter
{
    public function FontImporter(classType:Class,description:Object)
    {
        super(classType,description);
    }

    /** Lower-cases the key, maps "letterspacing" and "face" to TLF format names, and delegates to the base class. */
    public override function importOneFormat(key:String,val:String):Boolean
    {
        var lowerKey:String = key.toLowerCase();
        var mappedKey:String;
        switch (lowerKey)
        {
            case "letterspacing":
                mappedKey = "trackingRight";
                break;
            case "face":
                mappedKey = "fontFamily";
                break;
            default:
                mappedKey = lowerKey;
                break;
        }
        // No case conversion of the value required; values for these formats in TLF are case-insensitive
        return super.importOneFormat(mappedKey, val);
    }
}