/**
* DomParser.js
*
* Copyright, Moxiecode Systems AB
* Released under LGPL License.
*
* License: http://www.tinymce.com/license
* Contributing: http://www.tinymce.com/contributing
*/
(function(tinymce) {
var Node = tinymce.html.Node;
/**
* This class parses HTML code into a DOM like structure of nodes it will remove redundant whitespace and make
* sure that the node tree is valid according to the specified schema. So for example:
a
b
c will become a
b
c
*
* @example
* var parser = new tinymce.html.DomParser({validate: true}, schema);
* var rootNode = parser.parse('content
');
*
* @class tinymce.html.DomParser
* @version 3.4
*/
/**
* Constructs a new DomParser instance.
*
* @constructor
* @method DomParser
* @param {Object} settings Name/value collection of settings. comment, cdata, text, start and end are callbacks.
* @param {tinymce.html.Schema} schema HTML Schema class to use when parsing.
*/
tinymce.html.DomParser = function(settings, schema) {
var self = this, nodeFilters = {}, attributeFilters = [], matchedNodes = {}, matchedAttributes = {};
settings = settings || {};
settings.validate = "validate" in settings ? settings.validate : true;
settings.root_name = settings.root_name || 'body';
self.schema = schema = schema || new tinymce.html.Schema();
function fixInvalidChildren(nodes) {
var ni, node, parent, parents, newParent, currentNode, tempNode, childNode, i,
childClone, nonEmptyElements, nonSplitableElements, textBlockElements, sibling, nextNode;
nonSplitableElements = tinymce.makeMap('tr,td,th,tbody,thead,tfoot,table');
nonEmptyElements = schema.getNonEmptyElements();
textBlockElements = schema.getTextBlockElements();
for (ni = 0; ni < nodes.length; ni++) {
node = nodes[ni];
// Already removed or fixed
if (!node.parent || node.fixed)
continue;
// If the invalid element is a text block and the text block is within a parent LI element
// Then unwrap the first text block and convert other sibling text blocks to LI elements similar to Word/Open Office
if (textBlockElements[node.name] && node.parent.name == 'li') {
// Move sibling text blocks after LI element
sibling = node.next;
while (sibling) {
if (textBlockElements[sibling.name]) {
sibling.name = 'li';
sibling.fixed = true;
node.parent.insert(sibling, node.parent);
} else {
break;
}
sibling = sibling.next;
}
// Unwrap current text block
node.unwrap(node);
continue;
}
// Get list of all parent nodes until we find a valid parent to stick the child into
parents = [node];
for (parent = node.parent; parent && !schema.isValidChild(parent.name, node.name) && !nonSplitableElements[parent.name]; parent = parent.parent)
parents.push(parent);
// Found a suitable parent
if (parent && parents.length > 1) {
// Reverse the array since it makes looping easier
parents.reverse();
// Clone the related parent and insert that after the moved node
newParent = currentNode = self.filterNode(parents[0].clone());
// Start cloning and moving children on the left side of the target node
for (i = 0; i < parents.length - 1; i++) {
if (schema.isValidChild(currentNode.name, parents[i].name)) {
tempNode = self.filterNode(parents[i].clone());
currentNode.append(tempNode);
} else
tempNode = currentNode;
for (childNode = parents[i].firstChild; childNode && childNode != parents[i + 1]; ) {
nextNode = childNode.next;
tempNode.append(childNode);
childNode = nextNode;
}
currentNode = tempNode;
}
if (!newParent.isEmpty(nonEmptyElements)) {
parent.insert(newParent, parents[0], true);
parent.insert(node, newParent);
} else {
parent.insert(node, parents[0], true);
}
// Check if the element is empty by looking through it's contents and special treatment for
parent = parents[0];
if (parent.isEmpty(nonEmptyElements) || parent.firstChild === parent.lastChild && parent.firstChild.name === 'br') {
parent.empty().remove();
}
} else if (node.parent) {
// If it's an LI try to find a UL/OL for it or wrap it
if (node.name === 'li') {
sibling = node.prev;
if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
sibling.append(node);
continue;
}
sibling = node.next;
if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) {
sibling.insert(node, sibling.firstChild, true);
continue;
}
node.wrap(self.filterNode(new Node('ul', 1)));
continue;
}
// Try wrapping the element in a DIV
if (schema.isValidChild(node.parent.name, 'div') && schema.isValidChild('div', node.name)) {
node.wrap(self.filterNode(new Node('div', 1)));
} else {
// We failed wrapping it, then remove or unwrap it
if (node.name === 'style' || node.name === 'script')
node.empty().remove();
else
node.unwrap();
}
}
}
};
/**
* Runs the specified node though the element and attributes filters.
*
* @param {tinymce.html.Node} Node the node to run filters on.
* @return {tinymce.html.Node} The passed in node.
*/
self.filterNode = function(node) {
var i, name, list;
// Run element filters
if (name in nodeFilters) {
list = matchedNodes[name];
if (list)
list.push(node);
else
matchedNodes[name] = [node];
}
// Run attribute filters
i = attributeFilters.length;
while (i--) {
name = attributeFilters[i].name;
if (name in node.attributes.map) {
list = matchedAttributes[name];
if (list)
list.push(node);
else
matchedAttributes[name] = [node];
}
}
return node;
};
/**
* Adds a node filter function to the parser, the parser will collect the specified nodes by name
* and then execute the callback ones it has finished parsing the document.
*
* @example
* parser.addNodeFilter('p,h1', function(nodes, name) {
* for (var i = 0; i < nodes.length; i++) {
* console.log(nodes[i].name);
* }
* });
* @method addNodeFilter
* @method {String} name Comma separated list of nodes to collect.
* @param {function} callback Callback function to execute once it has collected nodes.
*/
self.addNodeFilter = function(name, callback) {
tinymce.each(tinymce.explode(name), function(name) {
var list = nodeFilters[name];
if (!list)
nodeFilters[name] = list = [];
list.push(callback);
});
};
/**
* Adds a attribute filter function to the parser, the parser will collect nodes that has the specified attributes
* and then execute the callback ones it has finished parsing the document.
*
* @example
* parser.addAttributeFilter('src,href', function(nodes, name) {
* for (var i = 0; i < nodes.length; i++) {
* console.log(nodes[i].name);
* }
* });
* @method addAttributeFilter
* @method {String} name Comma separated list of nodes to collect.
* @param {function} callback Callback function to execute once it has collected nodes.
*/
self.addAttributeFilter = function(name, callback) {
tinymce.each(tinymce.explode(name), function(name) {
var i;
for (i = 0; i < attributeFilters.length; i++) {
if (attributeFilters[i].name === name) {
attributeFilters[i].callbacks.push(callback);
return;
}
}
attributeFilters.push({name: name, callbacks: [callback]});
});
};
/**
* Parses the specified HTML string into a DOM like node tree and returns the result.
*
* @example
* var rootNode = new DomParser({...}).parse('text');
* @method parse
* @param {String} html Html string to sax parse.
* @param {Object} args Optional args object that gets passed to all filter functions.
* @return {tinymce.html.Node} Root node containing the tree.
*/
self.parse = function(html, args) {
var parser, rootNode, node, nodes, i, l, fi, fl, list, name, validate,
blockElements, startWhiteSpaceRegExp, invalidChildren = [], isInWhiteSpacePreservedElement,
endWhiteSpaceRegExp, allWhiteSpaceRegExp, isAllWhiteSpaceRegExp, whiteSpaceElements, children, nonEmptyElements, rootBlockName;
args = args || {};
matchedNodes = {};
matchedAttributes = {};
blockElements = tinymce.extend(tinymce.makeMap('script,style,head,html,body,title,meta,param'), schema.getBlockElements());
nonEmptyElements = schema.getNonEmptyElements();
children = schema.children;
validate = settings.validate;
rootBlockName = "forced_root_block" in args ? args.forced_root_block : settings.forced_root_block;
whiteSpaceElements = schema.getWhiteSpaceElements();
startWhiteSpaceRegExp = /^[ \t\r\n]+/;
endWhiteSpaceRegExp = /[ \t\r\n]+$/;
allWhiteSpaceRegExp = /[ \t\r\n]+/g;
isAllWhiteSpaceRegExp = /^[ \t\r\n]+$/;
function addRootBlocks() {
var node = rootNode.firstChild, next, rootBlockNode;
while (node) {
next = node.next;
if (node.type == 3 || (node.type == 1 && node.name !== 'p' && !blockElements[node.name] && !node.attr('data-mce-type'))) {
if (!rootBlockNode) {
// Create a new root block element
rootBlockNode = createNode(rootBlockName, 1);
rootNode.insert(rootBlockNode, node);
rootBlockNode.append(node);
} else
rootBlockNode.append(node);
} else {
rootBlockNode = null;
}
node = next;
};
};
function createNode(name, type) {
var node = new Node(name, type), list;
if (name in nodeFilters) {
list = matchedNodes[name];
if (list)
list.push(node);
else
matchedNodes[name] = [node];
}
return node;
};
function removeWhitespaceBefore(node) {
var textNode, textVal, sibling;
for (textNode = node.prev; textNode && textNode.type === 3; ) {
textVal = textNode.value.replace(endWhiteSpaceRegExp, '');
if (textVal.length > 0) {
textNode.value = textVal;
textNode = textNode.prev;
} else {
sibling = textNode.prev;
textNode.remove();
textNode = sibling;
}
}
};
function cloneAndExcludeBlocks(input) {
var name, output = {};
for (name in input) {
if (name !== 'li' && name != 'p') {
output[name] = input[name];
}
}
return output;
};
parser = new tinymce.html.SaxParser({
validate : validate,
// Exclude P and LI from DOM parsing since it's treated better by the DOM parser
self_closing_elements: cloneAndExcludeBlocks(schema.getSelfClosingElements()),
cdata: function(text) {
node.append(createNode('#cdata', 4)).value = text;
},
text: function(text, raw) {
var textNode;
// Trim all redundant whitespace on non white space elements
if (!isInWhiteSpacePreservedElement) {
text = text.replace(allWhiteSpaceRegExp, ' ');
if (node.lastChild && blockElements[node.lastChild.name])
text = text.replace(startWhiteSpaceRegExp, '');
}
// Do we need to create the node
if (text.length !== 0) {
textNode = createNode('#text', 3);
textNode.raw = !!raw;
node.append(textNode).value = text;
}
},
comment: function(text) {
node.append(createNode('#comment', 8)).value = text;
},
pi: function(name, text) {
node.append(createNode(name, 7)).value = text;
removeWhitespaceBefore(node);
},
doctype: function(text) {
var newNode;
newNode = node.append(createNode('#doctype', 10));
newNode.value = text;
removeWhitespaceBefore(node);
},
start: function(name, attrs, empty) {
var newNode, attrFiltersLen, elementRule, textNode, attrName, text, sibling, parent;
elementRule = validate ? schema.getElementRule(name) : {};
if (elementRule) {
newNode = createNode(elementRule.outputName || name, 1);
newNode.attributes = attrs;
newNode.shortEnded = empty;
node.append(newNode);
// Check if node is valid child of the parent node is the child is
// unknown we don't collect it since it's probably a custom element
parent = children[node.name];
if (parent && children[newNode.name] && !parent[newNode.name])
invalidChildren.push(newNode);
attrFiltersLen = attributeFilters.length;
while (attrFiltersLen--) {
attrName = attributeFilters[attrFiltersLen].name;
if (attrName in attrs.map) {
list = matchedAttributes[attrName];
if (list)
list.push(newNode);
else
matchedAttributes[attrName] = [newNode];
}
}
// Trim whitespace before block
if (blockElements[name])
removeWhitespaceBefore(newNode);
// Change current node if the element wasn't empty i.e not
or
if (!empty)
node = newNode;
// Check if we are inside a whitespace preserved element
if (!isInWhiteSpacePreservedElement && whiteSpaceElements[name]) {
isInWhiteSpacePreservedElement = true;
}
}
},
end: function(name) {
var textNode, elementRule, text, sibling, tempNode;
elementRule = validate ? schema.getElementRule(name) : {};
if (elementRule) {
if (blockElements[name]) {
if (!isInWhiteSpacePreservedElement) {
// Trim whitespace of the first node in a block
textNode = node.firstChild;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(startWhiteSpaceRegExp, '');
// Any characters left after trim or should we remove it
if (text.length > 0) {
textNode.value = text;
textNode = textNode.next;
} else {
sibling = textNode.next;
textNode.remove();
textNode = sibling;
}
// Remove any pure whitespace siblings
while (textNode && textNode.type === 3) {
text = textNode.value;
sibling = textNode.next;
if (text.length === 0 || isAllWhiteSpaceRegExp.test(text)) {
textNode.remove();
textNode = sibling;
}
textNode = sibling;
}
}
// Trim whitespace of the last node in a block
textNode = node.lastChild;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(endWhiteSpaceRegExp, '');
// Any characters left after trim or should we remove it
if (text.length > 0) {
textNode.value = text;
textNode = textNode.prev;
} else {
sibling = textNode.prev;
textNode.remove();
textNode = sibling;
}
// Remove any pure whitespace siblings
while (textNode && textNode.type === 3) {
text = textNode.value;
sibling = textNode.prev;
if (text.length === 0 || isAllWhiteSpaceRegExp.test(text)) {
textNode.remove();
textNode = sibling;
}
textNode = sibling;
}
}
}
// Trim start white space
// Removed due to: #5424
/*textNode = node.prev;
if (textNode && textNode.type === 3) {
text = textNode.value.replace(startWhiteSpaceRegExp, '');
if (text.length > 0)
textNode.value = text;
else
textNode.remove();
}*/
}
// Check if we exited a whitespace preserved element
if (isInWhiteSpacePreservedElement && whiteSpaceElements[name]) {
isInWhiteSpacePreservedElement = false;
}
// Handle empty nodes
if (elementRule.removeEmpty || elementRule.paddEmpty) {
if (node.isEmpty(nonEmptyElements)) {
if (elementRule.paddEmpty)
node.empty().append(new Node('#text', '3')).value = '\u00a0';
else {
// Leave nodes that have a name like
if (!node.attributes.map.name && !node.attributes.map.id) {
tempNode = node.parent;
node.empty().remove();
node = tempNode;
return;
}
}
}
}
node = node.parent;
}
}
}, schema);
rootNode = node = new Node(args.context || settings.root_name, 11);
parser.parse(html);
// Fix invalid children or report invalid children in a contextual parsing
if (validate && invalidChildren.length) {
if (!args.context)
fixInvalidChildren(invalidChildren);
else
args.invalid = true;
}
// Wrap nodes in the root into block elements if the root is body
if (rootBlockName && rootNode.name == 'body')
addRootBlocks();
// Run filters only when the contents is valid
if (!args.invalid) {
// Run node filters
for (name in matchedNodes) {
list = nodeFilters[name];
nodes = matchedNodes[name];
// Remove already removed children
fi = nodes.length;
while (fi--) {
if (!nodes[fi].parent)
nodes.splice(fi, 1);
}
for (i = 0, l = list.length; i < l; i++)
list[i](nodes, name, args);
}
// Run attribute filters
for (i = 0, l = attributeFilters.length; i < l; i++) {
list = attributeFilters[i];
if (list.name in matchedAttributes) {
nodes = matchedAttributes[list.name];
// Remove already removed children
fi = nodes.length;
while (fi--) {
if (!nodes[fi].parent)
nodes.splice(fi, 1);
}
for (fi = 0, fl = list.callbacks.length; fi < fl; fi++)
list.callbacks[fi](nodes, list.name, args);
}
}
}
return rootNode;
};
// Remove
at end of block elements Gecko and WebKit injects BR elements to
// make it possible to place the caret inside empty blocks. This logic tries to remove
// these elements and keep br elements that where intended to be there intact
if (settings.remove_trailing_brs) {
self.addNodeFilter('br', function(nodes, name) {
var i, l = nodes.length, node, blockElements = tinymce.extend({}, schema.getBlockElements()),
nonEmptyElements = schema.getNonEmptyElements(), parent, lastParent, prev, prevName;
// Remove brs from body element as well
blockElements.body = 1;
// Must loop forwards since it will otherwise remove all brs in a
for (i = 0; i < l; i++) {
node = nodes[i];
parent = node.parent;
if (blockElements[node.parent.name] && node === parent.lastChild) {
// Loop all nodes to the left of the current node and check for other BR elements
// excluding bookmarks since they are invisible
prev = node.prev;
while (prev) {
prevName = prev.name;
// Ignore bookmarks
if (prevName !== "span" || prev.attr('data-mce-type') !== 'bookmark') {
// Found a non BR element
if (prevName !== "br")
break;
// Found another br it's a
structure then don't remove anything
if (prevName === 'br') {
node = null;
break;
}
}
prev = prev.prev;
}
if (node) {
node.remove();
// Is the parent to be considered empty after we removed the BR
if (parent.isEmpty(nonEmptyElements)) {
elementRule = schema.getElementRule(parent.name);
// Remove or padd the element depending on schema rule
if (elementRule) {
if (elementRule.removeEmpty)
parent.remove();
else if (elementRule.paddEmpty)
parent.empty().append(new tinymce.html.Node('#text', 3)).value = '\u00a0';
}
}
}
} else {
// Replaces BR elements inside inline elements like
so they become
lastParent = node;
while (parent.firstChild === lastParent && parent.lastChild === lastParent) {
lastParent = parent;
if (blockElements[parent.name]) {
break;
}
parent = parent.parent;
}
if (lastParent === parent) {
textNode = new tinymce.html.Node('#text', 3);
textNode.value = '\u00a0';
node.replace(textNode);
}
}
}
});
}
// Force anchor names closed, unless the setting "allow_html_in_named_anchor" is explicitly included.
if (!settings.allow_html_in_named_anchor) {
self.addAttributeFilter('id,name', function(nodes, name) {
var i = nodes.length, sibling, prevSibling, parent, node;
while (i--) {
node = nodes[i];
if (node.name === 'a' && node.firstChild && !node.attr('href')) {
parent = node.parent;
// Move children after current node
sibling = node.lastChild;
do {
prevSibling = sibling.prev;
parent.insert(sibling, node);
sibling = prevSibling;
} while (sibling);
}
}
});
}
}
})(tinymce);