/** * DomParser.js * * Copyright, Moxiecode Systems AB * Released under LGPL License. * * License: http://www.tinymce.com/license * Contributing: http://www.tinymce.com/contributing */ (function(tinymce) { var Node = tinymce.html.Node; /** * This class parses HTML code into a DOM like structure of nodes it will remove redundant whitespace and make * sure that the node tree is valid according to the specified schema. So for example:

a

b

c

will become

a

b

c

* * @example * var parser = new tinymce.html.DomParser({validate: true}, schema); * var rootNode = parser.parse('

content

'); * * @class tinymce.html.DomParser * @version 3.4 */ /** * Constructs a new DomParser instance. * * @constructor * @method DomParser * @param {Object} settings Name/value collection of settings. comment, cdata, text, start and end are callbacks. * @param {tinymce.html.Schema} schema HTML Schema class to use when parsing. */ tinymce.html.DomParser = function(settings, schema) { var self = this, nodeFilters = {}, attributeFilters = [], matchedNodes = {}, matchedAttributes = {}; settings = settings || {}; settings.validate = "validate" in settings ? settings.validate : true; settings.root_name = settings.root_name || 'body'; self.schema = schema = schema || new tinymce.html.Schema(); function fixInvalidChildren(nodes) { var ni, node, parent, parents, newParent, currentNode, tempNode, childNode, i, childClone, nonEmptyElements, nonSplitableElements, textBlockElements, sibling, nextNode; nonSplitableElements = tinymce.makeMap('tr,td,th,tbody,thead,tfoot,table'); nonEmptyElements = schema.getNonEmptyElements(); textBlockElements = schema.getTextBlockElements(); for (ni = 0; ni < nodes.length; ni++) { node = nodes[ni]; // Already removed or fixed if (!node.parent || node.fixed) continue; // If the invalid element is a text block and the text block is within a parent LI element // Then unwrap the first text block and convert other sibling text blocks to LI elements similar to Word/Open Office if (textBlockElements[node.name] && node.parent.name == 'li') { // Move sibling text blocks after LI element sibling = node.next; while (sibling) { if (textBlockElements[sibling.name]) { sibling.name = 'li'; sibling.fixed = true; node.parent.insert(sibling, node.parent); } else { break; } sibling = sibling.next; } // Unwrap current text block node.unwrap(node); continue; } // Get list of all parent nodes until we find a valid parent to stick the child into parents = [node]; for (parent = node.parent; parent && !schema.isValidChild(parent.name, node.name) && !nonSplitableElements[parent.name]; parent = parent.parent) parents.push(parent); // Found a suitable parent if (parent && parents.length > 1) { // Reverse the array since it makes looping easier parents.reverse(); // Clone the related parent and insert that after the moved node newParent = currentNode = self.filterNode(parents[0].clone()); // Start cloning and moving children on the left side of the target node for (i = 0; i < parents.length - 1; i++) { if (schema.isValidChild(currentNode.name, parents[i].name)) { tempNode = self.filterNode(parents[i].clone()); currentNode.append(tempNode); } else tempNode = currentNode; for (childNode = parents[i].firstChild; childNode && childNode != parents[i + 1]; ) { nextNode = childNode.next; tempNode.append(childNode); childNode = nextNode; } currentNode = tempNode; } if (!newParent.isEmpty(nonEmptyElements)) { parent.insert(newParent, parents[0], true); parent.insert(node, newParent); } else { parent.insert(node, parents[0], true); } // Check if the element is empty by looking through it's contents and special treatment for

parent = parents[0]; if (parent.isEmpty(nonEmptyElements) || parent.firstChild === parent.lastChild && parent.firstChild.name === 'br') { parent.empty().remove(); } } else if (node.parent) { // If it's an LI try to find a UL/OL for it or wrap it if (node.name === 'li') { sibling = node.prev; if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) { sibling.append(node); continue; } sibling = node.next; if (sibling && (sibling.name === 'ul' || sibling.name === 'ul')) { sibling.insert(node, sibling.firstChild, true); continue; } node.wrap(self.filterNode(new Node('ul', 1))); continue; } // Try wrapping the element in a DIV if (schema.isValidChild(node.parent.name, 'div') && schema.isValidChild('div', node.name)) { node.wrap(self.filterNode(new Node('div', 1))); } else { // We failed wrapping it, then remove or unwrap it if (node.name === 'style' || node.name === 'script') node.empty().remove(); else node.unwrap(); } } } }; /** * Runs the specified node though the element and attributes filters. * * @param {tinymce.html.Node} Node the node to run filters on. * @return {tinymce.html.Node} The passed in node. */ self.filterNode = function(node) { var i, name, list; // Run element filters if (name in nodeFilters) { list = matchedNodes[name]; if (list) list.push(node); else matchedNodes[name] = [node]; } // Run attribute filters i = attributeFilters.length; while (i--) { name = attributeFilters[i].name; if (name in node.attributes.map) { list = matchedAttributes[name]; if (list) list.push(node); else matchedAttributes[name] = [node]; } } return node; }; /** * Adds a node filter function to the parser, the parser will collect the specified nodes by name * and then execute the callback ones it has finished parsing the document. * * @example * parser.addNodeFilter('p,h1', function(nodes, name) { * for (var i = 0; i < nodes.length; i++) { * console.log(nodes[i].name); * } * }); * @method addNodeFilter * @method {String} name Comma separated list of nodes to collect. * @param {function} callback Callback function to execute once it has collected nodes. */ self.addNodeFilter = function(name, callback) { tinymce.each(tinymce.explode(name), function(name) { var list = nodeFilters[name]; if (!list) nodeFilters[name] = list = []; list.push(callback); }); }; /** * Adds a attribute filter function to the parser, the parser will collect nodes that has the specified attributes * and then execute the callback ones it has finished parsing the document. * * @example * parser.addAttributeFilter('src,href', function(nodes, name) { * for (var i = 0; i < nodes.length; i++) { * console.log(nodes[i].name); * } * }); * @method addAttributeFilter * @method {String} name Comma separated list of nodes to collect. * @param {function} callback Callback function to execute once it has collected nodes. */ self.addAttributeFilter = function(name, callback) { tinymce.each(tinymce.explode(name), function(name) { var i; for (i = 0; i < attributeFilters.length; i++) { if (attributeFilters[i].name === name) { attributeFilters[i].callbacks.push(callback); return; } } attributeFilters.push({name: name, callbacks: [callback]}); }); }; /** * Parses the specified HTML string into a DOM like node tree and returns the result. * * @example * var rootNode = new DomParser({...}).parse('text'); * @method parse * @param {String} html Html string to sax parse. * @param {Object} args Optional args object that gets passed to all filter functions. * @return {tinymce.html.Node} Root node containing the tree. */ self.parse = function(html, args) { var parser, rootNode, node, nodes, i, l, fi, fl, list, name, validate, blockElements, startWhiteSpaceRegExp, invalidChildren = [], isInWhiteSpacePreservedElement, endWhiteSpaceRegExp, allWhiteSpaceRegExp, isAllWhiteSpaceRegExp, whiteSpaceElements, children, nonEmptyElements, rootBlockName; args = args || {}; matchedNodes = {}; matchedAttributes = {}; blockElements = tinymce.extend(tinymce.makeMap('script,style,head,html,body,title,meta,param'), schema.getBlockElements()); nonEmptyElements = schema.getNonEmptyElements(); children = schema.children; validate = settings.validate; rootBlockName = "forced_root_block" in args ? args.forced_root_block : settings.forced_root_block; whiteSpaceElements = schema.getWhiteSpaceElements(); startWhiteSpaceRegExp = /^[ \t\r\n]+/; endWhiteSpaceRegExp = /[ \t\r\n]+$/; allWhiteSpaceRegExp = /[ \t\r\n]+/g; isAllWhiteSpaceRegExp = /^[ \t\r\n]+$/; function addRootBlocks() { var node = rootNode.firstChild, next, rootBlockNode; while (node) { next = node.next; if (node.type == 3 || (node.type == 1 && node.name !== 'p' && !blockElements[node.name] && !node.attr('data-mce-type'))) { if (!rootBlockNode) { // Create a new root block element rootBlockNode = createNode(rootBlockName, 1); rootNode.insert(rootBlockNode, node); rootBlockNode.append(node); } else rootBlockNode.append(node); } else { rootBlockNode = null; } node = next; }; }; function createNode(name, type) { var node = new Node(name, type), list; if (name in nodeFilters) { list = matchedNodes[name]; if (list) list.push(node); else matchedNodes[name] = [node]; } return node; }; function removeWhitespaceBefore(node) { var textNode, textVal, sibling; for (textNode = node.prev; textNode && textNode.type === 3; ) { textVal = textNode.value.replace(endWhiteSpaceRegExp, ''); if (textVal.length > 0) { textNode.value = textVal; textNode = textNode.prev; } else { sibling = textNode.prev; textNode.remove(); textNode = sibling; } } }; function cloneAndExcludeBlocks(input) { var name, output = {}; for (name in input) { if (name !== 'li' && name != 'p') { output[name] = input[name]; } } return output; }; parser = new tinymce.html.SaxParser({ validate : validate, // Exclude P and LI from DOM parsing since it's treated better by the DOM parser self_closing_elements: cloneAndExcludeBlocks(schema.getSelfClosingElements()), cdata: function(text) { node.append(createNode('#cdata', 4)).value = text; }, text: function(text, raw) { var textNode; // Trim all redundant whitespace on non white space elements if (!isInWhiteSpacePreservedElement) { text = text.replace(allWhiteSpaceRegExp, ' '); if (node.lastChild && blockElements[node.lastChild.name]) text = text.replace(startWhiteSpaceRegExp, ''); } // Do we need to create the node if (text.length !== 0) { textNode = createNode('#text', 3); textNode.raw = !!raw; node.append(textNode).value = text; } }, comment: function(text) { node.append(createNode('#comment', 8)).value = text; }, pi: function(name, text) { node.append(createNode(name, 7)).value = text; removeWhitespaceBefore(node); }, doctype: function(text) { var newNode; newNode = node.append(createNode('#doctype', 10)); newNode.value = text; removeWhitespaceBefore(node); }, start: function(name, attrs, empty) { var newNode, attrFiltersLen, elementRule, textNode, attrName, text, sibling, parent; elementRule = validate ? schema.getElementRule(name) : {}; if (elementRule) { newNode = createNode(elementRule.outputName || name, 1); newNode.attributes = attrs; newNode.shortEnded = empty; node.append(newNode); // Check if node is valid child of the parent node is the child is // unknown we don't collect it since it's probably a custom element parent = children[node.name]; if (parent && children[newNode.name] && !parent[newNode.name]) invalidChildren.push(newNode); attrFiltersLen = attributeFilters.length; while (attrFiltersLen--) { attrName = attributeFilters[attrFiltersLen].name; if (attrName in attrs.map) { list = matchedAttributes[attrName]; if (list) list.push(newNode); else matchedAttributes[attrName] = [newNode]; } } // Trim whitespace before block if (blockElements[name]) removeWhitespaceBefore(newNode); // Change current node if the element wasn't empty i.e not
or if (!empty) node = newNode; // Check if we are inside a whitespace preserved element if (!isInWhiteSpacePreservedElement && whiteSpaceElements[name]) { isInWhiteSpacePreservedElement = true; } } }, end: function(name) { var textNode, elementRule, text, sibling, tempNode; elementRule = validate ? schema.getElementRule(name) : {}; if (elementRule) { if (blockElements[name]) { if (!isInWhiteSpacePreservedElement) { // Trim whitespace of the first node in a block textNode = node.firstChild; if (textNode && textNode.type === 3) { text = textNode.value.replace(startWhiteSpaceRegExp, ''); // Any characters left after trim or should we remove it if (text.length > 0) { textNode.value = text; textNode = textNode.next; } else { sibling = textNode.next; textNode.remove(); textNode = sibling; } // Remove any pure whitespace siblings while (textNode && textNode.type === 3) { text = textNode.value; sibling = textNode.next; if (text.length === 0 || isAllWhiteSpaceRegExp.test(text)) { textNode.remove(); textNode = sibling; } textNode = sibling; } } // Trim whitespace of the last node in a block textNode = node.lastChild; if (textNode && textNode.type === 3) { text = textNode.value.replace(endWhiteSpaceRegExp, ''); // Any characters left after trim or should we remove it if (text.length > 0) { textNode.value = text; textNode = textNode.prev; } else { sibling = textNode.prev; textNode.remove(); textNode = sibling; } // Remove any pure whitespace siblings while (textNode && textNode.type === 3) { text = textNode.value; sibling = textNode.prev; if (text.length === 0 || isAllWhiteSpaceRegExp.test(text)) { textNode.remove(); textNode = sibling; } textNode = sibling; } } } // Trim start white space // Removed due to: #5424 /*textNode = node.prev; if (textNode && textNode.type === 3) { text = textNode.value.replace(startWhiteSpaceRegExp, ''); if (text.length > 0) textNode.value = text; else textNode.remove(); }*/ } // Check if we exited a whitespace preserved element if (isInWhiteSpacePreservedElement && whiteSpaceElements[name]) { isInWhiteSpacePreservedElement = false; } // Handle empty nodes if (elementRule.removeEmpty || elementRule.paddEmpty) { if (node.isEmpty(nonEmptyElements)) { if (elementRule.paddEmpty) node.empty().append(new Node('#text', '3')).value = '\u00a0'; else { // Leave nodes that have a name like if (!node.attributes.map.name && !node.attributes.map.id) { tempNode = node.parent; node.empty().remove(); node = tempNode; return; } } } } node = node.parent; } } }, schema); rootNode = node = new Node(args.context || settings.root_name, 11); parser.parse(html); // Fix invalid children or report invalid children in a contextual parsing if (validate && invalidChildren.length) { if (!args.context) fixInvalidChildren(invalidChildren); else args.invalid = true; } // Wrap nodes in the root into block elements if the root is body if (rootBlockName && rootNode.name == 'body') addRootBlocks(); // Run filters only when the contents is valid if (!args.invalid) { // Run node filters for (name in matchedNodes) { list = nodeFilters[name]; nodes = matchedNodes[name]; // Remove already removed children fi = nodes.length; while (fi--) { if (!nodes[fi].parent) nodes.splice(fi, 1); } for (i = 0, l = list.length; i < l; i++) list[i](nodes, name, args); } // Run attribute filters for (i = 0, l = attributeFilters.length; i < l; i++) { list = attributeFilters[i]; if (list.name in matchedAttributes) { nodes = matchedAttributes[list.name]; // Remove already removed children fi = nodes.length; while (fi--) { if (!nodes[fi].parent) nodes.splice(fi, 1); } for (fi = 0, fl = list.callbacks.length; fi < fl; fi++) list.callbacks[fi](nodes, list.name, args); } } } return rootNode; }; // Remove
at end of block elements Gecko and WebKit injects BR elements to // make it possible to place the caret inside empty blocks. This logic tries to remove // these elements and keep br elements that where intended to be there intact if (settings.remove_trailing_brs) { self.addNodeFilter('br', function(nodes, name) { var i, l = nodes.length, node, blockElements = tinymce.extend({}, schema.getBlockElements()), nonEmptyElements = schema.getNonEmptyElements(), parent, lastParent, prev, prevName; // Remove brs from body element as well blockElements.body = 1; // Must loop forwards since it will otherwise remove all brs in

a

for (i = 0; i < l; i++) { node = nodes[i]; parent = node.parent; if (blockElements[node.parent.name] && node === parent.lastChild) { // Loop all nodes to the left of the current node and check for other BR elements // excluding bookmarks since they are invisible prev = node.prev; while (prev) { prevName = prev.name; // Ignore bookmarks if (prevName !== "span" || prev.attr('data-mce-type') !== 'bookmark') { // Found a non BR element if (prevName !== "br") break; // Found another br it's a

structure then don't remove anything if (prevName === 'br') { node = null; break; } } prev = prev.prev; } if (node) { node.remove(); // Is the parent to be considered empty after we removed the BR if (parent.isEmpty(nonEmptyElements)) { elementRule = schema.getElementRule(parent.name); // Remove or padd the element depending on schema rule if (elementRule) { if (elementRule.removeEmpty) parent.remove(); else if (elementRule.paddEmpty) parent.empty().append(new tinymce.html.Node('#text', 3)).value = '\u00a0'; } } } } else { // Replaces BR elements inside inline elements like

so they become

lastParent = node; while (parent.firstChild === lastParent && parent.lastChild === lastParent) { lastParent = parent; if (blockElements[parent.name]) { break; } parent = parent.parent; } if (lastParent === parent) { textNode = new tinymce.html.Node('#text', 3); textNode.value = '\u00a0'; node.replace(textNode); } } } }); } // Force anchor names closed, unless the setting "allow_html_in_named_anchor" is explicitly included. if (!settings.allow_html_in_named_anchor) { self.addAttributeFilter('id,name', function(nodes, name) { var i = nodes.length, sibling, prevSibling, parent, node; while (i--) { node = nodes[i]; if (node.name === 'a' && node.firstChild && !node.attr('href')) { parent = node.parent; // Move children after current node sibling = node.lastChild; do { prevSibling = sibling.prev; parent.insert(sibling, node); sibling = prevSibling; } while (sibling); } } }); } } })(tinymce);