/**
 * Serializer.js
 *
 * Copyright 2009, Moxiecode Systems AB
 * Released under LGPL License.
 *
 * License: http://tinymce.moxiecode.com/license
 * Contributing: http://tinymce.moxiecode.com/contributing
 */

(function(tinymce) {
	// Shorten names
	var extend = tinymce.extend, each = tinymce.each, Dispatcher = tinymce.util.Dispatcher, isIE = tinymce.isIE, isGecko = tinymce.isGecko;

	/**
	 * Turns a wildcard pattern into regular expression source text by inserting a "." in front of
	 * every "?", "+" and "*" (so "*" becomes ".*"). All other characters are returned untouched,
	 * including characters that are special in a regular expression.
	 *
	 * @param {String} s Wildcard pattern.
	 * @return {String} The pattern with each "?", "+" and "*" prefixed by ".".
	 */
	function wildcardToRE(s) {
		return s.replace(/([?+*])/g, '.$1');
	};

	/**
	 * This class is used to serialize DOM trees into a string.
	 * Consult the TinyMCE Wiki API for more details and examples on how to use this class.
	 * @class tinymce.dom.Serializer
	 */
	tinymce.create('tinymce.dom.Serializer', {
		/**
		 * Constructs a new DOM serializer class.
		 *
		 * @constructor
		 * @method Serializer
		 * @param {Object} s Optional name/Value collection of settings for the serializer.
		 */
		Serializer : function(s) {
			var t = this;

			t.key = 0;

			// Events fired around serialization; handlers are registered further down in this constructor
			t.onPreProcess = new Dispatcher(t);
			t.onPostProcess = new Dispatcher(t);

			try {
				t.writer = new tinymce.dom.XMLWriter();
			} catch (ex) {
				// IE might throw exception if ActiveX is disabled so we then switch to the slightly slower StringWriter
				t.writer = new tinymce.dom.StringWriter();
			}

			// Default settings
			t.settings = s = extend({
				dom : tinymce.DOM,
				valid_nodes : 0,
				node_filter : 0,
				attr_filter : 0,
				invalid_attrs : /^(_mce_|_moz_|sizset|sizcache)/,
				closed : /^(br|hr|input|meta|img|link|param|area)$/,
				entity_encoding : 'named',
				entities :
'160,nbsp,161,iexcl,162,cent,163,pound,164,curren,165,yen,166,brvbar,167,sect,168,uml,169,copy,170,ordf,171,laquo,172,not,173,shy,174,reg,175,macr,176,deg,177,plusmn,178,sup2,179,sup3,180,acute,181,micro,182,para,183,middot,184,cedil,185,sup1,186,ordm,187,raquo,188,frac14,189,frac12,190,frac34,191,iquest,192,Agrave,193,Aacute,194,Acirc,195,Atilde,196,Auml,197,Aring,198,AElig,199,Ccedil,200,Egrave,201,Eacute,202,Ecirc,203,Euml,204,Igrave,205,Iacute,206,Icirc,207,Iuml,208,ETH,209,Ntilde,210,Ograve,211,Oacute,212,Ocirc,213,Otilde,214,Ouml,215,times,216,Oslash,217,Ugrave,218,Uacute,219,Ucirc,220,Uuml,221,Yacute,222,THORN,223,szlig,224,agrave,225,aacute,226,acirc,227,atilde,228,auml,229,aring,230,aelig,231,ccedil,232,egrave,233,eacute,234,ecirc,235,euml,236,igrave,237,iacute,238,icirc,239,iuml,240,eth,241,ntilde,242,ograve,243,oacute,244,ocirc,245,otilde,246,ouml,247,divide,248,oslash,249,ugrave,250,uacute,251,ucirc,252,uuml,253,yacute,254,thorn,255,yuml,402,fnof,913,Alpha,914,Beta,915,Gamma,916,Delta,917,Epsilon,918,Zeta,919,Eta,920,Theta,921,Iota,922,Kappa,923,Lambda,924,Mu,925,Nu,926,Xi,927,Omicron,928,Pi,929,Rho,931,Sigma,932,Tau,933,Upsilon,934,Phi,935,Chi,936,Psi,937,Omega,945,alpha,946,beta,947,gamma,948,delta,949,epsilon,950,zeta,951,eta,952,theta,953,iota,954,kappa,955,lambda,956,mu,957,nu,958,xi,959,omicron,960,pi,961,rho,962,sigmaf,963,sigma,964,tau,965,upsilon,966,phi,967,chi,968,psi,969,omega,977,thetasym,978,upsih,982,piv,8226,bull,8230,hellip,8242,prime,8243,Prime,8254,oline,8260,frasl,8472,weierp,8465,image,8476,real,8482,trade,8501,alefsym,8592,larr,8593,uarr,8594,rarr,8595,darr,8596,harr,8629,crarr,8656,lArr,8657,uArr,8658,rArr,8659,dArr,8660,hArr,8704,forall,8706,part,8707,exist,8709,empty,8711,nabla,8712,isin,8713,notin,8715,ni,8719,prod,8721,sum,8722,minus,8727,lowast,8730,radic,8733,prop,8734,infin,8736,ang,8743,and,8744,or,8745,cap,8746,cup,8747,int,8756,there4,8764,sim,8773,cong,8776,asymp,8800,ne,8801,equiv,8804,le,8805,ge,8834,sub,8835,sup,8836,
nsub,8838,sube,8839,supe,8853,oplus,8855,otimes,8869,perp,8901,sdot,8968,lceil,8969,rceil,8970,lfloor,8971,rfloor,9001,lang,9002,rang,9674,loz,9824,spades,9827,clubs,9829,hearts,9830,diams,338,OElig,339,oelig,352,Scaron,353,scaron,376,Yuml,710,circ,732,tilde,8194,ensp,8195,emsp,8201,thinsp,8204,zwnj,8205,zwj,8206,lrm,8207,rlm,8211,ndash,8212,mdash,8216,lsquo,8217,rsquo,8218,sbquo,8220,ldquo,8221,rdquo,8222,bdquo,8224,dagger,8225,Dagger,8240,permil,8249,lsaquo,8250,rsaquo,8364,euro', valid_elements : '*[*]', extended_valid_elements : 0, invalid_elements : 0, fix_table_elements : 1, fix_list_elements : true, fix_content_duplication : true, convert_fonts_to_spans : false, font_size_classes : 0, apply_source_formatting : 0, indent_mode : 'simple', indent_char : '\t', indent_levels : 1, remove_linebreaks : 1, remove_redundant_brs : 1, element_format : 'xhtml' }, s); t.dom = s.dom; t.schema = s.schema; // Use raw entities if no entities are defined if (s.entity_encoding == 'named' && !s.entities) s.entity_encoding = 'raw'; if (s.remove_redundant_brs) { t.onPostProcess.add(function(se, o) { // Remove single BR at end of block elements since they get rendered o.content = o.content.replace(/(
\s*)+<\/(p|h[1-6]|div|li)>/gi, function(a, b, c) { // Check if it's a single element if (/^
\s*<\//.test(a)) return ''; return a; }); }); } // Remove XHTML element endings i.e. produce crap :) XHTML is better if (s.element_format == 'html') { t.onPostProcess.add(function(se, o) { o.content = o.content.replace(/<([^>]+) \/>/g, '<$1>'); }); } if (s.fix_list_elements) { t.onPreProcess.add(function(se, o) { var nl, x, a = ['ol', 'ul'], i, n, p, r = /^(OL|UL)$/, np; function prevNode(e, n) { var a = n.split(','), i; while ((e = e.previousSibling) != null) { for (i=0; i= 1767) { each(t.dom.select('p table', o.node).reverse(), function(n) { var parent = t.dom.getParent(n.parentNode, 'table,p'); if (parent.nodeName != 'TABLE') { try { t.dom.split(parent, n); } catch (ex) { // IE can sometimes fire an unknown runtime error so we just ignore it } } }); } }); } }, /** * Sets a list of entities to use for the named entity encoded. * * @method setEntities * @param {String} s List of entities in the following format: number,name,.... */ setEntities : function(s) { var t = this, a, i, l = {}, v; // No need to setup more than once if (t.entityLookup) return; // Build regex and lookup array a = s.split(','); for (i = 0; i < a.length; i += 2) { v = a[i]; // Don't add default & " etc. if (v == 34 || v == 38 || v == 60 || v == 62) continue; l[String.fromCharCode(a[i])] = a[i + 1]; v = parseInt(a[i]).toString(16); } t.entityLookup = l; }, /** * Sets the valid elements rules of the serializer this enables you to specify things like what elements should be * outputted and what attributes specific elements might have. * Consult the Wiki for more details on this format. * * @method setRules * @param {String} s Valid elements rules string. */ setRules : function(s) { var t = this; t._setup(); t.rules = {}; t.wildRules = []; t.validElements = {}; return t.addRules(s); }, /** * Adds valid elements rules to the serializer this enables you to specify things like what elements should be * outputted and what attributes specific elements might have. 
* Consult the Wiki for more details on this format.
		 *
		 * @method addRules
		 * @param {String} s Valid elements rules string to add.
		 */
		addRules : function(s) {
			var t = this, dr;

			// Nothing to add
			if (!s)
				return;

			t._setup();

			// Rules are comma separated, each one looks like name[attribute rules]
			each(s.split(','), function(s) {
				var p = s.split(/\[|\]/), tn = p[0].split('/'), ra, at, wat, va = [];

				// Extend with default rules
				if (dr)
					at = tinymce.extend([], dr.attribs);

				// Parse attributes
				if (p.length > 1) {
					each(p[1].split('|'), function(s) {
						var ar = {}, i;

						at = at || [];

						// Parse attribute rule
						// "::" is swapped for "~" while the rule is parsed and turned back into ":" in the name afterwards
						s = s.replace(/::/g, '~');
						s = /^([!\-])?([\w*.?~_\-]+|)([=:<])?(.+)?$/.exec(s);
						s[2] = s[2].replace(/~/g, ':');

						// Add required attributes
						if (s[1] == '!') {
							ra = ra || [];
							ra.push(s[2]);
						}

						// Remove inherited attributes
						if (s[1] == '-') {
							// NOTE(review): this copy of the file has lost the text between "i" and "= 1767"
							// on the next line. The rest of addRules, any methods that followed it and the
							// header of the method below (which uses n, o, doc, impl and oldDoc without any
							// visible declaration) are missing and must be restored from a pristine copy.
							for (i = 0; i = 1767)) {
				// Create an empty HTML document
				doc = impl.createHTMLDocument("");

				// Add the element or its children if it's a body element to the new document
				each(n.nodeName == 'BODY' ? n.childNodes : [n], function(node) {
					doc.body.appendChild(doc.importNode(node, true));
				});

				// Grab first child or body element for serialization
				if (n.nodeName != 'BODY')
					n = doc.body.firstChild;
				else
					n = doc.body;

				// set the new document in DOMUtils so createElement etc works
				oldDoc = t.dom.doc;
				t.dom.doc = doc;
			}

			// New key for every run, kept as a string
			t.key = '' + (parseInt(t.key) + 1);

			// Pre process
			if (!o.no_events) {
				o.node = n;
				t.onPreProcess.dispatch(t, o);
			}

			// Serialize HTML DOM into a string
			t.writer.reset();
			t._info = o;
			t._serializeNode(n, o.getInner);

			// Post process
			o.content = t.writer.getContent();

			// Restore the old document if it was changed
			if (oldDoc)
				t.dom.doc = oldDoc;

			if (!o.no_events)
				t.onPostProcess.dispatch(t, o);

			t._postProcess(o);

			// Drop the node reference once serialization is done
			o.node = null;

			return tinymce.trim(o.content);
		},

		// Internal functions

		/**
		 * Indents the specified content object.
		 *
		 * @param {Object} o Content object to indent.
*/ _postProcess : function(o) { var t = this, s = t.settings, h = o.content, sc = [], p; if (o.format == 'html') { // Protect some elements p = t._protect({ content : h, patterns : [ {pattern : /(]*>)(.*?)(<\/script>)/g}, {pattern : /(]*>)(.*?)(<\/noscript>)/g}, {pattern : /(]*>)(.*?)(<\/style>)/g}, {pattern : /(]*>)(.*?)(<\/pre>)/g, encode : 1}, {pattern : /()/g} ] }); h = p.content; // Entity encode if (s.entity_encoding !== 'raw') h = t._encode(h); // Use BR instead of   padded P elements inside editor and use

 

outside editor /* if (o.set) h = h.replace(/

\s+( | |\u00a0|
)\s+<\/p>/g, '


'); else h = h.replace(/

\s+( | |\u00a0|
)\s+<\/p>/g, '

$1

');*/ // Since Gecko and Safari keeps whitespace in the DOM we need to // remove it inorder to match other browsers. But I think Gecko and Safari is right. // This process is only done when getting contents out from the editor. if (!o.set) { // We need to replace paragraph whitespace with an nbsp before indentation to keep the \u00a0 char h = h.replace(/

\s+<\/p>|]+)>\s+<\/p>/g, s.entity_encoding == 'numeric' ? ' 

' : ' 

'); if (s.remove_linebreaks) { h = h.replace(/\r?\n|\r/g, ' '); h = h.replace(/(<[^>]+>)\s+/g, '$1 '); h = h.replace(/\s+(<\/[^>]+>)/g, ' $1'); h = h.replace(/<(p|h[1-6]|blockquote|hr|div|table|tbody|tr|td|body|head|html|title|meta|style|pre|script|link|object) ([^>]+)>\s+/g, '<$1 $2>'); // Trim block start h = h.replace(/<(p|h[1-6]|blockquote|hr|div|table|tbody|tr|td|body|head|html|title|meta|style|pre|script|link|object)>\s+/g, '<$1>'); // Trim block start h = h.replace(/\s+<\/(p|h[1-6]|blockquote|hr|div|table|tbody|tr|td|body|head|html|title|meta|style|pre|script|link|object)>/g, ''); // Trim block end } // Simple indentation if (s.apply_source_formatting && s.indent_mode == 'simple') { // Add line breaks before and after block elements h = h.replace(/<(\/?)(ul|hr|table|meta|link|tbody|tr|object|body|head|html|map)(|[^>]+)>\s*/g, '\n<$1$2$3>\n'); h = h.replace(/\s*<(p|h[1-6]|blockquote|div|title|style|pre|script|td|li|area)(|[^>]+)>/g, '\n<$1$2>'); h = h.replace(/<\/(p|h[1-6]|blockquote|div|title|style|pre|script|td|li)>\s*/g, '\n'); h = h.replace(/\n\n/g, '\n'); } } h = t._unprotect(h, p); // Restore CDATA sections h = h.replace(//g, ''); // Restore the \u00a0 character if raw mode is enabled if (s.entity_encoding == 'raw') h = h.replace(/

 <\/p>|]+)> <\/p>/g, '\u00a0

'); // Restore noscript elements h = h.replace(/]+|)>([\s\S]*?)<\/noscript>/g, function(v, attribs, text) { return '' + t.dom.decode(text.replace(//g, '')) + ''; }); } o.content = h; }, _serializeNode : function(n, inner) { var t = this, s = t.settings, w = t.writer, hc, el, cn, i, l, a, at, no, v, nn, ru, ar, iv, closed, keep, type; if (!s.node_filter || s.node_filter(n)) { switch (n.nodeType) { case 1: // Element if (n.hasAttribute ? n.hasAttribute('_mce_bogus') : n.getAttribute('_mce_bogus')) return; iv = keep = false; hc = n.hasChildNodes(); nn = n.getAttribute('_mce_name') || n.nodeName.toLowerCase(); // Get internal type type = n.getAttribute('_mce_type'); if (type) { if (!t._info.cleanup) { iv = true; return; } else keep = 1; } // Add correct prefix on IE if (isIE) { if (n.scopeName !== 'HTML' && n.scopeName !== 'html') nn = n.scopeName + ':' + nn; } // Remove mce prefix on IE needed for the abbr element if (nn.indexOf('mce:') === 0) nn = nn.substring(4); // Check if valid if (!keep) { if (!t.validElementsRE || !t.validElementsRE.test(nn) || (t.invalidElementsRE && t.invalidElementsRE.test(nn)) || inner) { iv = true; break; } } if (isIE) { // Fix IE content duplication (DOM can have multiple copies of the same node) if (s.fix_content_duplication) { if (n._mce_serialized == t.key) return; n._mce_serialized = t.key; } // IE sometimes adds a / infront of the node name if (nn.charAt(0) == '/') nn = nn.substring(1); } else if (isGecko) { // Ignore br elements if (n.nodeName === 'BR' && n.getAttribute('type') == '_moz') return; } // Check if valid child if (s.validate_children) { if (t.elementName && !t.schema.isValid(t.elementName, nn)) { iv = true; break; } t.elementName = nn; } ru = t.findRule(nn); // No valid rule for this element could be found then skip it if (!ru) { iv = true; break; } nn = ru.name || nn; closed = s.closed.test(nn); // Skip empty nodes or empty node name in IE if ((!hc && ru.noEmpty) || (isIE && !nn)) { iv = true; break; } // Check required 
if (ru.requiredAttribs) { a = ru.requiredAttribs; for (i = a.length - 1; i >= 0; i--) { if (this.dom.getAttrib(n, a[i]) !== '') break; } // None of the required was there if (i == -1) { iv = true; break; } } w.writeStartElement(nn); // Add ordered attributes if (ru.attribs) { for (i=0, at = ru.attribs, l = at.length; i-1; i--) { no = at[i]; if (no.specified) { a = no.nodeName.toLowerCase(); if (s.invalid_attrs.test(a) || !ru.validAttribsRE.test(a)) continue; ar = t.findAttribRule(ru, a); v = t._getAttrib(n, ar, a); if (v !== null) w.writeAttribute(a, v); } } } // Keep type attribute if (type && keep) w.writeAttribute('_mce_type', type); // Write text from script if (nn === 'script' && tinymce.trim(n.innerHTML)) { w.writeText('// '); // Padd it with a comment so it will parse on older browsers w.writeCDATA(n.innerHTML.replace(/|<\[CDATA\[|\]\]>/g, '')); // Remove comments and cdata stuctures hc = false; break; } // Padd empty nodes with a   if (ru.padd) { // If it has only one bogus child, padd it anyway workaround for
bug if (hc && (cn = n.firstChild) && cn.nodeType === 1 && n.childNodes.length === 1) { if (cn.hasAttribute ? cn.hasAttribute('_mce_bogus') : cn.getAttribute('_mce_bogus')) w.writeText('\u00a0'); } else if (!hc) w.writeText('\u00a0'); // No children then padd it } break; case 3: // Text // Check if valid child if (s.validate_children && t.elementName && !t.schema.isValid(t.elementName, '#text')) return; return w.writeText(n.nodeValue); case 4: // CDATA return w.writeCDATA(n.nodeValue); case 8: // Comment return w.writeComment(n.nodeValue); } } else if (n.nodeType == 1) hc = n.hasChildNodes(); if (hc && !closed) { cn = n.firstChild; while (cn) { t._serializeNode(cn); t.elementName = nn; cn = cn.nextSibling; } } // Write element end if (!iv) { if (!closed) w.writeFullEndElement(); else w.writeEndElement(); } }, _protect : function(o) { var t = this; o.items = o.items || []; function enc(s) { return s.replace(/[\r\n\\]/g, function(c) { if (c === '\n') return '\\n'; else if (c === '\\') return '\\\\'; return '\\r'; }); }; function dec(s) { return s.replace(/\\[\\rn]/g, function(c) { if (c === '\\n') return '\n'; else if (c === '\\\\') return '\\'; return '\r'; }); }; each(o.patterns, function(p) { o.content = dec(enc(o.content).replace(p.pattern, function(x, a, b, c) { b = dec(b); if (p.encode) b = t._encode(b); o.items.push(b); return a + '' + c; })); }); return o; }, _unprotect : function(h, o) { h = h.replace(/\