/**
 * Modified from ckeditor. Process malformed html for kissy editor.
 * @author yiminghe@gmail.com
 */
/*
 Copyright (c) 2003-2010, CKSource - Frederico Knabben. All rights reserved.
 For licensing, see LICENSE.html or http://ckeditor.com/license
 */
KISSY.add("editor/core/htmlDataProcessor", function (S, Editor) {

    return {
        /**
         * Builds the two HtmlParser filters used by the editor and installs
         * `editor.htmlDataProcessor` ({dataFilter, htmlFilter, toHtml,
         * toDataFormat, toServer}) on the given editor instance.
         *
         * - `dataFilter` is applied to HTML entering the editor.
         * - `htmlFilter` is applied to HTML leaving the editor.
         *
         * NOTE(review): "htmlparser" is obtained through S.require() but is
         * not listed in this module's `requires` - confirm it is always
         * loaded before init() runs.
         *
         * @param editor editor instance that receives `htmlDataProcessor`.
         */
        init:function (editor) {
            var Node = S.Node,
                UA = S.UA,
                HtmlParser = S.require("htmlparser"),
                htmlFilter = new HtmlParser.Filter(),
                dataFilter = new HtmlParser.Filter(),
                // Prefix of the comments that carry protected source
                // (script / noscript) through the editing area.
                protectedSourceMarker = '{ke_protected}';

            /**
             * Shared <span> rule for both filters.
             *
             * @param element parsed span element.
             * @return {Boolean|undefined} false to drop the element,
             * undefined to keep processing it.
             */
            function filterSpan(element) {
                var hasAttributes = !!element.attributes.length;

                // Nothing inside and nothing on it: drop the element.
                // (This check used to sit after the unwrap branch below and
                // could never be reached.)
                if (!hasAttributes && !element.childNodes.length) {
                    return false;
                }

                // WebKit's styling span, or a span carrying no attributes:
                // clear the tag name so only its content remains.
                if (!hasAttributes ||
                    element.getAttribute('class') === 'Apple-style-span') {
                    element.setTagName(null);
                }
                return undefined;
            }

            (function () {

                /**
                 * Replaces an element by a comment holding its serialized,
                 * URI-encoded source prefixed with protectedSourceMarker.
                 * "--" is escaped because it may not appear in a comment.
                 */
                function wrapAsComment(element) {
                    var html = HtmlParser.serialize(element);
                    return new HtmlParser.Comment(protectedSourceMarker +
                        encodeURIComponent(html).replace(/--/g, "%2D%2D"));
                }

                // Rules for HTML coming from outside into the editor.
                var defaultDataFilterRules = {
                    tagNames:[
                        [/^\?xml.*$/i, ''],
                        [/^.*namespace.*$/i, '']
                    ],
                    attributeNames:[
                        // Event attributes (onXYZ) must not be directly set. They can become
                        // active in the editing area (IE|WebKit).
                        [/^on/, 'ke_on'],
                        [/^lang$/, '']
                    ],
                    tags:{
                        script:wrapAsComment,
                        noscript:wrapAsComment,
                        span:filterSpan
                    }
                };

                // Rules that finalize the HTML produced by the editing area.
                var defaultHtmlFilterRules = {
                    tagNames:[
                        // Remove the "ke:" namespace prefix.
                        [ ( /^ke:/ ), '' ],
                        // Ignore <?xml:namespace> tags.
                        [ ( /^\?xml:namespace$/ ), '' ]
                    ],
                    tags:{
                        $:function (element) {
                            var attributes = element.attributes;

                            if (attributes.length) {
                                // Drop the live attribute first; the
                                // "_ke_saved_" prefix of its saved twin is
                                // stripped by the attributeNames rules below.
                                // Remove duplicated attributes - #3789.
                                var attributeNames = [ 'name', 'href', 'src' ],
                                    savedAttributeName;
                                for (var i = 0; i < attributeNames.length; i++) {
                                    savedAttributeName = '_ke_saved_' + attributeNames[ i ];
                                    if (element.getAttribute(savedAttributeName)) {
                                        element.removeAttribute(attributeNames[ i ]);
                                    }
                                }
                            }

                            return element;
                        },
                        embed:function (element) {
                            var parent = element.parentNode;
                            // If the <embed> is child of a <object>, copy the width
                            // and height attributes from it.
                            if (parent && parent.nodeName === 'object') {
                                var parentWidth = parent.getAttribute("width"),
                                    parentHeight = parent.getAttribute("height");
                                if (parentWidth) {
                                    element.setAttribute("width", parentWidth);
                                }
                                if (parentHeight) {
                                    // Was written to "width", clobbering the
                                    // value copied just above.
                                    element.setAttribute("height", parentHeight);
                                }
                            }
                        },

                        // Remove empty link but not empty anchor.(#3829)
                        a:function (element) {
                            if (!(element.childNodes.length) && !(element.attributes.length)) {
                                return false;
                            }
                        },
                        span:filterSpan
                    },
                    attributes:{
                        // Remove empty style attributes.
                        style:function (v) {
                            if (!S.trim(v)) {
                                return false;
                            }
                        }
                    },
                    attributeNames:[
                        // Promote the saved attribute to the real one:
                        // _ke_saved_href -> href
                        [ ( /^_ke_saved_/ ), '' ],
                        [ ( /^ke_on/ ), 'on' ],
                        [ ( /^_ke.*/ ), '' ],
                        [ ( /^ke:.*$/ ), '' ],
                        // kissy internals
                        [ ( /^_ks.*/ ), '' ]
                    ],
                    comment:function (contents) {
                        var markerLength = protectedSourceMarker.length;
                        // If this is a comment for protected source.
                        if (contents.slice(0, markerLength) === protectedSourceMarker) {
                            contents = S.trim(decodeURIComponent(contents.slice(markerLength)));
                            return HtmlParser.parse(contents).childNodes[0];
                        }
                    }
                };

                if (UA['ie']) {
                    // IE outputs style attribute in capital letters. We should convert
                    // them back to lower case.
                    // Only the property names are lower-cased; lower-casing
                    // the whole value would break e.g.
                    // style='background:url(www.G.cn)'.
                    defaultHtmlFilterRules.attributes.style = function (value) {
                        // Keep the "remove empty style" behaviour of the
                        // default rule that this function replaces.
                        if (!S.trim(value)) {
                            return false;
                        }
                        return value.replace(/(^|;)([^:]+)/g, function (match) {
                            return match.toLowerCase();
                        });
                    };
                }

                htmlFilter.addRules(defaultHtmlFilterRules);
                dataFilter.addRules(defaultDataFilterRules);
            })();


            /**
             * Filler handling: removes the trailing <br/> Firefox appends,
             * the &nbsp; IE appends and the placeholders other browsers put
             * at the end of paragraphs, and re-adds a filler where a block
             * needs one.
             */
            (function () {
                // Regex to scan for &nbsp; at the end of blocks, which are actually placeholders.
                // Safari transforms the &nbsp; to \xa0. (#4172)
                // NOTE(review): the "&nbsp;" literals in this module were
                // restored; the entity had been decoded to a bare space in
                // the copy under review - confirm against upstream.
                var tailNbspRegex = /^[\t\r\n ]*(?:&nbsp;|\xa0)$/;

                // Return the last non-space child node of the block (#4344).
                function lastNoneSpaceChild(block) {
                    var childNodes = block.childNodes,
                        index = childNodes.length - 1,
                        last = childNodes[ index ];
                    while (last && last.nodeType === 3 && !S.trim(last.nodeValue)) {
                        last = childNodes[ --index ];
                    }
                    return last;
                }

                function trimFillers(block, fromSource) {
                    // If the current node is a block, and if we're converting from source or
                    // we're not in IE then search for and remove any tailing BR node.
                    // Also, any &nbsp; at the end of blocks are fillers, remove them as well.
                    // (#2886)
                    var lastChild = lastNoneSpaceChild(block);
                    if (!lastChild) {
                        return;
                    }
                    if (( fromSource || !UA['ie'] ) &&
                        lastChild.nodeType === 1 &&
                        lastChild.nodeName === 'br') {
                        block.removeChild(lastChild);
                    }
                    else if (lastChild.nodeType === 3 &&
                        tailNbspRegex.test(lastChild.nodeValue)) {
                        block.removeChild(lastChild);
                    }
                }

                // True when the block is empty, ends with a <br>, or is a
                // form ending with an input.
                function blockNeedsExtension(block) {
                    var lastChild = lastNoneSpaceChild(block);

                    if (!lastChild) {
                        return true;
                    }
                    if (lastChild.nodeType === 1 && lastChild.nodeName === 'br') {
                        return true;
                    }
                    // Some of the controls in form needs extension too,
                    // to move cursor at the end of the form. (#4791)
                    return block.nodeName === 'form' &&
                        lastChild.nodeName === 'input';
                }

                // Block rule for HTML entering the editor.
                function extendBlockForDisplay(block) {
                    trimFillers(block, true);

                    if (blockNeedsExtension(block)) {
                        // Every browser needs a filler, otherwise an empty
                        // table cell may be too small to hold the cursor.
                        if (UA['ie']) {
                            block.appendChild(new HtmlParser.Text('\xa0'));
                        } else {
                            // Other browsers need a space as well??
                            block.appendChild(new HtmlParser.Text('&nbsp;'));
                            block.appendChild(new HtmlParser.Tag('br'));
                        }
                    }
                }

                // Block rule for HTML leaving the editor.
                function extendBlockForOutput(block) {
                    trimFillers(block, false);
                    if (blockNeedsExtension(block)) {
                        block.appendChild(new HtmlParser.Text('\xa0'));
                    }
                }

                // Find out the list of block-like tags that can contain <br>.
                var dtd = Editor.XHTML_DTD;
                var blockLikeTags = S.merge(
                    dtd.$block,
                    dtd.$listItem,
                    dtd.$tableContent), i;
                for (i in blockLikeTags) {
                    if (blockLikeTags.hasOwnProperty(i)) {
                        // A tag without its own dtd entry cannot be shown to
                        // accept <br>; the guard also avoids `in` throwing
                        // on undefined.
                        if (!dtd[i] || !( 'br' in dtd[i] )) {
                            delete blockLikeTags[i];
                        }
                    }
                }

                // Needed for table layouts: never add fillers to <td>.
                delete blockLikeTags.td;

                // We just avoid filler in <pre> right now.
                // TODO: Support filler for <pre>, line break is also occupy line height.
                delete blockLikeTags.pre;

                var defaultDataBlockFilterRules = { tags:{} };
                var defaultHtmlBlockFilterRules = { tags:{} };

                for (i in blockLikeTags) {
                    if (blockLikeTags.hasOwnProperty(i)) {
                        defaultDataBlockFilterRules.tags[ i ] = extendBlockForDisplay;
                        defaultHtmlBlockFilterRules.tags[ i ] = extendBlockForOutput;
                    }
                }
                dataFilter.addRules(defaultDataBlockFilterRules);
                htmlFilter.addRules(defaultHtmlBlockFilterRules);
            })();


            // Entity handling for htmlparser text fragments: write
            // non-breaking spaces out as the &nbsp; entity.
            // el.innerHTML="&nbsp;"
            // http://yiminghe.javaeye.com/blog/788929
            htmlFilter.addRules({
                text:function (text) {
                    return text.replace(/\xa0/g, "&nbsp;");
                }
            });


            var protectElementRegex = /<(a|area|img|input)\b([^>]*)>/gi,
                protectAttributeRegex = /\b(href|src|name)\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|(?:[^ "'>]+))/gi;

            /**
             * Duplicates href/src/name on a/area/img/input as
             * "_ke_saved_<name>" attributes.
             * IE 6-7 replace the authored value of URL attributes by the
             * resolved DOM value:
             *   #a -> http://xxx/#a
             *   ../x.html -> http://xx/x.html
             */
            function protectAttributes(html) {
                return html.replace(protectElementRegex, function (element, tag, attributes) {
                    var guarded = attributes.replace(protectAttributeRegex, function (fullAttr, attrName) {
                        // We should not rewrite the existed protected attributes,
                        // e.g. clipboard content from editor. (#5218)
                        if (attributes.indexOf('_ke_saved_' + attrName) === -1) {
                            return ' _ke_saved_' + fullAttr + ' ' + fullAttr;
                        }
                        return fullAttr;
                    });
                    return '<' + tag + guarded + '>';
                });
            }

            // <style>...</style> is matched lazily so that two style blocks
            // do not swallow the markup between them; "(?:" replaces the
            // former "(:?" typo, which accepted a stray colon and created a
            // needless capture group.
            var protectElementsRegex = /(?:<style[^>]*>[\s\S]*?<\/style>)|(?:<(?:link|meta|base)[^>]*>)/gi,
                encodedElementsRegex = /<ke:encoded>([^<]*)<\/ke:encoded>/gi;

            var protectElementNamesRegex = /(<\/?)((?:object|embed|param|html|body|head|title|script|noscript)[^>]*>)/gi,
                unprotectElementNamesRegex = /(<\/?)ke:((?:object|embed|param|html|body|head|title|script|noscript)[^>]*>)/gi;

            var protectSelfClosingRegex = /<ke:(param|embed)([^>]*?)\/?>(?!\s*<\/ke:\1)/gi;

            // Expands self-closed <ke:param>/<ke:embed> into open/close pairs.
            function protectSelfClosingElements(html) {
                return html.replace(protectSelfClosingRegex, '<ke:$1$2></ke:$1>');
            }

            // Wraps style/link/meta/base markup, URI-encoded, in <ke:encoded>.
            function protectElements(html) {
                return html.replace(protectElementsRegex, function (match) {
                    return '<ke:encoded>' + encodeURIComponent(match) + '</ke:encoded>';
                });
            }

            // Reverses protectElements().
            function unprotectElements(html) {
                return html.replace(encodedElementsRegex, function (match, encoded) {
                    return decodeURIComponent(encoded);
                });
            }

            // Prefixes the listed element names with "ke:".
            function protectElementsNames(html) {
                return html.replace(protectElementNamesRegex, '$1ke:$2');
            }

            // Reverses protectElementsNames().
            function unprotectElementNames(html) {
                return html.replace(unprotectElementNamesRegex, '$1$2');
            }

            // Parses `html`, writes it through `filter` into `writer` and
            // returns the writer's output.
            function serialize(html, writer, filter) {
                new HtmlParser.Parser(html).parse().writeHtml(writer, filter);
                return writer.getHtml();
            }

            editor.htmlDataProcessor = {
                dataFilter:dataFilter,
                htmlFilter:htmlFilter,

                /**
                 * Editor HTML -> external HTML, filtered by htmlFilter.
                 * Uses BeautifyWriter for readable output (it adds extra
                 * whitespace such as \n and \t).
                 */
                toHtml:function (html) {
                    return serialize(html, new HtmlParser.BeautifyWriter(), htmlFilter);
                },

                /**
                 * External HTML -> editor HTML.
                 *
                 * @param {String} html source html.
                 * @param [_dataFilter] filter to apply instead of the
                 * default dataFilter (e.g. a word filter).
                 */
                toDataFormat:function (html, _dataFilter) {
                    _dataFilter = _dataFilter || dataFilter;

                    html = protectAttributes(html);

                    // Protect elements than can't be set inside a DIV. E.g. IE removes
                    // style tags from innerHTML. (#3710)
                    html = protectElements(html);

                    // Certain elements has problem to go through DOM operation, protect
                    // them by prefixing 'ke' namespace. (#3591)
                    html = protectElementsNames(html);

                    // All none-IE browsers ignore self-closed custom elements,
                    // protecting them into open-close. (#3591)
                    html = protectSelfClosingElements(html);

                    // Invalid markup may break the parser, so call the
                    // browser to help us fixing a possibly invalid HTML
                    // structure first.
                    var div = new Node("<div>");
                    // Add fake character to workaround IE comments bug. (#3801)
                    div.html('a' + html);
                    html = div.html().slice(1);

                    // Unprotect "some" of the protected elements at this point.
                    html = unprotectElementNames(html);

                    html = unprotectElements(html);

                    // qc #3710: BasicWriter is used here to drop useless
                    // text nodes such as runs of whitespace between tags.
                    return serialize(html, new HtmlParser.BasicWriter(), _dataFilter);
                },

                /**
                 * Most compact HTML, for sending to the server: htmlFilter
                 * output written with MinifyWriter.
                 */
                toServer:function (html) {
                    return serialize(html, new HtmlParser.MinifyWriter(), htmlFilter);
                }
            };
        }
    };
}, {
    requires:['./base']
});