/**
 * Clean HTML pasted from MS-Word. Modified from CKEditor.
 *
 * The module builds an HtmlParser filter ("wordFilter") which
 *  - strips office/vml specific markup,
 *  - turns Word's pseudo lists (paragraphs with bullet spans) and real lists
 *    into flat <ke:li> items and then re-assembles them into proper lists,
 *  - white-lists the inline styles that the editor is able to manage.
 *
 * @author yiminghe@gmail.com
 */
KISSY.add("editor/core/dynamic/wordFilter", function (S, KEStyle, HtmlParser) {
    var $ = S.all,
        UA = S.UA,
        dtd = HtmlParser.DTD,
        wordFilter = new HtmlParser.Filter(),
        // A leading number followed by a CSS unit. Only the whole match ([0]) is consumed.
        // (No nested quantifier, to avoid catastrophic backtracking on long digit runs.)
        cssLengthRelativeUnit = /^([.\d]*)(em|ex|px|gd|rem|vw|vh|vm|ch|mm|cm|in|pt|pc|deg|rad|ms|s|hz|khz)/i,
        // e.g. 0px 0pt 0px
        emptyMarginRegex = /^(?:\b0[^\s]*\s*){1,4}$/,
        romanLiteralPattern = '^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$',
        lowerRomanLiteralRegex = new RegExp(romanLiteralPattern),
        upperRomanLiteralRegex = new RegExp(romanLiteralPattern.toUpperCase()),
        orderedPatterns = {
            'decimal':/\d+/,
            'lower-roman':lowerRomanLiteralRegex,
            'upper-roman':upperRomanLiteralRegex,
            'lower-alpha':/^[a-z]+$/,
            'upper-alpha':/^[A-Z]+$/
        },
        unorderedPatterns = {
            'disc':/[l\u00B7\u2002]/,
            'circle':/[\u006F\u00D8]/,
            'square':/[\u006E\u25C6]/
        },
        listMarkerPatterns = {
            'ol':orderedPatterns,
            'ul':unorderedPatterns
        },
        // Ordered from the biggest to the smallest literal, as required by fromRoman().
        romans = [
            [1000, 'M'],
            [900, 'CM'],
            [500, 'D'],
            [400, 'CD'],
            [100, 'C'],
            [90, 'XC'],
            [50, 'L'],
            [40, 'XL'],
            [10, 'X'],
            [9, 'IX'],
            [5, 'V'],
            [4, 'IV'],
            [1, 'I']
        ],
        alphabets = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";

    /**
     * Convert roman numbering back to decimal.
     * @param {String} str roman literal (any case).
     * @return {Number} decimal value; 0 when no literal could be consumed.
     */
    function fromRoman(str) {
        str = str.toUpperCase();
        var retVal = 0,
            pair,
            literal;
        for (var i = 0; i < romans.length; ++i) {
            pair = romans[ i ];
            literal = pair[ 1 ];
            // Greedily consume the current literal from the head of the string.
            while (str.substr(0, literal.length) == literal) {
                retVal += pair[ 0 ];
                str = str.substr(literal.length);
            }
        }
        return retVal;
    }

    /**
     * Convert alphabet numbering back to decimal.
     * NOTE(review): single letters map to 1..26 as expected, multi-letter
     * markers are computed as "base 26 with A = 0, plus one" (e.g. "AA" -> 1).
     * Kept as is because the result is only used for relative comparison and
     * as the item "value"; confirm against real Word output before changing.
     * @param {String} str alphabetic marker (any case).
     * @return {Number}
     */
    function fromAlphabet(str) {
        str = str.toUpperCase();
        var l = alphabets.length, retVal = 1;
        for (var x = 1; str.length > 0; x *= l) {
            retVal += alphabets.indexOf(str.charAt(str.length - 1)) * x;
            str = str.substr(0, str.length - 1);
        }
        return retVal;
    }

    /**
     * Set the "style" attribute, or remove it when the style text is empty.
     */
    function setStyle(element, str) {
        if (str) {
            element.setAttribute("style", str);
        } else {
            element.removeAttribute("style");
        }
    }

    /**
     * Convert the specified CSS length value to the calculated pixel length inside this page.
     * <strong>Note:</strong> Percentage based value is left intact.
     * @param {String} cssLength CSS length value.
     */
    var convertToPx = (function () {
        // Off-screen measuring element, created lazily on first use.
        var calculator;

        return function (cssLength) {
            if (!calculator) {
                calculator = $(
                    '<div style="position:absolute;left:-9999px;' +
                        'top:-9999px;margin:0px;padding:0px;border:0px;"' +
                        '></div>')['prependTo']("body");

            }

            if (!(/%$/).test(cssLength)) {
                calculator.css('width', cssLength);
                return calculator[0].clientWidth;
            }

            return cssLength;
        };
    })();

    // State shared between the list related filters while walking the document.
    var listBaseIndent = 0,
        previousListItemMargin = null,
        previousListId;

    /**
     * @return the single child of elem, or null when elem has zero or several children.
     */
    function onlyChild(elem) {
        var childNodes = elem.childNodes || [],
            count = childNodes.length,
            firstChild = (count == 1) && childNodes[0];
        return firstChild || null;
    }

    /**
     * Detach every descendant named tagName from its parent's child list.
     * @return {Array} the removed nodes, in document order.
     */
    function removeAnyChildWithName(elem, tagName) {
        var children = elem.childNodes || [],
            ret = [],
            child;

        for (var i = 0; i < children.length; i++) {
            child = children[ i ];
            if (!child.nodeName) {
                continue;
            }
            if (child.nodeName == tagName) {
                ret.push(child);
                children.splice(i--, 1);
            }
            ret = ret.concat(removeAnyChildWithName(child, tagName));
        }
        return ret;
    }

    /**
     * @param elem
     * @param {RegExp|String} tagNameRegex pattern the ancestor's nodeName has to match.
     * @return the closest matching ancestor, or a falsy value when there is none.
     */
    function getAncestor(elem, tagNameRegex) {
        var parent = elem.parentNode;
        while (parent && !( parent.nodeName && parent.nodeName.match(tagNameRegex) )) {
            parent = parent.parentNode;
        }
        return parent;
    }

    /**
     * Depth-first search for the first descendant accepted by evaluator.
     * @return the node found, or null.
     */
    function firstChild(elem, evaluator) {
        var child,
            i,
            children = elem.childNodes || [];

        for (i = 0; i < children.length; i++) {
            child = children[ i ];
            if (evaluator(child)) {
                return child;
            } else if (child.nodeName) {
                child = firstChild(child, evaluator);
                if (child) {
                    return child;
                }
            }
        }

        return null;
    }


    /**
     * Add style(s) to the element's "style" attribute. Three call forms:
     *  - addStyle(elem, name, value, isPrepend)   name/value pair (value is a string)
     *  - addStyle(elem, {name: value}, isPrepend) style literal
     *  - addStyle(elem, "a:b;c:d", isPrepend)     raw style text
     */
    function addStyle(elem, name, value, isPrepend) {
        var styleText, addingStyleText = '';
        // name/value pair.
        if (typeof value == 'string') {
            addingStyleText += name + ':' + value + ';';
        } else {
            // style literal.
            if (typeof name == 'object') {
                for (var style in name) {
                    if (name.hasOwnProperty(style)) {
                        addingStyleText += style + ':' + name[ style ] + ';';
                    }
                }
            }
            // raw style text form.
            else {
                addingStyleText += name;
            }
            // In these two forms the third argument is the prepend flag.
            isPrepend = value;
        }


        styleText = elem.getAttribute("style");

        styleText = ( isPrepend ?
            [ addingStyleText, styleText ]
            : [ styleText, addingStyleText ] ).join(';');

        // Drop the leading and every doubled separator introduced by the join.
        setStyle(elem, styleText.replace(/^;|;(?=;)/g, ''));
    }


    /**
     * @return {Object} set (tag -> 1) of the tags which may contain tagName according to the DTD.
     */
    function parentOf(tagName) {
        var result = {},
            tag;
        for (tag in dtd) {
            if (tag.indexOf('$') == -1 && dtd[ tag ][ tagName ]) {
                result[ tag ] = 1;
            }
        }
        return result;
    }

    var filters =
    {
        // Transform a normal list into flat list items only presentation.
        // E.g. <ul><li>level1<ol><li>level2</li></ol></li> =>
        // <ke:li ke:listtype="ul" ke:indent="1">level1</ke:li>
        // <ke:li ke:listtype="ol" ke:indent="2">level2</ke:li>
        flattenList:function (element, level) {
            level = typeof level == 'number' ? level : 1;

            var listStyleType;

            // All list items are of the same type.
            switch (element.getAttribute("type")) {
                case 'a' :
                    listStyleType = 'lower-alpha';
                    break;
                case '1' :
                    listStyleType = 'decimal';
                    break;
                // TODO: Support more list style type from MS-Word.
            }

            var children = element.childNodes || [],
                child;

            for (var i = 0; i < children.length; i++) {
                child = children[ i ];

                if (child.nodeName in dtd.$listItem) {
                    var listItemChildren = child.childNodes || [],
                        count = listItemChildren.length,
                        last = listItemChildren[ count - 1 ];

                    // Move out nested list (an empty list item has no last child).
                    if (last && last.nodeName in dtd.$list) {
                        // NOTE(review): upstream CKEditor moves `last` right after `child`
                        // here; confirm that insertAfter() on the list itself does the same.
                        element.insertAfter(child);
                        // Remove the parent list item if it's just a holder.
                        if (!--listItemChildren.length) {
                            element.removeChild(children[i--]);
                        }
                    }

                    child.setTagName('ke:li');

                    // Inherit numbering from list root on the first list item.
                    element.getAttribute("start") &&
                        !i &&
                    ( child.setAttribute("value", element.getAttribute("start")));

                    filters.stylesFilter(
                        [
                            [
                                'tab-stops', null, function (val) {
                                var stop = val.split(' ')[ 1 ],
                                    margin = stop && stop.match(cssLengthRelativeUnit);
                                margin && ( previousListItemMargin = convertToPx(margin[ 0 ]) );
                            }
                            ],
                            ( level == 1 ? [ 'mso-list', null, function (val) {
                                val = val.split(' ');
                                var listId = Number(val[ 0 ].match(/\d+/));
                                if (listId !== previousListId) {
                                    child.setAttribute('ke:reset', 1);
                                }
                                previousListId = listId;
                            } ] : null )
                        ])(child.getAttribute("style"));

                    child.setAttribute('ke:indent', level);
                    child.setAttribute('ke:listtype', element.nodeName);
                    // Only record a list style type which has actually been resolved.
                    listStyleType && child.setAttribute('ke:list-style-type', listStyleType);
                }
                // Flatten sub list.
                else if (child.nodeName in dtd.$list) {
                    // Absorb sub list children.
                    filters.flattenList(child, level + 1);
                    children = children.slice(0, i).concat(child.childNodes).concat(children.slice(i + 1));
                    element.empty();
                    for (var j = 0, num = children.length; j < num; j++) {
                        element.appendChild(children[j]);
                    }
                }
            }

            element.nodeName = element.tagName = null;

            // We're losing the tag name here, signal this element as a list.
            element.setAttribute('ke:list', 1);
        },

        /**
         * Try to collect all list items among the children and establish one
         * or more HTML list structures for them.
         * @param element
         */
        assembleList:function (element) {
            var children = element.childNodes || [],
                child,
                listItem, // The current processing ke:li element.
                listItemIndent, // Indent level of current list item.
                lastIndent,
                lastListItem, // The previous one just been added to the list.
                list, // Current staging list and it's parent list if any.
                openedLists = [],
                previousListStyleType,
                previousListType,
                previousPattern;

            // Properties of the list item are to be resolved from the list bullet.
            var bullet,
                listType,
                listStyleType,
                itemNumeric;

            for (var i = 0; i < children.length; i++) {
                child = children[ i ];

                if ('ke:li' == child.nodeName) {
                    child.setTagName('li');
                    listItem = child;

                    bullet = listItem.getAttribute('ke:listsymbol');
                    bullet = bullet && bullet.match(/^(?:[(]?)([^\s]+?)([.)]?)$/);
                    listType = listStyleType = itemNumeric = null;

                    if (listItem.getAttribute('ke:ignored')) {
                        children.splice(i--, 1);
                        continue;
                    }


                    // This's from a new list root.
                    listItem.getAttribute('ke:reset') && ( list = lastIndent = lastListItem = null );

                    // List item indent level might come from a real list indentation or
                    // been resolved from a pseudo list item's margin value, even get
                    // no indentation at all.
                    listItemIndent = Number(listItem.getAttribute('ke:indent'));

                    // We're moving out of the current list, cleaning up.
                    if (listItemIndent != lastIndent) {
                        previousListType = previousListStyleType = null;
                    }

                    // List type and item style are already resolved.
                    if (!bullet) {
                        listType = listItem.getAttribute('ke:listtype') || 'ol';
                        listStyleType = listItem.getAttribute('ke:list-style-type');
                    }
                    else {
                        // The previous style may be one we have no marker pattern for.
                        previousPattern = previousListType &&
                            listMarkerPatterns[ previousListType ][ previousListStyleType ];

                        // Probably share the same list style type with previous list item,
                        // give it priority to avoid ambiguous between C(Alpha) and C.(Roman).
                        if (previousPattern && previousPattern.test(bullet[ 1 ])) {
                            listType = previousListType;
                            listStyleType = previousListStyleType;
                        }
                        else {
                            for (var type in listMarkerPatterns) {
                                for (var style in listMarkerPatterns[ type ]) {
                                    if (listMarkerPatterns[ type ][ style ].test(bullet[ 1 ])) {
                                        // Small numbering has higher priority, when dealing with ambiguous
                                        // between C(Alpha) and C.(Roman).
                                        if (type == 'ol' && ( /alpha|roman/ ).test(style)) {
                                            var num = /roman/.test(style) ? fromRoman(bullet[ 1 ]) : fromAlphabet(bullet[ 1 ]);
                                            if (!itemNumeric || num < itemNumeric) {
                                                itemNumeric = num;
                                                listType = type;
                                                listStyleType = style;
                                            }
                                        }
                                        else {
                                            listType = type;
                                            listStyleType = style;
                                            break;
                                        }
                                    }
                                }
                            }
                        }

                        // Simply use decimal/disc for the rest forms of unrepresentable
                        // numerals, e.g. Chinese..., but as long as there a second part
                        // included, it has a bigger chance of being a order list ;)
                        !listType && ( listType = bullet[ 2 ] ? 'ol' : 'ul' );
                    }

                    previousListType = listType;
                    previousListStyleType = listStyleType || ( listType == 'ol' ? 'decimal' : 'disc' );
                    if (listStyleType && listStyleType != ( listType == 'ol' ? 'decimal' : 'disc' )) {
                        addStyle(listItem, 'list-style-type', listStyleType);
                    }

                    // Figure out start numbering.
                    if (listType == 'ol' && bullet) {
                        switch (listStyleType) {
                            case 'decimal' :
                                itemNumeric = Number(bullet[ 1 ]);
                                break;
                            case 'lower-roman':
                            case 'upper-roman':
                                itemNumeric = fromRoman(bullet[ 1 ]);
                                break;
                            case 'lower-alpha':
                            case 'upper-alpha':
                                itemNumeric = fromAlphabet(bullet[ 1 ]);
                                break;
                        }

                        // Always create the numbering, swipe out unnecessary ones later.
                        listItem.setAttribute("value", itemNumeric);
                    }

                    // Start the list construction.
                    if (!list) {
                        openedLists.push(list = new HtmlParser.Tag(listType));
                        list.appendChild(listItem);
                        element.replaceChild(list, children[i]);
                    } else {
                        if (listItemIndent > lastIndent) {
                            openedLists.push(list = new HtmlParser.Tag(listType));
                            list.appendChild(listItem);
                            lastListItem.appendChild(list);
                        }
                        else if (listItemIndent < lastIndent) {
                            // There might be a negative gap between two list levels. (#4944)
                            var diff = lastIndent - listItemIndent,
                                parent;
                            while (diff-- && ( parent = list.parentNode )) {
                                list = parent.parentNode;
                            }
                            list.appendChild(listItem);
                        }
                        else {
                            list.appendChild(listItem);
                        }
                        children.splice(i--, 1);
                    }

                    lastListItem = listItem;
                    lastIndent = listItemIndent;
                }
                else if (child.nodeType == 3 && !S.trim(child.nodeValue)) {
                    // Ignore whitespace-only text nodes between list items.
                } else if (list) {
                    // Any other node terminates the list being built.
                    list = lastIndent = lastListItem = null;
                }
            }

            for (i = 0; i < openedLists.length; i++) {
                postProcessList(openedLists[ i ]);
            }
        },

        /**
         * A simple filter which always rejecting.
         */
        falsyFilter:function () {
            return false;
        },

        /**
         * A filter dedicated on the 'style' attribute filtering, e.g. dropping/replacing style properties.
         * @param styles {Array} in form of [ styleNameRegexp, styleValueRegexp,
         *  newStyleValue/newStyleGenerator, newStyleName ] where only the first
         *  parameter is mandatory.
         * @param [whitelist] {Boolean} Whether the {@param styles} will be considered as a white-list.
         * @return {Function} function (styleText, element) returning the filtered
         *  style text, or false when no style is left.
         */
        stylesFilter:function (styles, whitelist) {
            return function (styleText, element) {
                var rules = [];
                // html-encoded quote might be introduced by 'font-family'
                // from MS-Word which confused the following regexp. e.g.
                //'font-family: &quot;Lucida, Console&quot;'
                ( styleText || '' )
                    .replace(/&quot;/g, '"')
                    .replace(/\s*([^ :;]+)\s*:\s*([^;]+)\s*(?=;|$)/g,
                    function (match, name, value) {
                        name = name.toLowerCase();
                        name == 'font-family' && ( value = value.replace(/["']/g, '') );

                        var namePattern,
                            valuePattern,
                            newValue,
                            newName;
                        for (var i = 0; i < styles.length; i++) {
                            if (styles[ i ]) {
                                namePattern = styles[ i ][ 0 ];
                                valuePattern = styles[ i ][ 1 ];
                                newValue = styles[ i ][ 2 ];
                                newName = styles[ i ][ 3 ];

                                if (name.match(namePattern)
                                    && ( !valuePattern || value.match(valuePattern) )) {
                                    name = newName || name;
                                    whitelist && ( newValue = newValue || value );

                                    if (typeof newValue == 'function') {
                                        newValue = newValue(value, element, name);
                                    }

                                    // Return an couple indicate both name and value
                                    // changed.
                                    if (newValue && newValue.push) {
                                        name = newValue[ 0 ];
                                        newValue = newValue[ 1 ];
                                    }

                                    if (typeof newValue == 'string') {
                                        rules.push([ name, newValue ]);
                                    }

                                    // First matching rule wins; a non-string result drops the style.
                                    return;
                                }
                            }
                        }

                        // Not mentioned by any rule: kept unless we are white-listing.
                        !whitelist && rules.push([ name, value ]);

                    });

                for (var i = 0; i < rules.length; i++) {
                    rules[ i ] = rules[ i ].join(':');
                }

                return rules.length ?
                    ( rules.join(';') + ';' ) : false;
            };
        },

        /**
         * A filter which will be used to apply inline css style according the stylesheet
         * definition rules, is generated lazily when filtering.
         */
        applyStyleFilter:null

    };


    // 1. move consistent list item styles up to list root.
    // 2. clear out unnecessary list item numbering.
    function postProcessList(list) {
        var children = list.childNodes || [],
            child,
            count = children.length,
            match,
            mergeStyle,
            styleTypeRegexp = /list-style-type:(.*?)(?:;|$)/,
            stylesFilter = filters.stylesFilter;


        // The list already carries its own list style type.
        if (styleTypeRegexp.exec(list.getAttribute("style"))) {
            return;
        }

        for (var i = 0; i < count; i++) {
            child = children[ i ];

            // Numbering which equals the natural position is redundant.
            if (child.getAttribute("value") && Number(child.getAttribute("value")) == i + 1) {
                child.removeAttribute("value");
            }

            match = styleTypeRegexp.exec(child.getAttribute("style"));

            if (match) {
                if (match[ 1 ] == mergeStyle || !mergeStyle) {
                    mergeStyle = match[ 1 ];
                }
                else {
                    // Items disagree, nothing to merge.
                    mergeStyle = null;
                    break;
                }
            }
        }

        if (mergeStyle) {
            for (i = 0; i < count; i++) {
                var style = children[ i ].getAttribute("style");

                if (style) {
                    style = stylesFilter([
                        [ 'list-style-type']
                    ])(style);
                    setStyle(children[ i ], style);
                }
            }
            addStyle(list, 'list-style-type', mergeStyle);
        }
    }

    var utils = {
        // Create a <ke:listbullet> which indicate an list item type.
        // `bullet` is the match result of the bullet text, `bulletText` the raw text.
        createListBulletMarker:function (bullet, bulletText) {
            var marker = new HtmlParser.Tag('ke:listbullet');
            marker.setAttribute("ke:listsymbol", bullet[ 0 ]);
            marker.appendChild(new HtmlParser.Text(bulletText));
            return marker;
        },

        // Returns true when the element is styled with "mso-list: Ignore", undefined otherwise.
        isListBulletIndicator:function (element) {
            var styleText = element.getAttribute("style");
            if (/mso-list\s*:\s*Ignore/i.test(styleText)) {
                return true;
            }
        },

        // Whether the element's single child consists of white spaces / &nbsp; entities only.
        isContainingOnlySpaces:function (element) {
            var text;
            return ( ( text = onlyChild(element) )
                && ( /^(?:\s|&nbsp;)+$/ ).test(text.nodeValue) );
        },

        /**
         * Turn the element into a <ke:li> when it contains a <ke:listbullet> marker.
         * @return {Boolean} whether the element has been recognized as a list item.
         */
        resolveList:function (element) {
            // <ke:listbullet> indicate a list item.
            var listMarker;

            if (( listMarker = removeAnyChildWithName(element, 'ke:listbullet') )
                && listMarker.length
                && ( listMarker = listMarker[ 0 ] )) {
                element.setTagName('ke:li');

                if (element.getAttribute("style")) {
                    var styleStr = filters.stylesFilter(
                        [
                            // Text-indent is not representing list item level any more.
                            [ 'text-indent' ],
                            [ 'line-height' ],
                            // First attempt is to resolve indent level from on a constant margin increment.
                            [ ( /^margin(?:-left)?$/ ), null, function (margin) {
                                // Deal with component/short-hand form.
                                var values = margin.split(' ');
                                margin = convertToPx(values[ 3 ] || values[ 1 ] || values [ 0 ]);

                                // Figure out the indent unit by checking the first time of incrementation.
                                if (!listBaseIndent && previousListItemMargin !== null &&
                                    margin > previousListItemMargin) {
                                    listBaseIndent = margin - previousListItemMargin;
                                }

                                previousListItemMargin = margin;
                                if (listBaseIndent) {
                                    element.setAttribute('ke:indent', listBaseIndent &&
                                        ( Math.ceil(margin / listBaseIndent) + 1 ) || 1);
                                }
                            } ],
                            // The best situation: "mso-list:l0 level1 lfo2" tells the belonged list root, list item indentation, etc.
                            [ ( /^mso-list$/ ), null, function (val) {
                                val = val.split(' ');
                                // A missing/unparsable component yields 0, like an unmatched number did before.
                                var listId = Number(val[ 0 ].match(/\d+/)),
                                    indent = Number(val[ 1 ] && val[ 1 ].match(/\d+/)) || 0;

                                if (indent == 1) {
                                    listId !== previousListId && ( element.setAttribute('ke:reset', 1) );

                                    previousListId = listId;
                                }
                                element.setAttribute('ke:indent', indent);
                            } ]
                        ])(element.getAttribute("style"), element);

                    setStyle(element, styleStr);
                }

                // First level list item might be presented without a margin.
                // In case all above doesn't apply.
                if (!element.getAttribute("ke:indent")) {
                    previousListItemMargin = 0;
                    element.setAttribute('ke:indent', 1);
                }

                // Carry over the marker attributes (ke:listsymbol, ke:ignored...) to the item.
                S.each(listMarker.attributes, function (a) {
                    element.setAttribute(a.name, a.value);
                });

                return true;
            }
            // Current list disconnected.
            else {
                previousListId = previousListItemMargin = listBaseIndent = null;
            }
            return false;
        },

        // Providing a shorthand style then retrieve one or more style component values.
        getStyleComponents:(function () {
            var calculator = $(
                '<div style="position:absolute;left:-9999px;top:-9999px;"></div>').prependTo("body");

            return function (name, styleValue, fetchList) {
                calculator.css(name, styleValue);
                var styles = {},
                    count = fetchList.length;
                for (var i = 0; i < count; i++) {
                    styles[ fetchList[ i ] ] = calculator.css(fetchList[ i ]);
                }

                return styles;
            };
        })(),

        listDtdParents:parentOf('ol')
    };

    (function () {
        var blockLike = S.merge(dtd.$block, dtd.$listItem, dtd.$tableContent),
            falsyFilter = filters.falsyFilter,
            stylesFilter = filters.stylesFilter,
            createListBulletMarker = utils.createListBulletMarker,
            flattenList = filters.flattenList,
            assembleList = filters.assembleList,
            isListBulletIndicator = utils.isListBulletIndicator,
            containsNothingButSpaces = utils.isContainingOnlySpaces,
            resolveListItem = utils.resolveList,
            // Must NOT be named `convertToPx`: a local of that name would shadow the
            // module level converter and make this wrapper call itself forever.
            convertToPxStr = function (value) {
                value = convertToPx(value);
                return isNaN(value) ? value : value + 'px';
            },
            getStyleComponents = utils.getStyleComponents,
            listDtdParents = utils.listDtdParents;

        wordFilter.addRules({

            tagNames:[
                // Remove script, meta and link elements.
                [ ( /meta|link|script/ ), '' ]
            ],

            root:function (element) {
                element.filterChildren();
                assembleList(element);
            },

            tags:{
                '^':function (element) {
                    // Transform CSS style declaration to inline style.
                    var applyStyleFilter;
                    if (UA.gecko && ( applyStyleFilter = filters.applyStyleFilter )) {
                        applyStyleFilter(element);
                    }
                },

                $:function (element) {
                    var tagName = element.nodeName || '';

                    // Convert length unit of width/height on blocks to
                    // a more editor-friendly way (px).
                    if (tagName in blockLike && element.getAttribute("style")) {
                        setStyle(element, stylesFilter(
                            [
                                [ ( /^(?:width|height)$/ ), null, convertToPxStr ]
                            ])(element.getAttribute("style")));
                    }

                    // Processing headings.
                    if (tagName.match(/h\d/)) {
                        element.filterChildren();
                        // Is the heading actually a list item?
                        if (resolveListItem(element)) {
                            return;
                        }
                    }
                    // Remove inline elements which contain only empty spaces.
                    else if (tagName in dtd.$inline) {
                        element.filterChildren();
                        if (containsNothingButSpaces(element)) {
                            element.setTagName(null);
                        }
                    }
                    // Remove element with ms-office namespace,
                    // with it's content preserved, e.g. 'o:p'.
                    else if (tagName.indexOf(':') != -1
                        && tagName.indexOf('ke') == -1) {
                        element.filterChildren();

                        // Restore image real link from vml.
                        if (tagName == 'v:imagedata') {
                            var href = element.getAttribute('o:href');
                            if (href) {
                                element.setAttribute("src", href);
                            }
                            element.setTagName('img');
                            return;
                        }
                        element.setTagName(null);
                    }

                    // Assembling list items into a whole list.
                    if (tagName in listDtdParents) {
                        element.filterChildren();
                        assembleList(element);
                    }
                },

                // We'll drop any style sheet, but Firefox conclude
                // certain styles in a single style element, which are
                // required to be changed into inline ones.
                'style':function (element) {
                    if (UA.gecko) {
                        // Grab only the style definition section.
                        var styleTextNode = onlyChild(element),
                            styleDefSection = styleTextNode && styleTextNode.nodeValue &&
                                styleTextNode.nodeValue
                                    .match(/\/\* Style Definitions \*\/([\s\S]*?)\/\*/),
                            styleDefText = styleDefSection && styleDefSection[ 1 ],
                            rules = {}; // Storing the parsed result.

                        if (styleDefText) {
                            styleDefText
                                // Remove line-breaks.
                                .replace(/[\n\r]/g, '')
                                // Extract selectors and style properties.
                                .replace(/(.+?)\{(.+?)\}/g,
                                function (rule, selectors, styleBlock) {
                                    selectors = selectors.split(',');
                                    var length = selectors.length;
                                    for (var i = 0; i < length; i++) {
                                        // Assume MS-Word mostly generate only simple
                                        // selector( [Type selector][Class selector]).
                                        S.trim(selectors[ i ])
                                            .replace(/^(\w+)(\.[\w-]+)?$/g,
                                            function (match, tagName, className) {
                                                tagName = tagName || '*';
                                                // The class part is optional: the group is
                                                // undefined for a pure type selector.
                                                className = className ? className.substring(1) : '';

                                                // Reject MS-Word Normal styles.
                                                if (className.match(/MsoNormal/)) {
                                                    return;
                                                }

                                                if (!rules[ tagName ]) {
                                                    rules[ tagName ] = {};
                                                }
                                                if (className) {
                                                    rules[ tagName ][ className ] = styleBlock;
                                                } else {
                                                    rules[ tagName ] = styleBlock;
                                                }
                                            });
                                    }
                                });

                            filters.applyStyleFilter = function (element) {
                                var name = rules[ '*' ] ? '*' : element.nodeName,
                                    className = element.getAttribute('class'),
                                    style;
                                if (name in rules) {
                                    style = rules[ name ];
                                    if (typeof style == 'object') {
                                        style = style[ className ];
                                    }
                                    // Maintain style rules priorities.
                                    style && addStyle(element, style, true);
                                }
                            };
                        }
                    }
                    return false;
                },

                'p':function (element) {
                    // This's a fall-back approach to recognize list item in FF3.6,
                    // as it's not perfect as not all list style (e.g. "heading list") is shipped
                    // with this pattern. (#6662)
                    if (/MsoListParagraph/.exec(element.getAttribute('class'))) {
                        var bulletText = firstChild(element, function (node) {
                            return node.nodeType == 3 && !containsNothingButSpaces(node.parentNode);
                        });
                        var bullet = bulletText && bulletText.parentNode;
                        // The paragraph may carry no text at all, hence no bullet.
                        bullet && !bullet.getAttribute("style") &&
                        ( bullet.setAttribute("style", 'mso-list: Ignore;'));
                    }

                    element.filterChildren();
                    // Is the paragraph actually a list item?
                    resolveListItem(element);
                },

                'div':function (element) {
                    // Aligned table with no text surrounded is represented by a wrapper div, from which
                    // table cells inherit as text-align styles, which is wrong.
                    // Instead we use a clear-float div after the table to properly achieve the same layout.
                    var singleChild = onlyChild(element);
                    if (singleChild && singleChild.nodeName == 'table') {
                        var attrs = element.attributes;

                        S.each(attrs, function (attr) {
                            singleChild.setAttribute(attr.name, attr.value);
                        });

                        if (element.getAttribute("style")) {
                            addStyle(singleChild, element.getAttribute("style"));
                        }

                        var clearFloatDiv = new HtmlParser.Tag('div');
                        addStyle(clearFloatDiv, 'clear', 'both');
                        element.appendChild(clearFloatDiv);
                        element.setTagName(null);
                    }
                },

                'td':function (element) {
                    // 'td' in 'thead' is actually <th>.
                    if (getAncestor(element, 'thead')) {
                        element.setTagName('th');
                    }
                },

                // MS-Word sometimes present list as a mixing of normal list
                // and pseudo-list, normalize the previous ones into pseudo form.
                'ol':flattenList,
                'ul':flattenList,
                'dl':flattenList,

                'font':function (element) {
                    // Drop the font tag if it comes from list bullet text.
                    if (isListBulletIndicator(element.parentNode)) {
                        element.setTagName(null);
                        return;
                    }

                    element.filterChildren();

                    var styleText = element.getAttribute("style"),
                        parent = element.parentNode;

                    if (parent && 'font' == parent.nodeName) // Merge nested <font> tags.
                    {
                        S.each(element.attributes, function (attr) {
                            parent.setAttribute(attr.name, attr.value);
                        });
                        styleText && addStyle(parent, styleText);
                        element.setTagName(null);
                    }
                    // Convert the merged into a span with all attributes preserved.
                    else {
                        styleText = styleText || '';
                        // IE's having those deprecated attributes, normalize them.
                        if (element.getAttribute("color")) {
                            element.getAttribute("color") != '#000000' && ( styleText += 'color:' + element.getAttribute("color") + ';' );
                            element.removeAttribute("color");
                        }
                        if (element.getAttribute("face")) {
                            styleText += 'font-family:' + element.getAttribute("face") + ';';
                            element.removeAttribute("face");
                        }
                        var size = element.getAttribute("size");
                        // TODO: Mapping size in ranges of xx-small,
                        // x-small, small, medium, large, x-large, xx-large.
                        if (size) {
                            styleText += 'font-size:' +
                                (size > 3 ? 'large'
                                    : ( size < 3 ? 'small' : 'medium' ) ) + ';';
                            element.removeAttribute("size");
                        }
                        element.setTagName("span");
                        addStyle(element, styleText);
                    }
                },

                'span':function (element) {
                    // Remove the span if it comes from list bullet text.
                    if (isListBulletIndicator(element.parentNode)) {
                        return false;
                    }
                    element.filterChildren();
                    if (containsNothingButSpaces(element)) {
                        element.setTagName(null);
                        return null;
                    }

                    // List item bullet type is supposed to be indicated by
                    // the text of a span with style 'mso-list : Ignore' or an image.
                    if (isListBulletIndicator(element)) {
                        var listSymbolNode = firstChild(element, function (node) {
                            return node.nodeValue || node.nodeName == 'img';
                        });

                        var listSymbol = listSymbolNode && ( listSymbolNode.nodeValue || 'l.' ),
                            listType = listSymbol && listSymbol.match(/^(?:[(]?)([^\s]+?)([.)]?)$/);

                        if (listType) {
                            var marker = createListBulletMarker(listType, listSymbol);
                            // Some non-existed list items might be carried by an inconsequential list,
                            // indicate by "mso-hide:all/display:none",
                            // those are to be removed later, now mark it with "ke:ignored".
                            var ancestor = getAncestor(element, 'span');
                            if (ancestor && (/mso-hide:\s*all|display:\s*none/).
                                test(ancestor.getAttribute("style"))) {
                                marker.setAttribute('ke:ignored', 1);
                            }
                            return marker;
                        }
                    }

                    var styleText = element.getAttribute("style");

                    // Assume MS-Word mostly carry font related styles on <span>,
                    // adapting them to editor's convention.
                    if (styleText) {

                        setStyle(element, stylesFilter(
                            [
                                // Drop 'inline-height' style which make lines overlapping.
                                [ /^line-height$/ ],
                                [ /^font-family$/ ] ,
                                [ /^font-size$/ ] ,
                                [ /^color$/ ] ,
                                [ /^background-color$/ ]
                            ]
                        )(styleText, element));
                    }
                },
                // Editor doesn't support anchor with content currently (#3582),
                // drop such anchors with content preserved.
                'a':function (element) {
                    var href;
                    if (!(href = element.getAttribute("href")) && element.getAttribute("name")) {
                        element.setTagName(null);
                    } else if (UA.webkit && href && href.match(/file:\/\/\/[\S]+#/i)) {
                        // Keep only the in-document hash of local file links.
                        element.setAttribute("href", href.replace(/file:\/\/\/[^#]+/i, ''));
                    }
                },
                'ke:listbullet':function (element) {
                    if (getAncestor(element, /h\d/)) {
                        element.setTagName(null);
                    }
                }
            },

            attributeNames:[
                // Remove onmouseover and onmouseout events (from MS Word comments effect)
                [ ( /^onmouse(?:out|over)/ ), '' ],
                // Onload on image element.
                [ ( /^onload$/ ), '' ],
                // Remove office and vml attribute from elements.
                [ ( /(?:v|o):\w+/ ), '' ],
                // Remove lang/language attributes.
                [ ( /^lang/ ), '' ]
            ],

            attributes:{
                'style':stylesFilter(
                    // Provide a white-list of styles that we preserve, those should
                    // be the ones that could later be altered with editor tools.
                    [
                        // Leave list-style-type
                        [ ( /^list-style-type$/ ) ],

                        // Preserve margin-left/right which used as default indent style in the editor.
                        [ ( /^margin$|^margin-(?!bottom|top)/ ), null, function (value, element, name) {
                            if (element.nodeName in { p:1, div:1 }) {
                                var indentStyleName = 'margin-left';

                                // Extract component value from 'margin' shorthand.
                                if (name == 'margin') {
                                    value = getStyleComponents(name, value,
                                        [ indentStyleName ])[ indentStyleName ];
                                } else if (name != indentStyleName) {
                                    return null;
                                }

                                if (value && !emptyMarginRegex.test(value)) {
                                    return [ indentStyleName, value ];
                                }
                            }

                            return null;
                        } ],

                        // Preserve clear float style.
                        [ ( /^clear$/ ) ],

                        [ ( /^border.*|margin.*|vertical-align|float$/ ), null,
                            function (value, element) {
                                if (element.nodeName == 'img') {
                                    return value;
                                }
                            } ],

                        [ (/^width|height$/ ), null,
                            function (value, element) {
                                if (element.nodeName in { table:1, td:1, th:1, img:1 }) {
                                    return value;
                                }
                            } ]
                    ], 1),

                // Prefer width styles over 'width' attributes.
                'width':function (value, element) {
                    if (element.nodeName in dtd.$tableContent) {
                        return false;
                    }
                },
                // Prefer border styles over table 'border' attributes.
                'border':function (value, element) {
                    if (element.nodeName in dtd.$tableContent) {
                        return false;
                    }
                },

                // Only Firefox carry style sheet from MS-Word, which
                // will be applied by us manually. For other browsers
                // the css className is useless.
                'class':falsyFilter,

                // MS-Word always generate 'background-color' along with 'bgcolor',
                // simply drop the deprecated attributes.
                'bgcolor':falsyFilter,

                // Deprecate 'valign' attribute in favor of 'vertical-align'.
                'valign':function (value, element) {
                    addStyle(element, 'vertical-align', value);
                    return false;
                }
            },


            // For non-IE browsers, some useful data might be buried under these IE-conditional
            // comments where RegExp were the right approach to dig them out where usual approach
            // is transform it into a fake element node which hold the desired data.
            comment:!UA.ie ?
                function (value, node) {
                    var imageInfo = value.match(/<img.*?>/),
                        listInfo = value.match(/^\[if !supportLists\]([\s\S]*?)\[endif\]$/);

                    // Seek for list bullet indicator.
                    if (listInfo) {
                        // Bullet symbol could be either text or an image.
                        var listSymbol = listInfo[ 1 ] || ( imageInfo && 'l.' ),
                            listType = listSymbol && listSymbol.match(/>(?:[(]?)([^\s]+?)([.)]?)</);
                        // Without a recognizable symbol there is no marker to create.
                        if (listType) {
                            return createListBulletMarker(listType, listSymbol);
                        }
                    }

                    // Reveal the <img> element in conditional comments for Firefox.
                    if (UA.gecko && imageInfo) {
                        var img = new HtmlParser.Parser(imageInfo[0]).parse().childNodes[ 0 ],
                            previousComment = node.previousSibling,
                        // Try to dig the real image link from vml markup from previous comment text.
                            imgSrcInfo = previousComment && previousComment.toHtml().match(/<v:imagedata[^>]*o:href=['"](.*?)['"]/),
                            imgSrc = imgSrcInfo && imgSrcInfo[ 1 ];

                        // Is there a real 'src' url to be used?
                        imgSrc && ( img.setAttribute("src", imgSrc) );
                        return img;
                    }

                    return false;
                }
                : falsyFilter
        });
    })();

    return {

        /**
         * Clean up html pasted from MS-Word.
         * @param {String} html raw pasted html.
         * @param editor editor instance; its htmlDataProcessor does the actual filtering.
         * @return {String} cleaned html.
         */
        toDataFormat:function (html, editor) {
            // Firefox will be confused by those downlevel-revealed IE conditional
            // comments, fixing them first( convert it to upperlevel-revealed one ).
            // e.g. <![if !vml]>...<![endif]>
            //<!--[if !supportLists]-->
            // <span style=\"font-family: Wingdings;\" lang=\"EN-US\">
            // <span style=\"\">l<span style=\"font: 7pt &quot;Times New Roman&quot;;\">&nbsp;
            // </span></span></span>
            // <!--[endif]-->

            // becomes:

            //<!--[if !supportLists]
            // <span style=\"font-family: Wingdings;\" lang=\"EN-US\">
            // <span style=\"\">l<span style=\"font: 7pt &quot;Times New Roman&quot;;\">&nbsp;
            // </span></span></span>
            // [endif]-->
            if (UA.gecko) {
                html = html.replace(/(<!--\[if[^<]*?\])-->([\S\s]*?)<!--(\[endif\]-->)/gi,
                    '$1$2$3');
            }

            // First pass: the MS-Word specific filter.
            html = editor.htmlDataProcessor.toDataFormat(html, wordFilter);

            // Second pass: the regular editor filter.
            html = editor.htmlDataProcessor.toDataFormat(html);

            return html;
        }

    };


}, {
    requires:['../styles', 'htmlparser']
});