Example #1
0
        it("filter will run only once", function () {
            // How many times the span rule has fired.
            var spanVisits = 0;

            var filter = new HtmlParser.Filter();
            filter.addRules({
                tags: {
                    // The paragraph rule filters its children itself; the
                    // assertion below checks that the span rule still fires
                    // exactly once.
                    p: function (paragraph) {
                        paragraph.filterChildren();
                    },
                    span: function () {
                        spanVisits += 1;
                    }
                }
            });

            var writer = new HtmlParser.BasicWriter(),
                source = "<p><span></span></p>",
                root = new HtmlParser.Parser(source).parse();

            root.writeHtml(writer, filter);

            expect(writer.getHtml()).toBe("<p><span></span></p>");
            expect(spanVisits).toBe(1);
        });
Example #2
0
        it("can filter attributeNames", function () {
            var filter = new HtmlParser.Filter();

            filter.addRules({
                attributeNames: [
                    // Promote the saved attribute to the real one, replacing
                    // the original, i.e. replace(/^_keSaved_/, ""):
                    // _keSaved_src -> src
                    [ /^_keSaved_/, '' ],
                    [ /^ke_on/, 'on' ],
                    [ /^_ke.*/, '' ],
                    // TODO: unclear how these attributes get introduced
                    [ /^_ks.*/, '' ],
                    [ /^ke:.*$/, '' ]
                ]
            });

            var writer = new HtmlParser.BasicWriter(),
                source = "<p _keSaved_src='y'>x</p>",
                root = new HtmlParser.Parser(source).parse();

            root.writeHtml(writer, filter);

            expect(writer.getHtml()).toBe("<p src=\"y\">x</p>");
        });
Example #3
0
        it('can filter elementNames', function () {
            var filter = new HtmlParser.Filter();

            // Every tag name matching one of these patterns is renamed to ''.
            filter.addRules({
                tagNames: [
                    [ /^script$/i, '' ],
                    [ /^iframe$/i, '' ],
                    [ /^style$/i, '' ],
                    [ /^link$/i, '' ],
                    [ /^meta$/i, '' ],
                    [ /^\?xml.*$/i, '' ],
                    [ /^.*namespace.*$/i, '' ]
                ]
            });

            var writer = new HtmlParser.BasicWriter();

            var source = '<script>alert(1);</script>x<link/>';
            source += '<?xml:namespace prefix = v ns = "urn:schemas-microsoft-com:vml" />';
            source += 'y';

            var root = new HtmlParser.Parser(source).parse();
            root.writeHtml(writer, filter);

            expect(writer.getHtml()).toBe("xy");
        });
Example #4
0
        it("filter children works while modify html", function () {
            var filter = new HtmlParser.Filter();

            // First rule set: the paragraph filters its own children.
            filter.addRules({
                tags: {
                    p: function (paragraph) {
                        paragraph.filterChildren();
                    }
                }
            });

            // Second rule set: the paragraph gets a text node appended.
            filter.addRules({
                tags: {
                    p: function (paragraph) {
                        var filler = new HtmlParser.Text("&nbsp;");
                        paragraph.appendChild(filler);
                    }
                }
            });

            var writer = new HtmlParser.BasicWriter(),
                source = "<p></p>",
                root = new HtmlParser.Parser(source).parse();

            root.writeHtml(writer, filter);

            expect(writer.getHtml()).toBe("<p>&nbsp;</p>");
        });
Example #5
0
        it("filterChildren should works", function () {
            var html = "<div class='ul'><div class='li'>1</div><div class='li'>2</div></div>",
                parser = new Parser(html),
                node = parser.parse(),
                writer = new HtmlParser.BasicWriter(),
                filter = new HtmlParser.Filter();

            filter.addRules({
                tags: {
                    // Rule registered under the `$` key: rewrites div.li into
                    // <li>, and div.ul into <ul> once all of its element
                    // children are <li>.
                    $: function (el) {
                        if (el.getAttribute("class") === "li") {
                            el.nodeName = el.tagName = "li";
                            el.removeAttribute("class");
                        } else if (el.getAttribute("class") === 'ul') {
                            // filter its children first, root node need children info after filtering
                            el.filterChildren();
                            var childNodes = el.childNodes;
                            for (var i = 0; i < childNodes.length; i++) {
                                // Read the child on every iteration; caching it in
                                // the loop initializer would only ever inspect
                                // the first child.
                                var child = childNodes[i];
                                if (child.nodeType === 1 && child.tagName !== "li") {
                                    // A non-<li> element child: leave the div alone.
                                    return;
                                }
                            }
                            el.nodeName = el.tagName = 'ul';
                            el.removeAttribute("class");
                        }
                    }
                }
            });
            node.writeHtml(writer, filter);
            expect(writer.getHtml()).toBe("<ul><li>1</li><li>2</li></ul>");
        });
Example #6
0
        it('can replace text', function () {
            var source = "<li>12</li><li>21</li>";
            var root = new Parser(source).parse();
            var writer = new HtmlParser.BasicWriter();
            var filter = new HtmlParser.Filter();

            // The text rule rewrites every "2" in a text value to "3".
            function swapDigits(text) {
                return text.replace(/2/g, '3');
            }

            filter.addRules({ text: swapDigits });

            root.writeHtml(writer, filter);

            expect(writer.getHtml()).toBe("<li>13</li><li>31</li>");
        });
Example #7
0
        it("can filter imagedata in vml@ie", function () {
            // Copies attribute `from` onto `to` and drops `from`, but only
            // when `from` carries a truthy value.
            function moveAttribute(el, from, to) {
                var value = el.getAttribute(from);
                if (value) {
                    el.setAttribute(to, value);
                    el.removeAttribute(from);
                }
            }

            var filter = new HtmlParser.Filter();
            filter.addRules({
                tagNames: [
                    [ /^\?xml.*$/i, '' ]
                ],
                tags: {
                    $: function (el) {
                        var name = el.tagName || "";

                        // Only namespaced tags whose name does not start with
                        // "ke" are rewritten.
                        if (name.indexOf(':') === -1 || /^ke/.test(name)) {
                            return;
                        }

                        if (name !== 'v:imagedata') {
                            el.setTagName("");
                            return;
                        }

                        // v:imagedata becomes a plain <img>.
                        moveAttribute(el, 'o:href', 'src');
                        moveAttribute(el, 'o:title', 'title');
                        el.setTagName("img");
                    }
                }
            });

            var writer = new HtmlParser.BasicWriter();

            // Fixture markup, delivered through the getTextSync callback.
            var source = '';
            getTextSync('/kissy/src/html-parser/tests/others/editor/vml_img.html', function (text) {
                source = text;
            });

            var root = new HtmlParser.Parser(source).parse();
            root.writeHtml(writer, filter);

            var found = writer.getHtml().indexOf('<img src="xx.jpg" title="me" />') !== -1;
            expect(found).toBe(true);
        });
Example #8
0
        it("can modify html structure on fly", function () {
            // A text value that is only optional whitespace followed by one
            // non-breaking space (entity or raw character).
            var tailNbspRegex = /^[\t\r\n ]*(?:&nbsp;|\xa0)$/;

            // Return the last non-space child node of the block (#4344).
            function lastNoneSpaceChild(block) {
                var children = block.childNodes,
                    index = children.length,
                    candidate = children[ index - 1 ];

                while (candidate && candidate.nodeType === 3 && !util.trim(candidate.nodeValue)) {
                    index -= 1;
                    candidate = children[ index ];
                }
                return candidate;
            }

            // Removes a trailing <br> (when converting from source, or when not
            // in IE) or a trailing &nbsp; filler text from the block. (#2886)
            function trimFillers(block, fromSource) {
                var tail = lastNoneSpaceChild(block);
                if (!tail) {
                    return;
                }

                var isTrailingBreak = ( fromSource || !isIE() ) &&
                    tail.nodeType === 1 &&
                    tail.nodeName === 'br';
                var isTrailingFiller = !isTrailingBreak &&
                    tail.nodeType === 3 &&
                    tailNbspRegex.test(tail.nodeValue);

                if (isTrailingBreak || isTrailingFiller) {
                    block.removeChild(tail);
                }
            }

            // True when the block has no meaningful last child, ends with a
            // <br>, or is a form ending with an input.
            function blockNeedsExtension(block) {
                var tail = lastNoneSpaceChild(block);

                if (!tail) {
                    return true;
                }
                if (tail.nodeType === 1 && tail.nodeName === 'br') {
                    return true;
                }
                // Some of the controls in form needs extension too,
                // to move cursor at the end of the form. (#4791)
                return block.nodeName === 'form' && tail.nodeName === 'input';
            }

            function extendBlockForDisplay(block) {
                trimFillers(block, true);

                if (!blockNeedsExtension(block)) {
                    return;
                }

                // Every browser needs a space filler! Otherwise an empty table
                // may leave too small a gap to hold the cursor.
                if (isIE()) {
                    block.appendChild(new HtmlParser.Text('\xa0'));
                } else {
                    // Do the other browsers need the space as well??
                    block.appendChild(new HtmlParser.Text('&nbsp;'));
                    block.appendChild(new HtmlParser.Tag('br'));
                }
            }

            var filter = new HtmlParser.Filter();
            filter.addRules({
                tags: {
                    p: extendBlockForDisplay
                }
            });

            var writer = new HtmlParser.BasicWriter(),
                source = "<p></p><p>1</p>",
                root = new HtmlParser.Parser(source).parse();

            root.writeHtml(writer, filter);

            var expected = isIE() ?
                "<p>\xa0</p><p>1</p>" :
                "<p>&nbsp;<br /></p><p>1</p>";
            expect(writer.getHtml()).toBe(expected);
        });
Example #9
0
        it('can filter attribute', function () {
            /**
             * Builds a filter for the text of a `style` attribute.
             *
             * @param {Array} styles list of rule tuples
             *     [namePattern, valuePattern, newValue, newName]; trailing
             *     entries may be omitted.
             * @param {*} [whitelist] when truthy, only declarations matched by
             *     a rule survive; when falsy, unmatched declarations are kept.
             * @return {Function} function (styleText, element) returning the
             *     rebuilt "name:value;..." text, or false when nothing is left.
             */
            function stylesFilter(styles, whitelist) {
                return function (styleText, element) {
                    // Surviving declarations, collected as [name, value] pairs.
                    var rules = [];
                    // html-encoded quote might be introduced by 'font-family'
                    // from MS-Word which confused the following regexp. e.g.
                    //'font-family: &quot;Lucida, Console&quot;'
                    String(styleText)
                        .replace(/&quot;/g, '"')
                        // replace() is used only to iterate the declarations;
                        // its result is discarded.
                        .replace(/\s*([^ :;]+)\s*:\s*([^;]+)\s*(?=;|$)/g,
                        function (match, name, value) {
                            name = name.toLowerCase();
                            if (name === 'font-family') {
                                ( value = value.replace(/["']/g, ''));
                            }

                            var namePattern,
                                valuePattern,
                                newValue,
                                newName;
                            for (var i = 0; i < styles.length; i++) {
                                if (styles[ i ]) {
                                    namePattern = styles[ i ][ 0 ];
                                    valuePattern = styles[ i ][ 1 ];
                                    newValue = styles[ i ][ 2 ];
                                    newName = styles[ i ][ 3 ];

                                    if (name.match(namePattern) &&
                                        ( !valuePattern || value.match(valuePattern) )) {
                                        name = newName || name;
                                        // In whitelist mode a matching rule without a
                                        // replacement keeps the original value.
                                        if (whitelist) {
                                            ( newValue = newValue || value );
                                        }

                                        if (typeof newValue === 'function')
                                            newValue = newValue(value, element, name);

                                        // Return an couple indicate both name and value
                                        // changed.
                                        if (newValue && newValue.push) {
                                            name = newValue[ 0 ];
                                            newValue = newValue[ 1 ];
                                        }

                                        // Only a string result is kept; anything else
                                        // drops the declaration.
                                        if (typeof newValue === 'string')
                                            rules.push([ name, newValue ]);
                                        // The first matching rule decides; later rules
                                        // are not consulted.
                                        return;
                                    }
                                }
                            }

                            // No rule matched: keep the declaration unless whitelisting.
                            if (!whitelist) {
                                rules.push([ name, value ]);
                            }
                        });

                    for (var i = 0; i < rules.length; i++) {
                        rules[ i ] = rules[ i ].join(':');
                    }

                    return rules.length ?
                        ( rules.join(';') + ';' ) : false;
                };
            }

            // No whitelist flag is passed, so every declaration whose name
            // matches one of these patterns is dropped.
            var filterStyle = stylesFilter([
                // strip Word-specific property names
                [/mso/i],
                [/w:WordDocument/i],
                // IE-specific property names
                [/^-ms/i],
                // Firefox-specific property names
                [/^-moz/i],
                // WebKit-specific property names
                [/^-webkit/i]//
            ]);

            var dataFilterRules = {
                attributes: {
                    // word
                    // Returns false for an empty class or one containing a
                    // class name starting with "Mso".
                    "class": function (value) {

                        if (
                            !value ||
                            /(^|\s+)Mso/.test(value)
                            ) {
                            return false;
                        }
                        return value;
                    },
                    'style': function (value) {
                        // strip Microsoft junk such as <i style="mso-bidi-font-style: normal">
                        var re = filterStyle(value);
                        if (!re) {
                            return false;
                        }
                        return re;
                    }
                }
            };

            var filter = new HtmlParser.Filter();
            filter.addRules(dataFilterRules);

            var writer = new HtmlParser.BasicWriter(),
                before = '<div class="Mso-list">1</div>' +
                    '<b class="">2</b>' +
                    '<span style="mso-bidi-font-style: normal;-ms-k:1;">3</span>';

            var n = new HtmlParser.Parser(before).parse();

            n.writeHtml(writer, filter);

            expect(writer.getHtml()).toBe("<div>1</div><" + "b>2</b><span>3</span>");
        });
(function () {
    // Merged map of the block, list-item and table-content DTD groups;
    // consulted below with `tagName in blockLike`.
    var blockLike = util.merge(dtd.$block, dtd.$listItem, dtd.$tableContent);

    // Local aliases for the shared filter and utility helpers.
    var falsyFilter = filters.falsyFilter;
    var stylesFilter = filters.stylesFilter;
    var createListBulletMarker = utils.createListBulletMarker;
    var flattenList = filters.flattenList;
    var assembleList = filters.assembleList;
    var isListBulletIndicator = utils.isListBulletIndicator;
    var containsNothingButSpaces = utils.isContainingOnlySpaces;
    var resolveListItem = utils.resolveList;

    // Runs the value through convertToPx and appends 'px' when the result is
    // numeric; a non-numeric result is returned as is.
    var convertToPxStr = function (value) {
        var converted = convertToPx(value);
        if (isNaN(converted)) {
            return converted;
        }
        return converted + 'px';
    };

    var getStyleComponents = utils.getStyleComponents;
    var listDtdParents = utils.listDtdParents;

    wordFilter.addRules({

        tagNames: [
            // Remove script, meta and link elements.
            // NOTE(review): the pattern is unanchored, so it presumably also
            // matches any tag name that merely contains one of these words
            // (e.g. 'noscript') — confirm that is intended.
            [ ( /meta|link|script/ ), '' ]
        ],

        // Root rule: filter the children first, then pass the element to
        // assembleList.
        root: function (element) {
            element.filterChildren();
            assembleList(element);
        },

        tags: {
            '^': function (element) {
                // Transform CSS style declaration to inline style.
                var applyStyleFilter;
                if (UA.gecko && ( applyStyleFilter = filters.applyStyleFilter )) {
                    applyStyleFilter(element);
                }
            },

            $: function (element) {
                var tagName = element.nodeName || '';

                // Convert length unit of width/height on blocks to
                // a more editor-friendly way (px).
                if (tagName in blockLike && element.getAttribute('style')) {
                    setStyle(element, stylesFilter(
                        [
                            [ ( /^(:?width|height)$/ ), null, convertToPxStr ]
                        ])(element.getAttribute('style')));
                }

                // Processing headings.
                if (tagName.match(/h\d/)) {
                    element.filterChildren();
                    // Is the heading actually a list item?
                    if (resolveListItem(element)) {
                        return;
                    }
                }
                // Remove inline elements which contain only empty spaces.
                else if (tagName in dtd.$inline) {
                    element.filterChildren();
                    if (containsNothingButSpaces(element)) {
                        element.setTagName(null);
                    }
                }
                // Remove element with ms-office namespace,
                // with it's content preserved, e.g. 'o:p'.
                else if (tagName.indexOf(':') !== -1 && tagName.indexOf('ke') === -1) {
                    element.filterChildren();

                    // Restore image real link from vml.
                    if (tagName === 'v:imagedata') {
                        var href = element.getAttribute('o:href');
                        if (href) {
                            element.setAttribute('src', href);
                        }
                        element.setTagName('img');
                        return;
                    }
                    element.setTagName(null);
                }

                // Assembling list items into a whole list.
                if (tagName in listDtdParents) {
                    element.filterChildren();
                    assembleList(element);
                }
            },

            // We'll drop any style sheet, but Firefox conclude
            // certain styles in a single style element, which are
            // required to be changed into inline ones.
            'style': function (element) {
                if (UA.gecko) {
                    // Grab only the style definition section.
                    var styleDefSection = onlyChild(element).nodeValue
                            .match(/\/\* Style Definitions \*\/([\s\S]*?)\/\*/),
                        styleDefText = styleDefSection && styleDefSection[ 1 ],
                        rules = {}; // Storing the parsed result.

                    if (styleDefText) {
                        styleDefText
                            // Remove line-breaks.
                            .replace(/[\n\r]/g, '')
                            // Extract selectors and style properties.
                            .replace(/(.+?)\{(.+?)\}/g,
                            function (rule, selectors, styleBlock) {
                                selectors = selectors.split(',');
                                var length = selectors.length;
                                for (var i = 0; i < length; i++) {
                                    // Assume MS-Word mostly generate only simple
                                    // selector( [Type selector][Class selector]).
                                    util.trim(selectors[ i ])
                                        .replace(/^(\w+)(\.[\w-]+)?$/g,
                                        function (match, tagName, className) {
                                            tagName = tagName || '*';
                                            className = className.substring(1, className.length);

                                            // Reject MS-Word Normal styles.
                                            if (className.match(/MsoNormal/)) {
                                                return;
                                            }

                                            if (!rules[ tagName ]) {
                                                rules[ tagName ] = {};
                                            }
                                            if (className) {
                                                rules[ tagName ][ className ] = styleBlock;
                                            } else {
                                                rules[ tagName ] = styleBlock;
                                            }
                                        });
                                }
                            });

                        filters.applyStyleFilter = function (element) {
                            var name = rules[ '*' ] ? '*' : element.nodeName,
                                className = element.getAttribute('class'),
                                style;
                            if (name in rules) {
                                style = rules[ name ];
                                if (typeof style === 'object') {
                                    style = style[ className ];
                                }
                                // Maintain style rules priorities.
                                if (style) {
                                    addStyle(element, style, true);
                                }
                            }
                        };
                    }
                }
                return false;
            },

            'p': function (element) {
                // This's a fall-back approach to recognize list item in FF3.6,
                // as it's not perfect as not all list style (e.g. 'heading list') is shipped
                // with this pattern. (#6662)
                if (/MsoListParagraph/.exec(element.getAttribute('class'))) {
                    var bulletText = firstChild(element, function (node) {
                        return node.nodeType === 3 && !containsNothingButSpaces(node.parentNode);
                    });
                    var bullet = bulletText && bulletText.parentNode;
                    if (bullet && !bullet.getAttribute('style')) {
                        bullet.setAttribute('style', 'mso-list: Ignore;');
                    }
                }

                element.filterChildren();
                // Is the paragraph actually a list item?
                resolveListItem(element);
            },

            'div': function (element) {
                // Aligned table with no text surrounded is represented by a wrapper div, from which
                // table cells inherit as text-align styles, which is wrong.
                // Instead we use a clear-float div after the table to properly achieve the same layout.
                var singleChild = onlyChild(element);
                if (singleChild && singleChild.nodeName === 'table') {
                    var attrs = element.attributes;

                    util.each(attrs, function (attr) {
                        singleChild.setAttribute(attr.name, attr.value);
                    });

                    if (element.getAttribute('style')) {
                        addStyle(singleChild, element.getAttribute('style'));
                    }

                    var clearFloatDiv = new HtmlParser.Tag('div');
                    addStyle(clearFloatDiv, 'clear', 'both');
                    element.appendChild(clearFloatDiv);
                    element.setTagName(null);
                }
            },

            'td': function (element) {
                // 'td' in 'thead' is actually <th>.
                if (getAncestor(element, 'thead')) {
                    element.setTagName('th');
                }
            },

            // MS-Word sometimes present list as a mixing of normal list
            // and pseudo-list, normalize the previous ones into pseudo form.
            'ol': flattenList,
            'ul': flattenList,
            'dl': flattenList,

            'font': function (element) {
                // Drop the font tag if it comes from list bullet text.
                if (isListBulletIndicator(element.parentNode)) {
                    element.setTagName(null);
                    return;
                }

                element.filterChildren();

                var styleText = element.getAttribute('style'),
                    parent = element.parentNode;

                if ('font' === parent.name)     // Merge nested <font> tags.
                {
                    util.each(element.attributes, function (attr) {
                        parent.setAttribute(attr.name, attr.value);
                    });
                    if (styleText) {
                        addStyle(parent, styleText);
                    }
                    element.setTagName(null);
                }
                // Convert the merged into a span with all attributes preserved.
                else {
                    styleText = styleText || '';
                    // IE's having those deprecated attributes, normalize them.
                    if (element.getAttribute('color')) {
                        if (element.getAttribute('color') !== '#000000') {
                            styleText += 'color:' + element.getAttribute('color') + ';';
                        }
                        element.removeAttribute('color');
                    }
                    if (element.getAttribute('face')) {
                        styleText += 'font-family:' + element.getAttribute('face') + ';';
                        element.removeAttribute('face');
                    }
                    var size = element.getAttribute('size');
                    // TODO: Mapping size in ranges of xx-small,
                    // x-small, small, medium, large, x-large, xx-large.
                    if (size) {
                        styleText += 'font-size:' +
                            (size > 3 ? 'large'
                                : ( size < 3 ? 'small' : 'medium' ) ) + ';';
                        element.removeAttribute('size');
                    }
                    element.setTagName('span');
                    addStyle(element, styleText);
                }
            },

            'span': function (element) {
                // Remove the span if it comes from list bullet text.
                if (isListBulletIndicator(element.parentNode)) {
                    return false;
                }
                element.filterChildren();
                if (containsNothingButSpaces(element)) {
                    element.setTagName(null);
                    return null;
                }

                // List item bullet type is supposed to be indicated by
                // the text of a span with style 'mso-list : Ignore' or an image.
                if (isListBulletIndicator(element)) {
                    var listSymbolNode = firstChild(element, function (node) {
                        return node.nodeValue || node.nodeName === 'img';
                    });

                    var listSymbol = listSymbolNode && ( listSymbolNode.nodeValue || 'l.' ),
                        listType = listSymbol && listSymbol.match(/^(?:[(]?)([^\s]+?)([.)]?)$/);

                    if (listType) {
                        var marker = createListBulletMarker(listType, listSymbol);
                        // Some non-existed list items might be carried by an inconsequential list,
                        // indicate by 'mso-hide:all/display:none',
                        // those are to be removed later, now mark it with 'ke:ignored'.
                        var ancestor = getAncestor(element, 'span');
                        if (ancestor && (/ mso-hide:\s*all|display:\s*none /).
                            test(ancestor.getAttribute('style'))) {
                            marker.setAttribute('ke:ignored', 1);
                        }
                        return marker;
                    }
                }

                // Update the src attribute of image element with href.
                var styleText = element.getAttribute('style');

                // Assume MS-Word mostly carry font related styles on <span>,
                // adapting them to editor's convention.
                if (styleText) {

                    setStyle(element, stylesFilter(
                        [
                            // Drop 'inline-height' style which make lines overlapping.
                            [ /^line-height$/ ],
                            [  /^font-family$/  ] ,
                            [  /^font-size$/  ] ,
                            [  /^color$/  ] ,
                            [  /^background-color$/  ]
                        ]
                    )(styleText, element));
                }
            },
            // Editor doesn't support anchor with content currently (#3582),
            // drop such anchors with content preserved.
            'a': function (element) {
                var href;
                if (!(href = element.getAttribute('href')) && element.getAttribute('name')) {
                    element.setTagName(null);
                } else if (UA.webkit && href && href.match(/file:\/\/\/[\S]+#/i)) {
                    element.setAttribute('href', href.replace(/file:\/\/\/[^#]+/i, ''));
                }
            },
            'ke:listbullet': function (element) {
                if (getAncestor(element, /h\d/)) {
                    element.setTagName(null);
                }
            }
        },

        attributeNames: [
            // Remove onmouseover and onmouseout events (from MS Word comments effect)
            [ ( /^onmouse(:?out|over)/ ), '' ],
            // Onload on image element.
            [ ( /^onload$/ ), '' ],
            // Remove office and vml attribute from elements.
            [ ( /(?:v|o):\w+/ ), '' ],
            // Remove lang/language attributes.
            [ ( /^lang/ ), '' ]
        ],

        attributes: {
            // Provide a white-list of styles that we preserve, those should
            // be the ones that could later be altered with editor tools.
            // Each entry is [namePattern, valuePattern, newValue]; the trailing
            // 1 is passed as stylesFilter's second (whitelist) argument.
            'style': stylesFilter([
                    [ ( /^list-style-type$/ ) ],

                    // Preserve margin-left/right which used as default indent style in the editor.
                    // Only <p> and <div> keep a value, and always under the
                    // name 'margin-left'; everything else yields null.
                    [ ( /^margin$|^margin-(?!bottom|top)/ ), null, function (value, element, name) {
                        if (element.nodeName in { p: 1, div: 1 }) {
                            var indentStyleName = 'margin-left';

                            // Extract component value from 'margin' shorthand.
                            if (name === 'margin') {
                                value = getStyleComponents(name, value,
                                    [ indentStyleName ])[ indentStyleName ];
                            } else if (name !== indentStyleName) {
                                return null;
                            }

                            if (value && !emptyMarginRegex.test(value)) {
                                return [ indentStyleName, value ];
                            }
                        }

                        return null;
                    } ],

                    // Preserve clear float style.
                    [ ( /^clear$/ ) ],

                    // Kept on <img> only.
                    // NOTE(review): '^' binds only to the first alternative and
                    // '$' only to the last, so 'margin.*' and 'vertical-align'
                    // match anywhere in the name — confirm this is intended.
                    [ ( /^border.*|margin.*|vertical-align|float$/ ), null,
                        function (value, element) {
                            if (element.nodeName === 'img') {
                                return value;
                            }
                        } ],

                    // Kept on table, td, th and img only.
                    // NOTE(review): same anchoring precedence as above — this
                    // also matches names merely ending in 'height' (e.g.
                    // 'line-height') or starting with 'width'; confirm.
                    [ (/^width|height$/ ), null,
                        function (value, element) {
                            if (element.nodeName in { table: 1, td: 1, th: 1, img: 1 }) {
                                return value;
                            }
                        } ]
                ],
                1),

            // Prefer width styles over 'width' attributes.
            'width': function (value, element) {
                if (element.nodeName in dtd.$tableContent) {
                    return false;
                }
            },
            // Prefer border styles over table 'border' attributes.
            'border': function (value, element) {
                if (element.nodeName in dtd.$tableContent) {
                    return false;
                }
            },

            // Only Firefox carry style sheet from MS-Word, which
            // will be applied by us manually. For other browsers
            // the css className is useless.
            'class': falsyFilter,

            // MS-Word always generate 'background-color' along with 'bgcolor',
            // simply drop the deprecated attributes.
            'bgcolor': falsyFilter,

            // Deprecate 'valign' attribute in favor of 'vertical-align'.
            'valign': function (value, element) {
                addStyle(element, 'vertical-align', value);
                return false;
            }
        },


        // For non-IE browsers, some useful data might be buried inside these IE-conditional
        // comments. A RegExp is the right tool to dig that data out; the usual approach
        // is to transform the comment into a fake element node which holds the desired data.
        comment: UA.ie ?
            function (value, node) {
                var imageInfo = value.match(/<img.*?>/),
                    listInfo = value.match(/^\[if !supportLists\]([\s\S]*?)\[endif\]$/);

                // Seek for list bullet indicator.
                if (listInfo) {
                    // Bullet symbol could be either text or an image.
                    var listSymbol = listInfo[ 1 ] || ( imageInfo && 'l.' ),
                        listType = listSymbol && listSymbol.match(/>(?:[(]?)([^\s]+?)([.)]?)</);
                    return createListBulletMarker(listType, listSymbol);
                }

                // Reveal the <img> element in conditional comments for Firefox.
                if (UA.gecko && imageInfo) {
                    var img = new HtmlParser.Parser(imageInfo[0]).parse().childNodes[ 0 ],
                        previousComment = node.previousSibling,
                    // Try to dig the real image link from vml markup from previous comment text.
                        imgSrcInfo = previousComment && previousComment.toHtml().match(/<v:imagedata[^>]*o:href=[''](.*?)['']/),
                        imgSrc = imgSrcInfo && imgSrcInfo[ 1 ];

                    // Is there a real 'src' url to be used?
                    if (imgSrc) {
                        ( img.setAttribute('src', imgSrc) );
                    }
                    return img;
                }

                return false;
            }
            : falsyFilter
    });
})();
define('kg/editor-plugins/1.1.3/word-filter',["html-parser","util","node","ua"],function(require, exports, module) {
/**
 * @ignore
 * clean html pasted from word. modified from ckeditor.
 * @author yiminghe@gmail.com
 */

/*jshint loopfunc:true*/
var HtmlParser = require('html-parser');
var util = require('util');
var $ = require('node'),
    UA = require('ua'),
    dtd = HtmlParser.DTD,
    wordFilter = new HtmlParser.Filter(),
    // Leading numeric part followed by a CSS unit, e.g. '36.0pt'.
    // The numeric part is matched by a single star quantifier: a nested
    // '(...*)+' form accepts exactly the same strings but backtracks
    // exponentially on long digit runs that are not followed by a unit.
    cssLengthRelativeUnit = /^([.\d]*)(em|ex|px|gd|rem|vw|vh|vm|ch|mm|cm|in|pt|pc|deg|rad|ms|s|hz|khz)/i,
// e.g. 0px 0pt 0px
    emptyMarginRegex = /^(?:\b0[^\s]*\s*){1,4}$/,
    // Lower-case roman numeral grammar; the upper-case variant is derived from it.
    romanLiteralPattern = '^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$',
    lowerRomanLiteralRegex = new RegExp(romanLiteralPattern),
    upperRomanLiteralRegex = new RegExp(romanLiteralPattern.toUpperCase()),
    // Bullet text patterns keyed by the list-style-type they indicate (ordered lists).
    orderedPatterns = {
        'decimal': /\d+/,
        'lower-roman': lowerRomanLiteralRegex,
        'upper-roman': upperRomanLiteralRegex,
        'lower-alpha': /^[a-z]+$/,
        'upper-alpha': /^[A-Z]+$/
    },
    // Bullet text patterns keyed by the list-style-type they indicate (unordered lists).
    unorderedPatterns = {
        'disc': /[l\u00B7\u2002]/,
        'circle': /[\u006F\u00D8]/,
        'square': /[\u006E\u25C6]/
    },
    // List tag name -> candidate style patterns, iterated when resolving a bullet.
    listMarkerPatterns = {
        'ol': orderedPatterns,
        'ul': unorderedPatterns
    },
    // [ value, symbol ] pairs in descending value order, consumed greedily by fromRoman.
    romans = [
        [1000, 'M'],
        [900, 'CM'],
        [500, 'D'],
        [400, 'CD'],
        [100, 'C'],
        [90, 'XC'],
        [50, 'L'],
        [40, 'XL'],
        [10, 'X'],
        [9, 'IX'],
        [5, 'V'],
        [4, 'IV'],
        [1, 'I']
    ],
    // Digit alphabet used by fromAlphabet.
    alphabets = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ';

// Convert a roman numeral (any letter case) to its decimal value.
// Symbols from the `romans` table are consumed greedily from the front of
// the string, largest value first; anything left unmatched is ignored.
function fromRoman(str) {
    var remaining = str.toUpperCase(),
        total = 0,
        index,
        entry,
        symbol;

    for (index = 0; index < romans.length; index++) {
        entry = romans[ index ];
        symbol = entry[ 1 ];
        // The same symbol may repeat (e.g. 'III'), so keep stripping it.
        while (remaining.substr(0, symbol.length) === symbol) {
            total += entry[ 0 ];
            remaining = remaining.substr(symbol.length);
        }
    }
    return total;
}

// Convert alphabet numbering back to decimal.
// Letters form a bijective base-26 numeral: A=1 ... Z=26, AA=27, AB=28, ...
// Each letter therefore contributes (index + 1) * 26^position; treating the
// letters as zero-based digits would collapse 'AA' to 1 and 'AB' to 2.
// Input is case-insensitive; characters outside A-Z contribute nothing.
function fromAlphabet(str) {
    str = str.toUpperCase();
    var base = alphabets.length,
        retVal = 0,
        weight = 1,
        pos;
    // Walk from the least significant (right-most) letter to the left.
    for (pos = str.length - 1; pos >= 0; pos--) {
        retVal += (alphabets.indexOf(str.charAt(pos)) + 1) * weight;
        weight *= base;
    }
    return retVal;
}

// Write `str` as the element's 'style' attribute, or drop the attribute
// altogether when `str` is falsy (empty string, false, null, ...).
function setStyle(element, str) {
    if (!str) {
        element.removeAttribute('style');
        return;
    }
    element.setAttribute('style', str);
}

/*
 Measure a CSS length in pixels by applying it as the width of an
 off-screen element and reading back that element's clientWidth.
 <strong>Note:</strong> a value ending in '%' is returned untouched.
 */
var convertToPx = (function () {
    // Measuring element, created on the first call and reused afterwards.
    var ruler;

    return function (cssLength) {
        ruler = ruler || $('<div style="position:absolute;left:-9999px;' +
            'top:-9999px;margin:0px;padding:0px;border:0px;"' + '></div>')
            .prependTo('body');

        // Percentages cannot be resolved here; hand them back as-is.
        if ((/%$/).test(cssLength)) {
            return cssLength;
        }

        ruler.css('width', cssLength);
        return ruler[0].clientWidth;
    };
})();

// State shared between successive elements while pseudo list items are resolved
// (all three are reset to null by utils.resolveList when an element carries no list bullet):
// - listBaseIndent: margin increment taken as one indent level, derived from the
//   first margin increase seen between two consecutive list items.
// - previousListItemMargin: margin (as returned by convertToPx) of the previous list item.
// - previousListId: numeric list id parsed from the previous item's 'mso-list' style.
var listBaseIndent = 0,
    previousListItemMargin = null,
    previousListId;

// Return the single child of `elem`, or null when it has zero or
// several children (or no childNodes collection at all).
function onlyChild(elem) {
    var kids = elem.childNodes || [];

    if (kids.length !== 1) {
        return null;
    }
    return kids[0] || null;
}

// Detach every descendant of `elem` whose nodeName equals `tagName` by
// splicing it out of its parent's childNodes array, and return the detached
// nodes in document order. Nodes without a nodeName are skipped.
// A detached node is still searched, so matches nested inside it are
// collected as well.
function removeAnyChildWithName(elem, tagName) {
    var siblings = elem.childNodes || [],
        removed = [],
        index = 0,
        node;

    while (index < siblings.length) {
        node = siblings[ index ];

        if (!node.nodeName) {
            index++;
            continue;
        }

        if (node.nodeName === tagName) {
            removed.push(node);
            // Do not advance: the next sibling now sits at this index.
            siblings.splice(index, 1);
        } else {
            index++;
        }

        removed.push.apply(removed, removeAnyChildWithName(node, tagName));
    }
    return removed;
}

// Climb the parentNode chain of `elem` and return the nearest ancestor whose
// nodeName matches `tagNameRegex`. When the chain runs out, the falsy value
// that ended it (null/undefined) is returned.
function getAncestor(elem, tagNameRegex) {
    var node;

    for (node = elem.parentNode; node; node = node.parentNode) {
        if (node.nodeName && node.nodeName.match(tagNameRegex)) {
            return node;
        }
    }
    return node;
}

// Depth-first, document-order search below `elem` for the first node that
// `evaluator` accepts. Only nodes that have a nodeName are descended into.
// Returns the matching node, or null when nothing matches.
function firstChild(elem, evaluator) {
    var nodes = elem.childNodes || [],
        index = 0,
        node,
        found;

    while (index < nodes.length) {
        node = nodes[ index++ ];

        if (evaluator(node)) {
            return node;
        }

        found = node.nodeName ? firstChild(node, evaluator) : null;
        if (found) {
            return found;
        }
    }

    return null;
}


// Merge extra declarations into the element's 'style' attribute.
// Accepted call shapes:
//   addStyle(elem, name, value [, isPrepend])   - one name/value pair (value is a string)
//   addStyle(elem, styleObject [, isPrepend])   - object of name -> value
//   addStyle(elem, rawStyleText [, isPrepend])  - raw style text
// With isPrepend truthy the new text goes before the existing style,
// otherwise after it.
function addStyle(elem, name, value, isPrepend) {
    var addition = '',
        existing,
        parts,
        key;

    if (typeof value === 'string') {
        // name/value pair.
        addition = name + ':' + value + ';';
    } else {
        if (typeof name === 'object') {
            // style literal.
            for (key in name) {
                addition += key + ':' + name[ key ] + ';';
            }
        } else {
            // raw style text form.
            addition += name;
        }
        // In the two-argument forms the third argument is the prepend flag.
        isPrepend = value;
    }

    existing = elem.getAttribute('style');
    parts = isPrepend ? [ addition, existing ] : [ existing, addition ];

    // Joining may leave a leading ';' or a doubled ';;' - remove the first such occurrence.
    setStyle(elem, parts.join(';').replace(/^;|;(?=;)/, ''));
}


// Build a lookup ({ tag: 1, ... }) of every dtd entry that lists `tagName`
// among its allowed children. dtd keys containing '$' are skipped.
function parentOf(tagName) {
    var parents = {},
        name;

    for (name in dtd) {
        if (name.indexOf('$') !== -1 || !dtd[ name ][ tagName ]) {
            continue;
        }
        parents[ name ] = 1;
    }
    return parents;
}

/*
 Filter helpers shared by the word-filter rules:
 - flattenList: turn a real (possibly nested) list into flat ke:li items.
 - assembleList: rebuild nested HTML lists out of flat ke:li items.
 - falsyFilter: filter that always rejects.
 - stylesFilter: factory for 'style' attribute filters.
 - applyStyleFilter: placeholder, null until assigned elsewhere.
 */
var filters = {
    // Transform a normal list into flat list items only presentation.
    // E.g. <ul><li>level1<ol><li>level2</li></ol></li> =>
    // <ke:li ke:listtype='ul' ke:indent='1'>level1</ke:li>
    // <ke:li ke:listtype='ol' ke:indent='2'>level2</ke:li>
    //
    // @param element the list element to flatten.
    // @param [level] {Number} nesting depth of this list, defaults to 1.
    flattenList: function (element, level) {
        level = typeof level === 'number' ? level : 1;

        var listStyleType;

        // All list items are of the same type.
        switch (element.getAttribute('type')) {
            case 'a' :
                listStyleType = 'lower-alpha';
                break;
            case '1' :
                listStyleType = 'decimal';
                break;
            // TODO: Support more list style type from MS-Word.
        }

        var children = element.childNodes || [],
            child;

        for (var i = 0; i < children.length; i++) {
            child = children[ i ];

            if (child.nodeName in dtd.$listItem) {
                var listItemChildren = child.childNodes || [],
                    count = listItemChildren.length,
                    last = listItemChildren[ count - 1 ];

                // Move out nested list.
                // An empty list item has no last child, so guard before reading nodeName.
                if (last && last.nodeName in dtd.$list) {
                    element.insertAfter(child);
                    // Remove the parent list item if it's just a holder.
                    if (!--listItemChildren.length) {
                        element.removeChild(children[i--]);
                    }
                }

                child.setTagName('ke:li');

                // Inherit numbering from list root on the first list item.
                if (element.getAttribute('start') && !i) {
                    element.setAttribute('value', element.getAttribute('start'));
                }

                // The filter is run only for the side effects of its callbacks
                // (recording margin / list id); its return value is discarded.
                filters.stylesFilter(
                    [
                        ['tab-stops', null, function (val) {
                            // The length is expected in the second space-separated token;
                            // a value without one is ignored.
                            var stop = val.split(' ')[ 1 ],
                                margin = stop && stop.match(cssLengthRelativeUnit);
                            if (margin) {
                                ( previousListItemMargin = convertToPx(margin[ 0 ]) );
                            }
                        }
                        ],
                        ( level === 1 ? [ 'mso-list', null, function (val) {
                            val = val.split(' ');
                            var listId = Number(val[ 0 ].match(/\d+/));
                            // A different list id marks the start of a new list root.
                            if (listId !== previousListId) {
                                child.setAttribute('ke:reset', 1);
                            }
                            previousListId = listId;
                        } ] : null )
                    ])(child.getAttribute('style'));

                child.setAttribute('ke:indent', level);
                child.setAttribute('ke:listtype', element.nodeName);
                child.setAttribute('ke:list-style-type', listStyleType);
            }
            // Flatten sub list.
            else if (child.nodeName in dtd.$list) {
                /*jshint noarg:false*/
                // Absorb sub list children.
                arguments.callee.apply(this, [ child, level + 1 ]);
                children = children.slice(0, i).concat(child.childNodes).concat(children.slice(i + 1));
                element.empty();
                for (var j = 0, num = children.length; j < num; j++) {
                    element.appendChild(children[j]);
                }
            }
        }

        element.nodeName = element.tagName = null;

        // We're losing the tag name here, so flag this element as a list.
        element.setAttribute('ke:list', 1);
    },

    /*
     Try to collect all list items among the children and establish one
     or more HTML list structures for them.
     Every list created here is afterwards handed to postProcessList.
     @param element the container whose direct children are scanned for ke:li items.
     */
    assembleList: function (element) {
        var children = element.childNodes || [],
            child,
            listItem, // The current processing ke:li element.
            listItemIndent, // Indent level of current list item.
            lastIndent,
            lastListItem, // The previous one just been added to the list.
            list, // Current staging list and it's parent list if any.
            openedLists = [],
            previousListStyleType,
            previousListType;

        // Properties of the list item are to be resolved from the list bullet.
        var bullet,
            listType,
            listStyleType,
            itemNumeric;

        for (var i = 0; i < children.length; i++) {
            child = children[ i ];

            if ('ke:li' === child.nodeName) {
                child.setTagName('li');
                listItem = child;

                // Split the bullet into [ whole, symbol, trailing '.' or ')' ].
                bullet = listItem.getAttribute('ke:listsymbol');
                bullet = bullet && bullet.match(/^(?:[(]?)([^\s]+?)([.)]?)$/);
                listType = listStyleType = itemNumeric = null;

                if (listItem.getAttribute('ke:ignored')) {
                    children.splice(i--, 1);
                    continue;
                }


                // This is from a new list root.
                if (listItem.getAttribute('ke:reset')) {
                    ( list = lastIndent = lastListItem = null );
                }

                // List item indent level might come from a real list indentation or
                // been resolved from a pseudo list item's margin value, even get
                // no indentation at all.
                listItemIndent = Number(listItem.getAttribute('ke:indent'));

                // We're moving out of the current list, cleaning up.
                if (listItemIndent !== lastIndent) {
                    previousListType = previousListStyleType = null;
                }

                // List type and item style are already resolved.
                if (!bullet) {
                    listType = listItem.getAttribute('ke:listtype') || 'ol';
                    listStyleType = listItem.getAttribute('ke:list-style-type');
                }
                else {
                    // Probably share the same list style type with previous list item,
                    // give it priority to avoid ambiguous between C(Alpha) and C.(Roman).
                    if (previousListType &&
                        listMarkerPatterns[ previousListType ] [ previousListStyleType ].test(bullet[ 1 ])) {
                        listType = previousListType;
                        listStyleType = previousListStyleType;
                    }
                    else {
                        for (var type in listMarkerPatterns) {

                            for (var style in listMarkerPatterns[ type ]) {

                                if (listMarkerPatterns[ type ][ style ].test(bullet[ 1 ])) {
                                    // Small numbering has higher priority, when dealing with ambiguous
                                    // between C(Alpha) and C.(Roman).
                                    if (type === 'ol' && ( /alpha|roman/ ).test(style)) {
                                        var num = /roman/.test(style) ? fromRoman(bullet[ 1 ]) : fromAlphabet(bullet[ 1 ]);
                                        if (!itemNumeric || num < itemNumeric) {
                                            itemNumeric = num;
                                            listType = type;
                                            listStyleType = style;
                                        }
                                    }
                                    else {
                                        listType = type;
                                        listStyleType = style;
                                        break;
                                    }
                                }
                            }
                        }
                    }

                    // Simply use decimal/disc for the rest forms of unrepresentable
                    // numerals, e.g. Chinese..., but as long as there a second part
                    // included, it has a bigger chance of being a order list ;)
                    if (!listType) {
                        ( listType = bullet[ 2 ] ? 'ol' : 'ul' );
                    }
                }

                previousListType = listType;
                previousListStyleType = listStyleType || ( listType === 'ol' ? 'decimal' : 'disc' );
                // Only a style type differing from the list type's default is written out.
                if (listStyleType && listStyleType !== ( listType === 'ol' ? 'decimal' : 'disc' )) {
                    addStyle(listItem, 'list-style-type', listStyleType);
                }

                // Figure out start numbering.
                if (listType === 'ol' && bullet) {
                    switch (listStyleType) {
                        case 'decimal' :
                            itemNumeric = Number(bullet[ 1 ]);
                            break;
                        case 'lower-roman':
                        case 'upper-roman':
                            itemNumeric = fromRoman(bullet[ 1 ]);
                            break;
                        case 'lower-alpha':
                        case 'upper-alpha':
                            itemNumeric = fromAlphabet(bullet[ 1 ]);
                            break;
                    }

                    // Always create the numbering, swipe out unnecessary ones later.
                    listItem.setAttribute('value', itemNumeric);
                }

                // Start the list construction.
                if (!list) {
                    openedLists.push(list = new HtmlParser.Tag(listType));
                    list.appendChild(listItem);
                    element.replaceChild(list, children[i]);
                } else {
                    if (listItemIndent > lastIndent) {
                        // Deeper level: open a sub list inside the previous item.
                        openedLists.push(list = new HtmlParser.Tag(listType));
                        list.appendChild(listItem);
                        lastListItem.appendChild(list);
                    }
                    else if (listItemIndent < lastIndent) {
                        // There might be a negative gap between two list levels. (#4944)
                        var diff = lastIndent - listItemIndent,
                            parent;
                        // Climb one list level (list -> item -> outer list) per indent step.
                        while (diff-- && ( parent = list.parentNode )) {
                            list = parent.parentNode;
                        }
                        list.appendChild(listItem);
                    }
                    else {
                        list.appendChild(listItem);
                    }
                    children.splice(i--, 1);
                }

                lastListItem = listItem;
                lastIndent = listItemIndent;
            }
            // Anything other than a whitespace-only text node ends the current list.
            else if (list && !(child.nodeType === 3 && !util.trim(child.nodeValue))) {
                list = lastIndent = lastListItem = null;
            }
        }

        for (i = 0; i < openedLists.length; i++) {
            postProcessList(openedLists[ i ]);
        }
    },

    /*
     A simple filter which always rejects.
     */
    falsyFilter: function () {
        return false;
    },

    /*
     A filter dedicated on the 'style' attribute filtering, e.g. dropping/replacing style properties.
     @param styles {Array} in form of [ styleNameRegexp, styleValueRegexp,
     newStyleValue/newStyleGenerator, newStyleName ] where only the first
     parameter is mandatory. Falsy entries are skipped.
     @param [whitelist] {Boolean} Whether the {@param styles} will be considered as a white-list.
     @return {Function} function (styleText, element) returning the filtered style
     text ('name:value;...') or false when no declaration is left.
     */
    stylesFilter: function (styles, whitelist) {
        return function (styleText, element) {
            var rules = [];
            // html-encoded quote might be introduced by 'font-family'
            // from MS-Word which confused the following regexp. e.g.
            //'font-family: &quot;Lucida, Console&quot;'
            ( styleText || '' )
                .replace(/&quot;/g, '"')
                .replace(/\s*([^ :;]+)\s*:\s*([^;]+)\s*(?=;|$)/g,
                function (match, name, value) {
                    name = name.toLowerCase();
                    if (name === 'font-family') {
                        // Strip both quote kinds: the '&quot;' entities were just
                        // decoded to double quotes above.
                        ( value = value.replace(/["']/g, '') );
                    }

                    var namePattern,
                        valuePattern,
                        newValue,
                        newName;
                    for (var i = 0; i < styles.length; i++) {
                        if (styles[ i ]) {
                            namePattern = styles[ i ][ 0 ];
                            valuePattern = styles[ i ][ 1 ];
                            newValue = styles[ i ][ 2 ];
                            newName = styles[ i ][ 3 ];

                            if (name.match(namePattern) && ( !valuePattern || value.match(valuePattern) )) {
                                name = newName || name;
                                // A white-listed style keeps its value unless a replacement is given.
                                if (whitelist) {
                                    ( newValue = newValue || value );
                                }

                                if (typeof newValue === 'function') {
                                    newValue = newValue(value, element, name);
                                }

                                // Return an couple indicate both name and value
                                // changed.
                                if (newValue && newValue.push) {
                                    name = newValue[ 0 ];
                                    newValue = newValue[ 1 ];
                                }

                                // Anything but a string result drops the declaration.
                                if (typeof newValue === 'string') {
                                    rules.push([ name, newValue ]);
                                }

                                // Only the first matching rule is applied.
                                return;
                            }
                        }
                    }

                    // Unmatched declarations survive only in black-list mode.
                    if (!whitelist) {
                        rules.push([ name, value ]);
                    }

                });

            for (var i = 0; i < rules.length; i++) {
                rules[ i ] = rules[ i ].join(':');
            }

            return rules.length ? ( rules.join(';') + ';' ) : false;
        };
    },

    /*
     A filter which will be used to apply inline css style according the stylesheet
     definition rules, is generated lazily when filtering.
     */
    applyStyleFilter: null

};


// Tidy up a freshly assembled list:
// 1. drop an item's 'value' attribute when it merely repeats the item's position;
// 2. when every item that declares a list-style-type declares the same one,
//    strip it from the items and set it once on the list itself.
// Nothing is done when the list already carries a list-style-type.
function postProcessList(list) {
    var items = list.childNodes || [],
        total = items.length,
        typePattern = /list-style-type:(.*?)(?:;|$)/,
        stylesFilter = filters.stylesFilter,
        sharedType,
        item,
        found,
        itemStyle,
        index;

    if (typePattern.exec(list.getAttribute('style'))) {
        return;
    }

    for (index = 0; index < total; index++) {
        item = items[ index ];

        if (item.getAttribute('value') && Number(item.getAttribute('value')) === index + 1) {
            item.removeAttribute('value');
        }

        found = typePattern.exec(item.getAttribute('style'));
        if (!found) {
            continue;
        }

        // Two different style types: nothing can be merged, stop scanning.
        if (sharedType && found[ 1 ] !== sharedType) {
            sharedType = null;
            break;
        }
        sharedType = found[ 1 ];
    }

    if (!sharedType) {
        return;
    }

    for (index = 0; index < total; index++) {
        itemStyle = items[ index ].getAttribute('style');

        if (itemStyle) {
            setStyle(items[ index ], stylesFilter([
                [ 'list-style-type']
            ])(itemStyle));
        }
    }
    addStyle(list, 'list-style-type', sharedType);
}

/*
 Helpers used by the word-filter rules to detect and resolve list items.
 */
var utils = {
    // Create a <ke:listbullet> element which indicates a list item type.
    // @param bullet match array; its first entry is stored as 'ke:listsymbol'.
    // @param bulletText text placed inside the marker as a text node.
    createListBulletMarker: function (bullet, bulletText) {
        var marker = new HtmlParser.Tag('ke:listbullet');
        marker.setAttribute('ke:listsymbol', bullet[ 0 ]);
        marker.appendChild(new HtmlParser.Text(bulletText));
        return marker;
    },

    // Returns true when the element's style contains 'mso-list: Ignore'
    // (case-insensitive), undefined otherwise.
    isListBulletIndicator: function (element) {
        var styleText = element.getAttribute('style');
        if (/mso-list\s*:\s*Ignore/i.test(styleText)) {
            return true;
        }
    },

    // Truthy when the element has exactly one child whose nodeValue consists
    // solely of whitespace and/or '&nbsp;' entities.
    isContainingOnlySpaces: function (element) {
        var text;
        // '(?:' is a non-capturing group; the misspelling '(:?' would also
        // accept a leading colon, e.g. ': ', as "only spaces".
        return ( ( text = onlyChild(element) ) && ( /^(?:\s|&nbsp;)+$/ ).test(text.nodeValue) );
    },

    // Turn `element` into a ke:li when it contains a <ke:listbullet> marker.
    // The marker(s) are removed from the element, the indent level is resolved
    // from the element's style, and the first marker's attributes are copied over.
    // Returns true when the element was resolved as a list item; otherwise the
    // shared list state is reset and false is returned.
    resolveList: function (element) {
        // <ke:listbullet> indicate a list item.
        var listMarker;

        if (( listMarker = removeAnyChildWithName(element, 'ke:listbullet') ) &&
            listMarker.length && ( listMarker = listMarker[ 0 ] )) {
            element.setTagName('ke:li');

            if (element.getAttribute('style')) {
                var styleStr = filters.stylesFilter(
                    [
                        // Text-indent is not representing list item level any more.
                        [ 'text-indent' ],
                        [ 'line-height' ],
                        // First attempt is to resolve indent level from on a constant margin increment.
                        [ ( /^margin(?:-left)?$/ ), null, function (margin) {
                            // Deal with component/short-hand form.
                            var values = margin.split(' ');
                            margin = convertToPx(values[ 3 ] || values[ 1 ] || values [ 0 ]);

                            // Figure out the indent unit by checking the first time of incrementation.
                            if (!listBaseIndent && previousListItemMargin !== null &&
                                margin > previousListItemMargin) {
                                listBaseIndent = margin - previousListItemMargin;
                            }

                            previousListItemMargin = margin;
                            if (listBaseIndent) {
                                element.setAttribute('ke:indent', listBaseIndent &&
                                    ( Math.ceil(margin / listBaseIndent) + 1 ) || 1);
                            }
                        } ],
                        // The best situation: 'mso-list:l0 level1 lfo2' tells the belonged list root, list item indentation, etc.
                        [ ( /^mso-list$/ ), null, function (val) {
                            val = val.split(' ');
                            var listId = Number(val[ 0 ].match(/\d+/)),
                                indent = Number(val[ 1 ].match(/\d+/));

                            if (indent === 1) {
                                // A different list id on a first-level item starts a new list root.
                                if (listId !== previousListId) {
                                    ( element.setAttribute('ke:reset', 1) );
                                }

                                previousListId = listId;
                            }
                            element.setAttribute('ke:indent', indent);
                        } ]
                    ])(element.getAttribute('style'), element);

                setStyle(element, styleStr);
            }

            // First level list item might be presented without a margin.
            // In case all above doesn't apply.
            if (!element.getAttribute('ke:indent')) {
                previousListItemMargin = 0;
                element.setAttribute('ke:indent', 1);
            }

            // Carry the marker's attributes (e.g. ke:listsymbol) over to the list item.
            util.each(listMarker.attributes, function (a) {
                element.setAttribute(a.name, a.value);
            });

            return true;
        }
        // Current list disconnected.
        else {
            previousListId = previousListItemMargin = listBaseIndent = null;
        }
        return false;
    },

    // Providing a shorthand style then retrieve one or more style component values.
    // The shorthand is applied to an off-screen element (created once, when this
    // object is built) and each name in `fetchList` is read back from it.
    // Returns an object mapping each requested name to the value read.
    getStyleComponents: (function () {
        var calculator = $('<div style="position:absolute;left:-9999px;top:-9999px;"></div>').prependTo('body');

        return function (name, styleValue, fetchList) {
            calculator.css(name, styleValue);
            var styles = {},
                count = fetchList.length;
            for (var i = 0; i < count; i++) {
                styles[ fetchList[ i ] ] = calculator.css(fetchList[ i ]);
            }

            return styles;
        };
    })(),

    // Lookup of tags that, according to the dtd, may contain an 'ol'.
    listDtdParents: parentOf('ol')
};

(function () {
    var blockLike = util.merge(dtd.$block, dtd.$listItem, dtd.$tableContent),
        falsyFilter = filters.falsyFilter,
        stylesFilter = filters.stylesFilter,
        createListBulletMarker = utils.createListBulletMarker,
        flattenList = filters.flattenList,
        assembleList = filters.assembleList,
        isListBulletIndicator = utils.isListBulletIndicator,
        containsNothingButSpaces = utils.isContainingOnlySpaces,
        resolveListItem = utils.resolveList,
        convertToPxStr = function (value) {
            value = convertToPx(value);
            return isNaN(value) ? value : value + 'px';
        },
        getStyleComponents = utils.getStyleComponents,
        listDtdParents = utils.listDtdParents;

    wordFilter.addRules({

        tagNames: [
            // Remove script, meta and link elements.
            [ ( /meta|link|script/ ), '' ]
        ],

        root: function (element) {
            element.filterChildren();
            assembleList(element);
        },

        tags: {
            '^': function (element) {
                // Transform CSS style declaration to inline style.
                var applyStyleFilter;
                if (UA.gecko && ( applyStyleFilter = filters.applyStyleFilter )) {
                    applyStyleFilter(element);
                }
            },

            $: function (element) {
                var tagName = element.nodeName || '';

                // Convert length unit of width/height on blocks to
                // a more editor-friendly way (px).
                if (tagName in blockLike && element.getAttribute('style')) {
                    setStyle(element, stylesFilter(
                        [
                            [ ( /^(:?width|height)$/ ), null, convertToPxStr ]
                        ])(element.getAttribute('style')));
                }

                // Processing headings.
                if (tagName.match(/h\d/)) {
                    element.filterChildren();
                    // Is the heading actually a list item?
                    if (resolveListItem(element)) {
                        return;
                    }
                }
                // Remove inline elements which contain only empty spaces.
                else if (tagName in dtd.$inline) {
                    element.filterChildren();
                    if (containsNothingButSpaces(element)) {
                        element.setTagName(null);
                    }
                }
                // Remove element with ms-office namespace,
                // with it's content preserved, e.g. 'o:p'.
                else if (tagName.indexOf(':') !== -1 && tagName.indexOf('ke') === -1) {
                    element.filterChildren();

                    // Restore image real link from vml.
                    if (tagName === 'v:imagedata') {
                        var href = element.getAttribute('o:href');
                        if (href) {
                            element.setAttribute('src', href);
                        }
                        element.setTagName('img');
                        return;
                    }
                    element.setTagName(null);
                }

                // Assembling list items into a whole list.
                if (tagName in listDtdParents) {
                    element.filterChildren();
                    assembleList(element);
                }
            },

            // We'll drop any style sheet, but Firefox conclude
            // certain styles in a single style element, which are
            // required to be changed into inline ones.
            'style': function (element) {
                if (UA.gecko) {
                    // Grab only the style definition section.
                    var styleDefSection = onlyChild(element).nodeValue
                            .match(/\/\* Style Definitions \*\/([\s\S]*?)\/\*/),
                        styleDefText = styleDefSection && styleDefSection[ 1 ],
                        rules = {}; // Storing the parsed result.

                    if (styleDefText) {
                        styleDefText
                            // Remove line-breaks.
                            .replace(/[\n\r]/g, '')
                            // Extract selectors and style properties.
                            .replace(/(.+?)\{(.+?)\}/g,
                            function (rule, selectors, styleBlock) {
                                selectors = selectors.split(',');
                                var length = selectors.length;
                                for (var i = 0; i < length; i++) {
                                    // Assume MS-Word mostly generate only simple
                                    // selector( [Type selector][Class selector]).
                                    util.trim(selectors[ i ])
                                        .replace(/^(\w+)(\.[\w-]+)?$/g,
                                        function (match, tagName, className) {
                                            tagName = tagName || '*';
                                            className = className.substring(1, className.length);

                                            // Reject MS-Word Normal styles.
                                            if (className.match(/MsoNormal/)) {
                                                return;
                                            }

                                            if (!rules[ tagName ]) {
                                                rules[ tagName ] = {};
                                            }
                                            if (className) {
                                                rules[ tagName ][ className ] = styleBlock;
                                            } else {
                                                rules[ tagName ] = styleBlock;
                                            }
                                        });
                                }
                            });

                        filters.applyStyleFilter = function (element) {
                            var name = rules[ '*' ] ? '*' : element.nodeName,
                                className = element.getAttribute('class'),
                                style;
                            if (name in rules) {
                                style = rules[ name ];
                                if (typeof style === 'object') {
                                    style = style[ className ];
                                }
                                // Maintain style rules priorities.
                                if (style) {
                                    addStyle(element, style, true);
                                }
                            }
                        };
                    }
                }
                return false;
            },

            'p': function (element) {
                // This's a fall-back approach to recognize list item in FF3.6,
                // as it's not perfect as not all list style (e.g. 'heading list') is shipped
                // with this pattern. (#6662)
                if (/MsoListParagraph/.exec(element.getAttribute('class'))) {
                    var bulletText = firstChild(element, function (node) {
                        return node.nodeType === 3 && !containsNothingButSpaces(node.parentNode);
                    });
                    var bullet = bulletText && bulletText.parentNode;
                    if (bullet && !bullet.getAttribute('style')) {
                        bullet.setAttribute('style', 'mso-list: Ignore;');
                    }
                }

                element.filterChildren();
                // Is the paragraph actually a list item?
                resolveListItem(element);
            },

            'div': function (element) {
                // Aligned table with no text surrounded is represented by a wrapper div, from which
                // table cells inherit as text-align styles, which is wrong.
                // Instead we use a clear-float div after the table to properly achieve the same layout.
                var singleChild = onlyChild(element);
                if (singleChild && singleChild.nodeName === 'table') {
                    var attrs = element.attributes;

                    util.each(attrs, function (attr) {
                        singleChild.setAttribute(attr.name, attr.value);
                    });

                    if (element.getAttribute('style')) {
                        addStyle(singleChild, element.getAttribute('style'));
                    }

                    var clearFloatDiv = new HtmlParser.Tag('div');
                    addStyle(clearFloatDiv, 'clear', 'both');
                    element.appendChild(clearFloatDiv);
                    element.setTagName(null);
                }
            },

            'td': function (element) {
                // 'td' in 'thead' is actually <th>.
                if (getAncestor(element, 'thead')) {
                    element.setTagName('th');
                }
            },

            // MS-Word sometimes present list as a mixing of normal list
            // and pseudo-list, normalize the previous ones into pseudo form.
            'ol': flattenList,
            'ul': flattenList,
            'dl': flattenList,

            'font': function (element) {
                // Drop the font tag if it comes from list bullet text.
                if (isListBulletIndicator(element.parentNode)) {
                    element.setTagName(null);
                    return;
                }

                element.filterChildren();

                var styleText = element.getAttribute('style'),
                    parent = element.parentNode;

                if ('font' === parent.name)     // Merge nested <font> tags.
                {
                    util.each(element.attributes, function (attr) {
                        parent.setAttribute(attr.name, attr.value);
                    });
                    if (styleText) {
                        addStyle(parent, styleText);
                    }
                    element.setTagName(null);
                }
                // Convert the merged into a span with all attributes preserved.
                else {
                    styleText = styleText || '';
                    // IE's having those deprecated attributes, normalize them.
                    if (element.getAttribute('color')) {
                        if (element.getAttribute('color') !== '#000000') {
                            styleText += 'color:' + element.getAttribute('color') + ';';
                        }
                        element.removeAttribute('color');
                    }
                    if (element.getAttribute('face')) {
                        styleText += 'font-family:' + element.getAttribute('face') + ';';
                        element.removeAttribute('face');
                    }
                    var size = element.getAttribute('size');
                    // TODO: Mapping size in ranges of xx-small,
                    // x-small, small, medium, large, x-large, xx-large.
                    if (size) {
                        styleText += 'font-size:' +
                            (size > 3 ? 'large'
                                : ( size < 3 ? 'small' : 'medium' ) ) + ';';
                        element.removeAttribute('size');
                    }
                    element.setTagName('span');
                    addStyle(element, styleText);
                }
            },

            'span': function (element) {
                // Remove the span if it comes from list bullet text.
                if (isListBulletIndicator(element.parentNode)) {
                    return false;
                }
                element.filterChildren();
                if (containsNothingButSpaces(element)) {
                    element.setTagName(null);
                    return null;
                }

                // List item bullet type is supposed to be indicated by
                // the text of a span with style 'mso-list : Ignore' or an image.
                if (isListBulletIndicator(element)) {
                    var listSymbolNode = firstChild(element, function (node) {
                        return node.nodeValue || node.nodeName === 'img';
                    });

                    var listSymbol = listSymbolNode && ( listSymbolNode.nodeValue || 'l.' ),
                        listType = listSymbol && listSymbol.match(/^(?:[(]?)([^\s]+?)([.)]?)$/);

                    if (listType) {
                        var marker = createListBulletMarker(listType, listSymbol);
                        // Some non-existed list items might be carried by an inconsequential list,
                        // indicate by 'mso-hide:all/display:none',
                        // those are to be removed later, now mark it with 'ke:ignored'.
                        var ancestor = getAncestor(element, 'span');
                        if (ancestor && (/ mso-hide:\s*all|display:\s*none /).
                            test(ancestor.getAttribute('style'))) {
                            marker.setAttribute('ke:ignored', 1);
                        }
                        return marker;
                    }
                }

                // Update the src attribute of image element with href.
                var styleText = element.getAttribute('style');

                // Assume MS-Word mostly carry font related styles on <span>,
                // adapting them to editor's convention.
                if (styleText) {

                    setStyle(element, stylesFilter(
                        [
                            // Drop 'inline-height' style which make lines overlapping.
                            [ /^line-height$/ ],
                            [  /^font-family$/  ] ,
                            [  /^font-size$/  ] ,
                            [  /^color$/  ] ,
                            [  /^background-color$/  ]
                        ]
                    )(styleText, element));
                }
            },
            // Editor doesn't support anchor with content currently (#3582),
            // drop such anchors with content preserved.
            'a': function (element) {
                var href;
                if (!(href = element.getAttribute('href')) && element.getAttribute('name')) {
                    element.setTagName(null);
                } else if (UA.webkit && href && href.match(/file:\/\/\/[\S]+#/i)) {
                    element.setAttribute('href', href.replace(/file:\/\/\/[^#]+/i, ''));
                }
            },
            'ke:listbullet': function (element) {
                if (getAncestor(element, /h\d/)) {
                    element.setTagName(null);
                }
            }
        },

        attributeNames: [
            // Remove onmouseover and onmouseout events (from MS Word comments effect)
            [ ( /^onmouse(:?out|over)/ ), '' ],
            // Onload on image element.
            [ ( /^onload$/ ), '' ],
            // Remove office and vml attribute from elements.
            [ ( /(?:v|o):\w+/ ), '' ],
            // Remove lang/language attributes.
            [ ( /^lang/ ), '' ]
        ],

        attributes: {
            // Provide a white-list of styles that we preserve, those should
            // be the ones that could later be altered with editor tools.
            'style': stylesFilter([
                    [ ( /^list-style-type$/ ) ],

                    // Preserve margin-left/right which used as default indent style in the editor.
                    [ ( /^margin$|^margin-(?!bottom|top)/ ), null, function (value, element, name) {
                        if (element.nodeName in { p: 1, div: 1 }) {
                            var indentStyleName = 'margin-left';

                            // Extract component value from 'margin' shorthand.
                            if (name === 'margin') {
                                value = getStyleComponents(name, value,
                                    [ indentStyleName ])[ indentStyleName ];
                            } else if (name !== indentStyleName) {
                                return null;
                            }

                            if (value && !emptyMarginRegex.test(value)) {
                                return [ indentStyleName, value ];
                            }
                        }

                        return null;
                    } ],

                    // Preserve clear float style.
                    [ ( /^clear$/ ) ],

                    [ ( /^border.*|margin.*|vertical-align|float$/ ), null,
                        function (value, element) {
                            if (element.nodeName === 'img') {
                                return value;
                            }
                        } ],

                    [ (/^width|height$/ ), null,
                        function (value, element) {
                            if (element.nodeName in { table: 1, td: 1, th: 1, img: 1 }) {
                                return value;
                            }
                        } ]
                ],
                1),

            // Prefer width styles over 'width' attributes.
            'width': function (value, element) {
                if (element.nodeName in dtd.$tableContent) {
                    return false;
                }
            },
            // Prefer border styles over table 'border' attributes.
            'border': function (value, element) {
                if (element.nodeName in dtd.$tableContent) {
                    return false;
                }
            },

            // Only Firefox carry style sheet from MS-Word, which
            // will be applied by us manually. For other browsers
            // the css className is useless.
            'class': falsyFilter,

            // MS-Word always generate 'background-color' along with 'bgcolor',
            // simply drop the deprecated attributes.
            'bgcolor': falsyFilter,

            // Deprecate 'valign' attribute in favor of 'vertical-align'.
            'valign': function (value, element) {
                addStyle(element, 'vertical-align', value);
                return false;
            }
        },


        // For non-IE browsers, some useful data might be buried under these
        // IE-conditional comments. RegExp is the right approach to dig them out; the
        // comment is then transformed into a fake element node which holds the data.
        //
        // The handler therefore has to be installed when the browser is NOT IE:
        // with the condition the other way round the Gecko branch below could never
        // run and non-IE browsers lost every list bullet hidden in such comments.
        comment: !UA.ie ?
            function (value, node) {
                var imageInfo = value.match(/<img.*?>/),
                    listInfo = value.match(/^\[if !supportLists\]([\s\S]*?)\[endif\]$/);

                // Seek for list bullet indicator.
                if (listInfo) {
                    // Bullet symbol could be either text or an image.
                    var listSymbol = listInfo[ 1 ] || ( imageInfo && 'l.' ),
                        listType = listSymbol && listSymbol.match(/>(?:[(]?)([^\s]+?)([.)]?)</);
                    return createListBulletMarker(listType, listSymbol);
                }

                // Reveal the <img> element in conditional comments for Firefox.
                if (UA.gecko && imageInfo) {
                    var img = new HtmlParser.Parser(imageInfo[0]).parse().childNodes[ 0 ],
                        previousComment = node.previousSibling,
                    // Try to dig the real image link from vml markup from previous comment text.
                    // The attribute value may be wrapped in single or double quotes.
                        imgSrcInfo = previousComment && previousComment.toHtml().match(/<v:imagedata[^>]*o:href=['"](.*?)['"]/),
                        imgSrc = imgSrcInfo && imgSrcInfo[ 1 ];

                    // Is there a real 'src' url to be used?
                    if (imgSrc) {
                        ( img.setAttribute('src', imgSrc) );
                    }
                    return img;
                }

                return false;
            }
            : falsyFilter
    });
})();

module.exports = {
    toDataFormat: function (html, editor) {
        // Firefox will be confused by those downlevel-revealed IE conditional
        // comments, fixing them first( convert it to upperlevel-revealed one ).
        // e.g. <![if !vml]>...<![endif]>
        //<!--[if !supportLists]-->
        // <span style=\'font-family: Wingdings;\' lang=\'EN-US\'>
        // <span style=\'\'>l<span style=\'font: 7pt &quot;Times New Roman&quot;;\'>&nbsp;
        // </span></span></span>
        // <!--[endif]-->

        //变成:

        //<!--[if !supportLists]
        // <span style=\'font-family: Wingdings;\' lang=\'EN-US\'>
        // <span style=\'\'>l<span style=\'font: 7pt &quot;Times New Roman&quot;;\'>&nbsp;
        // </span></span></span>
        // [endif]-->
        if (UA.gecko) {
            html = html.replace(/(<!--\[if[^<]*?\])-->([\S\s]*?)<!--(\[endif\]-->)/gi,
                '$1$2$3');
        }

        // 针对 word 一次
        html = editor.htmlDataProcessor.toDataFormat(html, wordFilter);

        return html;
    }
};
});
Example #12
0
 // Registers the MS-Word clean-up rules on wordFilter (compact build copy).
 // Fixes relative to the previous revision:
 //  - '(:?' regex typos replaced by the intended non-capturing '(?:';
 //  - the 'style' rule no longer throws on an empty <style> or on selectors
 //    without a class part;
 //  - the 'font' rule reads the parent's tag name through 'nodeName';
 //  - the hidden-list pattern in the 'span' rule no longer requires literal spaces;
 //  - the comment rule is installed for non-IE browsers and accepts both quote kinds.
 (function() {
   var blockLike = S.merge(dtd.$block, dtd.$listItem, dtd.$tableContent), falsyFilter = filters.falsyFilter, stylesFilter = filters.stylesFilter, createListBulletMarker = utils.createListBulletMarker, flattenList = filters.flattenList, assembleList = filters.assembleList, isListBulletIndicator = utils.isListBulletIndicator, containsNothingButSpaces = utils.isContainingOnlySpaces, resolveListItem = utils.resolveList, convertToPxStr = function(value) {
     value = convertToPx(value);
     return isNaN(value) ? value : value + "px"
   }, getStyleComponents = utils.getStyleComponents, listDtdParents = utils.listDtdParents;
   wordFilter.addRules({tagNames:[[/meta|link|script/, ""]], root:function(element) {
     element.filterChildren();
     assembleList(element)
   }, tags:{"^":function(element) {
     // Transform CSS style declaration to inline style (Gecko only).
     var applyStyleFilter;
     if(UA.gecko && (applyStyleFilter = filters.applyStyleFilter)) {
       applyStyleFilter(element)
     }
   }, $:function(element) {
     var tagName = element.nodeName || "";
     // Convert width/height on block-like elements to px.
     if(tagName in blockLike && element.getAttribute("style")) {
       setStyle(element, stylesFilter([[/^(?:width|height)$/, null, convertToPxStr]])(element.getAttribute("style")))
     }
     if(tagName.match(/h\d/)) {
       element.filterChildren();
       // Is the heading actually a list item?
       if(resolveListItem(element)) {
         return
       }
     }else {
       if(tagName in dtd.$inline) {
         // Inline elements which contain only spaces lose their tag.
         element.filterChildren();
         if(containsNothingButSpaces(element)) {
           element.setTagName(null)
         }
       }else {
         // MS-Office namespaced elements, e.g. 'o:p'.
         if(tagName.indexOf(":") !== -1 && tagName.indexOf("ke") === -1) {
           element.filterChildren();
           // Restore image real link from vml.
           if(tagName === "v:imagedata") {
             var href = element.getAttribute("o:href");
             if(href) {
               element.setAttribute("src", href)
             }
             element.setTagName("img");
             return
           }
           element.setTagName(null)
         }
       }
     }
     // Assembling list items into a whole list.
     if(tagName in listDtdParents) {
       element.filterChildren();
       assembleList(element)
     }
   }, style:function(element) {
     if(UA.gecko) {
       // Grab only the style definition section; tolerate a <style> element
       // without exactly one child or without text.
       var styleNode = onlyChild(element), rawStyleText = styleNode && styleNode.nodeValue, styleDefSection = rawStyleText && rawStyleText.match(/\/\* Style Definitions \*\/([\s\S]*?)\/\*/), styleDefText = styleDefSection && styleDefSection[1], rules = {};
       if(styleDefText) {
         styleDefText.replace(/[\n\r]/g, "").replace(/(.+?)\{(.+?)\}/g, function(rule, selectors, styleBlock) {
           selectors = selectors.split(",");
           var length = selectors.length;
           for(var i = 0;i < length;i++) {
             S.trim(selectors[i]).replace(/^(\w+)(\.[\w-]+)?$/g, function(match, tagName, className) {
               tagName = tagName || "*";
               // The class group is optional and arrives as undefined when unmatched.
               className = className ? className.substring(1) : "";
               // Reject MS-Word Normal styles.
               if(className.match(/MsoNormal/)) {
                 return
               }
               if(!rules[tagName]) {
                 rules[tagName] = {}
               }
               if(className) {
                 // Skip when a type-only rule already stored a plain string here.
                 if(typeof rules[tagName] === "object") {
                   rules[tagName][className] = styleBlock
                 }
               }else {
                 rules[tagName] = styleBlock
               }
             })
           }
         });
         filters.applyStyleFilter = function(element) {
           var name = rules["*"] ? "*" : element.nodeName, className = element.getAttribute("class"), style;
           if(name in rules) {
             style = rules[name];
             if(typeof style === "object") {
               style = style[className]
             }
             // Prepend so existing inline styles keep priority.
             if(style) {
               addStyle(element, style, true)
             }
           }
         }
       }
     }
     return false
   }, p:function(element) {
     // Fall-back recognition of list items by the 'MsoListParagraph' class.
     if(/MsoListParagraph/.exec(element.getAttribute("class"))) {
       var bulletText = firstChild(element, function(node) {
         return node.nodeType === 3 && !containsNothingButSpaces(node.parentNode)
       });
       var bullet = bulletText && bulletText.parentNode;
       if(bullet && !bullet.getAttribute("style")) {
         bullet.setAttribute("style", "mso-list: Ignore;")
       }
     }
     element.filterChildren();
     // Is the paragraph actually a list item?
     resolveListItem(element)
   }, div:function(element) {
     // A div wrapping a single table hands its attributes/style to the table.
     var singleChild = onlyChild(element);
     if(singleChild && singleChild.nodeName === "table") {
       var attrs = element.attributes;
       S.each(attrs, function(attr) {
         singleChild.setAttribute(attr.name, attr.value)
       });
       if(element.getAttribute("style")) {
         addStyle(singleChild, element.getAttribute("style"))
       }
       var clearFloatDiv = new HtmlParser.Tag("div");
       addStyle(clearFloatDiv, "clear", "both");
       element.appendChild(clearFloatDiv);
       element.setTagName(null)
     }
   }, td:function(element) {
     // 'td' in 'thead' is actually <th>.
     if(getAncestor(element, "thead")) {
       element.setTagName("th")
     }
   }, ol:flattenList, ul:flattenList, dl:flattenList, font:function(element) {
     // Drop the font tag if it comes from list bullet text.
     if(isListBulletIndicator(element.parentNode)) {
       element.setTagName(null);
       return
     }
     element.filterChildren();
     var styleText = element.getAttribute("style"), parent = element.parentNode;
     // Merge nested <font> tags; the tag name lives in 'nodeName' as elsewhere.
     if(parent && "font" === parent.nodeName) {
       S.each(element.attributes, function(attr) {
         parent.setAttribute(attr.name, attr.value)
       });
       if(styleText) {
         addStyle(parent, styleText)
       }
       element.setTagName(null)
     }else {
       // Convert into a span, moving deprecated attributes into the style.
       styleText = styleText || "";
       if(element.getAttribute("color")) {
         if(element.getAttribute("color") !== "#000000") {
           styleText += "color:" + element.getAttribute("color") + ";"
         }
         element.removeAttribute("color")
       }
       if(element.getAttribute("face")) {
         styleText += "font-family:" + element.getAttribute("face") + ";";
         element.removeAttribute("face")
       }
       var size = element.getAttribute("size");
       if(size) {
         styleText += "font-size:" + (size > 3 ? "large" : size < 3 ? "small" : "medium") + ";";
         element.removeAttribute("size")
       }
       element.setTagName("span");
       addStyle(element, styleText)
     }
   }, span:function(element) {
     // Remove the span if it comes from list bullet text.
     if(isListBulletIndicator(element.parentNode)) {
       return false
     }
     element.filterChildren();
     if(containsNothingButSpaces(element)) {
       element.setTagName(null);
       return null
     }
     // The bullet type is indicated by the text of the span or by an image.
     if(isListBulletIndicator(element)) {
       var listSymbolNode = firstChild(element, function(node) {
         return node.nodeValue || node.nodeName === "img"
       });
       var listSymbol = listSymbolNode && (listSymbolNode.nodeValue || "l."), listType = listSymbol && listSymbol.match(/^(?:[(]?)([^\s]+?)([.)]?)$/);
       if(listType) {
         var marker = createListBulletMarker(listType, listSymbol);
         var ancestor = getAncestor(element, "span");
         // Items hidden through 'mso-hide:all' / 'display:none' are marked as ignored.
         if(ancestor && /mso-hide:\s*all|display:\s*none/.test(ancestor.getAttribute("style"))) {
           marker.setAttribute("ke:ignored", 1)
         }
         return marker
       }
     }
     var styleText = element.getAttribute("style");
     if(styleText) {
       setStyle(element, stylesFilter([[/^line-height$/], [/^font-family$/], [/^font-size$/], [/^color$/], [/^background-color$/]])(styleText, element))
     }
   }, a:function(element) {
     // Named anchors without href lose their tag; on WebKit local file links
     // are reduced to their fragment.
     var href;
     if(!(href = element.getAttribute("href")) && element.getAttribute("name")) {
       element.setTagName(null)
     }else {
       if(UA.webkit && href && href.match(/file:\/\/\/[\S]+#/i)) {
         element.setAttribute("href", href.replace(/file:\/\/\/[^#]+/i, ""))
       }
     }
   }, "ke:listbullet":function(element) {
     if(getAncestor(element, /h\d/)) {
       element.setTagName(null)
     }
   }}, attributeNames:[[/^onmouse(?:out|over)/, ""], [/^onload$/, ""], [/(?:v|o):\w+/, ""], [/^lang/, ""]], attributes:{style:stylesFilter([[/^list-style-type$/], [/^margin$|^margin-(?!bottom|top)/, null, function(value, element, name) {
     // Preserve margin-left as the indent of paragraphs and divs only.
     if(element.nodeName in {p:1, div:1}) {
       var indentStyleName = "margin-left";
       if(name === "margin") {
         value = getStyleComponents(name, value, [indentStyleName])[indentStyleName]
       }else {
         if(name !== indentStyleName) {
           return null
         }
       }
       if(value && !emptyMarginRegex.test(value)) {
         return[indentStyleName, value]
       }
     }
     return null
   }], [/^clear$/], [/^border.*|margin.*|vertical-align|float$/, null, function(value, element) {
     if(element.nodeName === "img") {
       return value
     }
   }], [/^width|height$/, null, function(value, element) {
     if(element.nodeName in {table:1, td:1, th:1, img:1}) {
       return value
     }
   }]], 1), width:function(value, element) {
     // Prefer width styles over 'width' attributes.
     if(element.nodeName in dtd.$tableContent) {
       return false
     }
   }, border:function(value, element) {
     // Prefer border styles over table 'border' attributes.
     if(element.nodeName in dtd.$tableContent) {
       return false
     }
   }, "class":falsyFilter, bgcolor:falsyFilter, valign:function(value, element) {
     // Deprecate 'valign' attribute in favor of 'vertical-align'.
     addStyle(element, "vertical-align", value);
     return false
   }}, comment:!UA.ie ? function(value, node) {
     // Non-IE browsers: dig list bullets and images out of IE-conditional comments.
     var imageInfo = value.match(/<img.*?>/), listInfo = value.match(/^\[if !supportLists\]([\s\S]*?)\[endif\]$/);
     if(listInfo) {
       var listSymbol = listInfo[1] || imageInfo && "l.", listType = listSymbol && listSymbol.match(/>(?:[(]?)([^\s]+?)([.)]?)</);
       return createListBulletMarker(listType, listSymbol)
     }
     if(UA.gecko && imageInfo) {
       // The o:href value may be wrapped in single or double quotes.
       var img = (new HtmlParser.Parser(imageInfo[0])).parse().childNodes[0], previousComment = node.previousSibling, imgSrcInfo = previousComment && previousComment.toHtml().match(/<v:imagedata[^>]*o:href=['"](.*?)['"]/), imgSrc = imgSrcInfo && imgSrcInfo[1];
       if(imgSrc) {
         img.setAttribute("src", imgSrc)
       }
       return img
     }
     return false
   } : falsyFilter})
 })();
Example #13
0
KISSY.add("editor/plugin/word-filter", ["html-parser"], function(S, require) {
  var HtmlParser = require("html-parser");
  var $ = S.all, UA = S.UA, dtd = HtmlParser.DTD, wordFilter = new HtmlParser.Filter, cssLengthRelativeUnit = /^([.\d]*)+(em|ex|px|gd|rem|vw|vh|vm|ch|mm|cm|in|pt|pc|deg|rad|ms|s|hz|khz){1}?/i, emptyMarginRegex = /^(?:\b0[^\s]*\s*){1,4}$/, romanLiteralPattern = "^m{0,4}(cm|cd|d?c{0,3})(xc|xl|l?x{0,3})(ix|iv|v?i{0,3})$", lowerRomanLiteralRegex = new RegExp(romanLiteralPattern), upperRomanLiteralRegex = new RegExp(romanLiteralPattern.toUpperCase()), orderedPatterns = {decimal:/\d+/, "lower-roman":lowerRomanLiteralRegex, 
  "upper-roman":upperRomanLiteralRegex, "lower-alpha":/^[a-z]+$/, "upper-alpha":/^[A-Z]+$/}, unorderedPatterns = {disc:/[l\u00B7\u2002]/, circle:/[\u006F\u00D8]/, square:/[\u006E\u25C6]/}, listMarkerPatterns = {ol:orderedPatterns, ul:unorderedPatterns}, romans = [[1E3, "M"], [900, "CM"], [500, "D"], [400, "CD"], [100, "C"], [90, "XC"], [50, "L"], [40, "XL"], [10, "X"], [9, "IX"], [5, "V"], [4, "IV"], [1, "I"]], alphabets = "ABCDEFGHIJKLMNOPQRSTUVWXYZ";
  /**
   * Converts a roman numeral (any letter case) to its numeric value by
   * repeatedly consuming the symbols of the `romans` table from the front.
   * Characters that match no symbol contribute nothing.
   */
  function fromRoman(str) {
    var remaining = str.toUpperCase(), total = 0, idx, entry, symbol;
    for(idx = 0;idx < romans.length;idx++) {
      entry = romans[idx];
      symbol = entry[1];
      while(remaining.indexOf(symbol) === 0) {
        total += entry[0];
        remaining = remaining.slice(symbol.length)
      }
    }
    return total
  }
  /**
   * Converts an alphabetic list marker (any letter case) to a number: starting
   * from 1, each letter adds its index in `alphabets` weighted by a power of
   * the alphabet length, the rightmost letter having weight 1.
   */
  function fromAlphabet(str) {
    var letters = str.toUpperCase(), base = alphabets.length, result = 1, weight = 1, pos;
    for(pos = letters.length - 1;pos >= 0;pos--) {
      result += alphabets.indexOf(letters.charAt(pos)) * weight;
      weight *= base
    }
    return result
  }
  /**
   * Writes `str` as the element's 'style' attribute, or removes the
   * attribute altogether when `str` is falsy.
   */
  function setStyle(element, str) {
    if(!str) {
      element.removeAttribute("style");
      return
    }
    element.setAttribute("style", str)
  }
  /**
   * Measures a CSS length by assigning it as the width of an off-screen div
   * (created on first use and prepended to the body) and reading back
   * clientWidth. Values ending in '%' are returned untouched.
   */
  var convertToPx = function() {
    var probe;
    return function(cssLength) {
      // The probe is created on the first call, whatever the value is.
      if(!probe) {
        probe = $('<div style="position:absolute;left:-9999px;top:-9999px;margin:0px;padding:0px;border:0px;"></div>').prependTo("body")
      }
      if(/%$/.test(cssLength)) {
        return cssLength
      }
      probe.css("width", cssLength);
      return probe[0].clientWidth
    }
  }();
  var listBaseIndent = 0, previousListItemMargin = null, previousListId;
  /**
   * Returns the single child of `elem`, or null when it has no children,
   * more than one child, or a falsy only child.
   */
  function onlyChild(elem) {
    var kids = elem.childNodes;
    if(!kids || kids.length !== 1) {
      return null
    }
    return kids[0] || null
  }
  /**
   * Detaches every descendant of `elem` named `tagName` from its parent's
   * childNodes and returns the detached nodes in document order. Removed
   * nodes are searched as well; children without a nodeName are skipped.
   */
  function removeAnyChildWithName(elem, tagName) {
    var siblings = elem.childNodes || [], removed = [], index = 0, node;
    while(index < siblings.length) {
      node = siblings[index];
      if(!node.nodeName) {
        index++;
        continue
      }
      if(node.nodeName === tagName) {
        removed.push(node);
        // The next sibling slides into this index, so do not advance.
        siblings.splice(index, 1)
      }else {
        index++
      }
      removed = removed.concat(removeAnyChildWithName(node, tagName))
    }
    return removed
  }
  /**
   * Find the closest ancestor whose nodeName matches `tagNameRegex`.
   *
   * The pattern is handed to String.prototype.match, so a plain string is
   * treated as a regular expression source (substring match).
   *
   * @param {Object} elem starting node (not tested itself)
   * @param {RegExp|string} tagNameRegex pattern for the ancestor's nodeName
   * @return {Object|null|undefined} the ancestor, or the falsy parentNode value
   *     that ended the walk when none matches
   */
  function getAncestor(elem, tagNameRegex) {
    var node;
    for(node = elem.parentNode;node;node = node.parentNode) {
      if(node.nodeName && node.nodeName.match(tagNameRegex)) {
        return node
      }
    }
    return node
  }
  /**
   * Depth-first search for the first descendant accepted by `evaluator`.
   *
   * Each child is tested before its own descendants are searched; only nodes
   * that have a nodeName are descended into.
   *
   * @param {Object} elem root node (not tested itself)
   * @param {function(Object): *} evaluator returns truthy for the wanted node
   * @return {Object|null} the first accepted node, or null
   */
  function firstChild(elem, evaluator) {
    var kids = elem.childNodes || [], idx, node, nested;
    for(idx = 0;idx < kids.length;idx++) {
      node = kids[idx];
      if(evaluator(node)) {
        return node
      }
      if(!node.nodeName) {
        continue
      }
      nested = firstChild(node, evaluator);
      if(nested) {
        return nested
      }
    }
    return null
  }
  /**
   * Add CSS declarations to an element's "style" attribute.
   *
   * Call forms:
   *   addStyle(elem, name, value[, isPrepend])   - one declaration (value must be a string)
   *   addStyle(elem, {name: value, ...}[, isPrepend]) - several declarations
   *   addStyle(elem, styleText[, isPrepend])     - raw style text, used as-is
   * In the last two forms the third argument is read as `isPrepend`.
   *
   * The new text is joined to the existing style with ";" and a single
   * leading or doubled ";" produced by the join is removed.
   *
   * @param {Object} elem node exposing getAttribute (and what setStyle needs)
   * @param {string|Object} name property name, declarations map or raw style text
   * @param {string|boolean=} value property value, or isPrepend
   * @param {boolean=} isPrepend put the new text before the existing style
   */
  function addStyle(elem, name, value, isPrepend) {
    var addition = "", current, parts, key;
    if(typeof value !== "string") {
      isPrepend = value;
      if(typeof name === "object") {
        for(key in name) {
          addition += key + ":" + name[key] + ";"
        }
      }else {
        addition += name
      }
    }else {
      addition += name + ":" + value + ";"
    }
    current = elem.getAttribute("style");
    parts = isPrepend ? [addition, current] : [current, addition];
    setStyle(elem, parts.join(";").replace(/^;|;(?=;)/, ""))
  }
  /**
   * List the DTD entries that may contain `tagName`.
   *
   * Scans every key of `dtd` except those containing "$" and keeps the ones
   * whose entry has a truthy `tagName` member.
   *
   * @param {string} tagName child tag to look up
   * @return {Object} map of parent tag name to 1
   */
  function parentOf(tagName) {
    var parents = {}, name;
    for(name in dtd) {
      if(name.indexOf("$") !== -1) {
        continue
      }
      if(dtd[name][tagName]) {
        parents[name] = 1
      }
    }
    return parents
  }
  // Filters shared by the Word paste rules:
  //  - flattenList:  turn a real (possibly nested) list into flat "ke:li" items
  //  - assembleList: rebuild nested lists from consecutive "ke:li" siblings
  //  - falsyFilter:  always returns false
  //  - stylesFilter: factory for style-attribute filters
  //  - applyStyleFilter: null here; assigned later by the "style" tag rule
  var filters = {flattenList:function(element, level) {
    // Flattens the list `element`: every list item becomes a "ke:li" tagged with
    // ke:indent (nesting level), ke:listtype (the list's nodeName) and
    // ke:list-style-type; nested lists are flattened recursively with level + 1
    // and their items hoisted into this element.
    // `level` defaults to 1 when it is not a number.
    level = typeof level === "number" ? level : 1;
    var listStyleType;
    // Only type="a" and type="1" are mapped; anything else leaves listStyleType undefined.
    switch(element.getAttribute("type")) {
      case "a":
        listStyleType = "lower-alpha";
        break;
      case "1":
        listStyleType = "decimal";
        break
    }
    var children = element.childNodes || [], child;
    for(var i = 0;i < children.length;i++) {
      child = children[i];
      if(child.nodeName in dtd.$listItem) {
        var listItemChildren = child.childNodes || [], count = listItemChildren.length, last = listItemChildren[count - 1];
        // NOTE(review): `last` is undefined for a list item without children, so
        // `last.nodeName` would throw - confirm the parser never yields empty items.
        if(last.nodeName in dtd.$list) {
          // The item ends with a nested list. Presumably this is meant to move that
          // list out of the item; the effect depends on HtmlParser's insertAfter,
          // which is not visible here - verify.
          element.insertAfter(child);
          // Decrementing `length` truncates the item's child array by one; when
          // nothing is left the item itself is removed from the list.
          if(!--listItemChildren.length) {
            element.removeChild(children[i--])
          }
        }
        child.setTagName("ke:li");
        // NOTE(review): for the first item this copies "start" into a "value"
        // attribute on the list element, not on the item - confirm the intent.
        if(element.getAttribute("start") && !i) {
          element.setAttribute("value", element.getAttribute("start"))
        }
        // stylesFilter is run only for the side effects of its callbacks; the
        // style text it returns is discarded.
        filters.stylesFilter([["tab-stops", null, function(val) {
          // Remember the second token of "tab-stops" (when it is a relative CSS
          // length) as the previous item margin, in pixels.
          // NOTE(review): throws if the value has no second space-separated token.
          var margin = val.split(" ")[1].match(cssLengthRelativeUnit);
          if(margin) {
            previousListItemMargin = convertToPx(margin[0])
          }
        }], level === 1 ? ["mso-list", null, function(val) {
          // Top-level lists only: a change of the numeric list id found in the
          // first token marks the item with ke:reset (assembleList starts a new
          // list there).
          val = val.split(" ");
          var listId = Number(val[0].match(/\d+/));
          if(listId !== previousListId) {
            child.setAttribute("ke:reset", 1)
          }
          previousListId = listId
        }] : null])(child.getAttribute("style"));
        child.setAttribute("ke:indent", level);
        child.setAttribute("ke:listtype", element.nodeName);
        child.setAttribute("ke:list-style-type", listStyleType)
      }else {
        if(child.nodeName in dtd.$list) {
          // A list nested directly inside the list: flatten it one level deeper.
          // NOTE(review): arguments.callee is not permitted in strict mode.
          arguments.callee.apply(this, [child, level + 1]);
          // Replace the nested list by its own children, then rebuild this
          // element's child list from the new array.
          children = children.slice(0, i).concat(child.childNodes).concat(children.slice(i + 1));
          element.empty();
          for(var j = 0, num = children.length;j < num;j++) {
            element.appendChild(children[j])
          }
        }
      }
    }
    // Clear the list's own tag name and mark it with ke:list (presumably so that
    // only its children are written out - depends on the writer, verify).
    element.nodeName = element.tagName = null;
    element.setAttribute("ke:list", 1)
  }, assembleList:function(element) {
    // Walks the children of `element` and groups consecutive "ke:li" items into
    // real lists (new HtmlParser.Tag of the inferred type), nesting them
    // according to ke:indent. Every list created is passed to postProcessList
    // at the end.
    var children = element.childNodes || [], child, listItem, listItemIndent, lastIndent, lastListItem, list, openedLists = [], previousListStyleType, previousListType;
    var bullet, listType, listStyleType, itemNumeric;
    for(var i = 0;i < children.length;i++) {
      child = children[i];
      if("ke:li" === child.nodeName) {
        child.setTagName("li");
        listItem = child;
        // bullet[1]: marker text without an optional leading "(";
        // bullet[2]: optional trailing "." or ")".
        bullet = listItem.getAttribute("ke:listsymbol");
        bullet = bullet && bullet.match(/^(?:[(]?)([^\s]+?)([.)]?)$/);
        listType = listStyleType = itemNumeric = null;
        // Items flagged ke:ignored are dropped from the child list.
        if(listItem.getAttribute("ke:ignored")) {
          children.splice(i--, 1);
          continue
        }
        // ke:reset forces a new list to be started for this item.
        if(listItem.getAttribute("ke:reset")) {
          list = lastIndent = lastListItem = null
        }
        listItemIndent = Number(listItem.getAttribute("ke:indent"));
        // The type remembered from the previous item only applies on the same level.
        if(listItemIndent !== lastIndent) {
          previousListType = previousListStyleType = null
        }
        if(!bullet) {
          // No marker text: use what flattenList recorded, defaulting to "ol".
          listType = listItem.getAttribute("ke:listtype") || "ol";
          listStyleType = listItem.getAttribute("ke:list-style-type")
        }else {
          // Prefer the previous item's type/style when the marker still fits it.
          if(previousListType && listMarkerPatterns[previousListType][previousListStyleType].test(bullet[1])) {
            listType = previousListType;
            listStyleType = previousListStyleType
          }else {
            // Otherwise try every known marker pattern.
            for(var type in listMarkerPatterns) {
              for(var style in listMarkerPatterns[type]) {
                if(listMarkerPatterns[type][style].test(bullet[1])) {
                  if(type === "ol" && /alpha|roman/.test(style)) {
                    // A marker can be both alphabetic and roman (e.g. "i"):
                    // keep the reading that gives the smallest number.
                    var num = /roman/.test(style) ? fromRoman(bullet[1]) : fromAlphabet(bullet[1]);
                    if(!itemNumeric || num < itemNumeric) {
                      itemNumeric = num;
                      listType = type;
                      listStyleType = style
                    }
                  }else {
                    listType = type;
                    listStyleType = style;
                    // Leaves the inner (style) loop only; the outer loop goes on.
                    break
                  }
                }
              }
            }
          }
          // Unknown marker: a trailing "." or ")" is taken to mean an ordered list.
          if(!listType) {
            listType = bullet[2] ? "ol" : "ul"
          }
        }
        previousListType = listType;
        previousListStyleType = listStyleType || (listType === "ol" ? "decimal" : "disc");
        // Only style types other than the default (decimal / disc) are written out.
        if(listStyleType && listStyleType !== (listType === "ol" ? "decimal" : "disc")) {
          addStyle(listItem, "list-style-type", listStyleType)
        }
        // Ordered item with a marker: store its number as the "value" attribute.
        if(listType === "ol" && bullet) {
          switch(listStyleType) {
            case "decimal":
              itemNumeric = Number(bullet[1]);
              break;
            case "lower-roman":
            ;
            case "upper-roman":
              itemNumeric = fromRoman(bullet[1]);
              break;
            case "lower-alpha":
            ;
            case "upper-alpha":
              itemNumeric = fromAlphabet(bullet[1]);
              break
          }
          listItem.setAttribute("value", itemNumeric)
        }
        if(!list) {
          // First item of a run: the new list takes the item's place in the parent.
          openedLists.push(list = new HtmlParser.Tag(listType));
          list.appendChild(listItem);
          element.replaceChild(list, children[i])
        }else {
          if(listItemIndent > lastIndent) {
            // Deeper level: open a new list inside the previous item.
            openedLists.push(list = new HtmlParser.Tag(listType));
            list.appendChild(listItem);
            lastListItem.appendChild(list)
          }else {
            if(listItemIndent < lastIndent) {
              // Shallower level: climb two parents (item, then list) per level.
              var diff = lastIndent - listItemIndent, parent;
              while(diff-- && (parent = list.parentNode)) {
                list = parent.parentNode
              }
              list.appendChild(listItem)
            }else {
              list.appendChild(listItem)
            }
          }
          // The item now lives inside a list, so drop it from the flat child array.
          children.splice(i--, 1)
        }
        lastListItem = listItem;
        lastIndent = listItemIndent
      }else {
        // Anything other than a whitespace-only text node ends the current list.
        if(list && !(child.nodeType === 3 && !S.trim(child.nodeValue))) {
          list = lastIndent = lastListItem = null
        }
      }
    }
    for(i = 0;i < openedLists.length;i++) {
      postProcessList(openedLists[i])
    }
  }, falsyFilter:function() {
    // Rule callback that always returns false.
    return false
  }, stylesFilter:function(styles, whitelist) {
    // Builds a filter for style-attribute text.
    //   styles: array of rules [namePattern, valuePattern, newValue, newName];
    //           falsy entries are skipped.
    //   whitelist: when truthy, declarations matching no rule are dropped and a
    //           matching rule without newValue keeps the original value; when
    //           falsy, unmatched declarations are kept and matched ones survive
    //           only if the rule yields a string value.
    // The returned function(styleText, element) gives back "name:value;..." or
    // false when no declaration is left.
    return function(styleText, element) {
      var rules = [];
      // Decode &quot; and visit each "name: value" declaration; the replace call
      // is used for iteration only, its result is ignored.
      (styleText || "").replace(/&quot;/g, '"').replace(/\s*([^ :;]+)\s*:\s*([^;]+)\s*(?=;|$)/g, function(match, name, value) {
        name = name.toLowerCase();
        // Strip apostrophes from font-family values.
        // NOTE(review): the class holds the same quote twice - possibly meant to
        // cover double quotes as well; confirm.
        if(name === "font-family") {
          value = value.replace(/['']/g, "")
        }
        var namePattern, valuePattern, newValue, newName;
        for(var i = 0;i < styles.length;i++) {
          if(styles[i]) {
            namePattern = styles[i][0];
            valuePattern = styles[i][1];
            newValue = styles[i][2];
            newName = styles[i][3];
            if(name.match(namePattern) && (!valuePattern || value.match(valuePattern))) {
              name = newName || name;
              if(whitelist) {
                newValue = newValue || value
              }
              // A function rule computes the value from (value, element, name).
              if(typeof newValue === "function") {
                newValue = newValue(value, element, name)
              }
              // An array result means [name, value].
              if(newValue && newValue.push) {
                name = newValue[0];
                newValue = newValue[1]
              }
              // Only string results are emitted; anything else drops the declaration.
              if(typeof newValue === "string") {
                rules.push([name, newValue])
              }
              // The first matching rule wins.
              return
            }
          }
        }
        if(!whitelist) {
          rules.push([name, value])
        }
      });
      for(var i = 0;i < rules.length;i++) {
        rules[i] = rules[i].join(":")
      }
      return rules.length ? rules.join(";") + ";" : false
    }
  }, applyStyleFilter:null};
  /**
   * Tidy a freshly assembled list.
   *
   * Does nothing when the list itself already declares a list-style-type.
   * Otherwise it removes "value" attributes that merely repeat an item's
   * natural position (index + 1) and, when all items that declare a
   * list-style-type agree on it, moves that declaration from the items to
   * the list. Scanning stops at the first item that disagrees.
   *
   * @param {Object} list list node whose childNodes are its items
   */
  function postProcessList(list) {
    var typePattern = /list-style-type:(.*?)(?:;|$)/, items = list.childNodes || [], total = items.length, makeFilter = filters.stylesFilter, shared, found, idx, item, valueAttr, itemStyle;
    if(typePattern.exec(list.getAttribute("style"))) {
      return
    }
    for(idx = 0;idx < total;idx++) {
      item = items[idx];
      valueAttr = item.getAttribute("value");
      if(valueAttr && Number(valueAttr) === idx + 1) {
        item.removeAttribute("value")
      }
      found = typePattern.exec(item.getAttribute("style"));
      if(!found) {
        continue
      }
      if(shared && found[1] !== shared) {
        // Items disagree: nothing can be merged.
        shared = null;
        break
      }
      shared = found[1]
    }
    if(!shared) {
      return
    }
    for(idx = 0;idx < total;idx++) {
      itemStyle = items[idx].getAttribute("style");
      if(itemStyle) {
        // Strip the per-item declaration; it is re-added once on the list.
        setStyle(items[idx], makeFilter([["list-style-type"]])(itemStyle))
      }
    }
    addStyle(list, "list-style-type", shared)
  }
  // Helpers used by the Word paste rules (list marker handling, style probing).
  var utils = {createListBulletMarker:function(bullet, bulletText) {
    // Creates a "ke:listbullet" tag that carries the marker: bullet[0] (the whole
    // regexp match) goes into ke:listsymbol, bulletText becomes its text child.
    var marker = new HtmlParser.Tag("ke:listbullet");
    marker.setAttribute("ke:listsymbol", bullet[0]);
    marker.appendChild(new HtmlParser.Text(bulletText));
    return marker
  }, isListBulletIndicator:function(element) {
    // True when the element's style contains "mso-list: Ignore";
    // otherwise falls through and returns undefined.
    var styleText = element.getAttribute("style");
    if(/mso-list\s*:\s*Ignore/i.test(styleText)) {
      return true
    }
  }, isContainingOnlySpaces:function(element) {
    // Truthy when the element has exactly one child and that child's nodeValue
    // is made up solely of whitespace and/or "&nbsp;".
    // NOTE(review): "(:?" looks like a typo for "(?:"; as written an optional ":"
    // before each whitespace character is accepted too.
    var text;
    return(text = onlyChild(element)) && /^(:?\s|&nbsp;)+$/.test(text.nodeValue)
  }, resolveList:function(element) {
    // If `element` contains "ke:listbullet" markers, they are removed and the
    // element is turned into a "ke:li" carrying ke:indent (plus the first
    // marker's attributes); returns true. Otherwise the shared list state is
    // reset and false is returned.
    var listMarker;
    if((listMarker = removeAnyChildWithName(element, "ke:listbullet")) && listMarker.length && (listMarker = listMarker[0])) {
      element.setTagName("ke:li");
      if(element.getAttribute("style")) {
        // text-indent, line-height, margin/margin-left and mso-list are consumed
        // here (their rules produce no string value, so they are dropped from the
        // style); every other declaration is kept.
        var styleStr = filters.stylesFilter([["text-indent"], ["line-height"], [/^margin(:?-left)?$/, null, function(margin) {
          // Take the left margin from the shorthand: 4th token, else 2nd, else 1st.
          var values = margin.split(" ");
          margin = convertToPx(values[3] || values[1] || values[0]);
          // The first growth of the margin between two items defines the width
          // of one indent level.
          if(!listBaseIndent && previousListItemMargin !== null && margin > previousListItemMargin) {
            listBaseIndent = margin - previousListItemMargin
          }
          previousListItemMargin = margin;
          if(listBaseIndent) {
            element.setAttribute("ke:indent", listBaseIndent && Math.ceil(margin / listBaseIndent) + 1 || 1)
          }
        }], [/^mso-list$/, null, function(val) {
          // Digits of the first token give the list id, digits of the second the
          // indent level (presumably values like "l0 level1 lfo1" - confirm).
          // NOTE(review): throws when the value has fewer than two tokens.
          val = val.split(" ");
          var listId = Number(val[0].match(/\d+/)), indent = Number(val[1].match(/\d+/));
          if(indent === 1) {
            // A new list id on the first level marks the start of a new list.
            if(listId !== previousListId) {
              element.setAttribute("ke:reset", 1)
            }
            previousListId = listId
          }
          element.setAttribute("ke:indent", indent)
        }]])(element.getAttribute("style"), element);
        setStyle(element, styleStr)
      }
      // No indent could be derived from the style: treat as a first-level item.
      if(!element.getAttribute("ke:indent")) {
        previousListItemMargin = 0;
        element.setAttribute("ke:indent", 1)
      }
      // Copy the marker's attributes (e.g. ke:listsymbol) onto the list item.
      S.each(listMarker.attributes, function(a) {
        element.setAttribute(a.name, a.value)
      });
      return true
    }else {
      // Not a list item: forget the state gathered for the previous list.
      previousListId = previousListItemMargin = listBaseIndent = null
    }
    return false
  }, getStyleComponents:function() {
    // Runs immediately when `utils` is defined: creates an off-screen <div> and
    // returns a function that applies `name: styleValue` to it and reads back
    // the resulting value of every property listed in `fetchList`.
    var calculator = $('<div style="position:absolute;left:-9999px;top:-9999px;"></div>').prependTo("body");
    return function(name, styleValue, fetchList) {
      calculator.css(name, styleValue);
      var styles = {}, count = fetchList.length;
      for(var i = 0;i < count;i++) {
        styles[fetchList[i]] = calculator.css(fetchList[i])
      }
      return styles
    }
    // listDtdParents: map of the tags whose DTD entry allows an <ol> child.
  }(), listDtdParents:parentOf("ol")};
  // Registers the Word-paste cleanup rules on `wordFilter`.
  // The meaning of a rule's return value (false, null, a replacement node) and
  // of setTagName(null) is defined by HtmlParser.Filter / the writer, which are
  // not visible here; comments below describe only what each rule does.
  (function() {
    var blockLike = S.merge(dtd.$block, dtd.$listItem, dtd.$tableContent), falsyFilter = filters.falsyFilter, stylesFilter = filters.stylesFilter, createListBulletMarker = utils.createListBulletMarker, flattenList = filters.flattenList, assembleList = filters.assembleList, isListBulletIndicator = utils.isListBulletIndicator, containsNothingButSpaces = utils.isContainingOnlySpaces, resolveListItem = utils.resolveList, convertToPxStr = function(value) {
      // Converts a CSS length to "<n>px"; values convertToPx leaves non-numeric
      // (percentages) are returned unchanged.
      value = convertToPx(value);
      return isNaN(value) ? value : value + "px"
    }, getStyleComponents = utils.getStyleComponents, listDtdParents = utils.listDtdParents;
    // tagNames: meta/link/script tag names are renamed to "".
    wordFilter.addRules({tagNames:[[/meta|link|script/, ""]], root:function(element) {
      // Root: filter the children first, then assemble lists among the top-level nodes.
      element.filterChildren();
      assembleList(element)
    }, tags:{"^":function(element) {
      // On Gecko, apply the class-based styles collected by the "style" rule
      // below (filters.applyStyleFilter is null until that rule has run).
      var applyStyleFilter;
      if(UA.gecko && (applyStyleFilter = filters.applyStyleFilter)) {
        applyStyleFilter(element)
      }
    }, $:function(element) {
      var tagName = element.nodeName || "";
      // Block-like elements: convert width/height style values to pixels.
      if(tagName in blockLike && element.getAttribute("style")) {
        setStyle(element, stylesFilter([[/^(:?width|height)$/, null, convertToPxStr]])(element.getAttribute("style")))
      }
      // Headings may be list items in disguise.
      if(tagName.match(/h\d/)) {
        element.filterChildren();
        if(resolveListItem(element)) {
          return
        }
      }else {
        if(tagName in dtd.$inline) {
          // Inline elements holding nothing but spaces lose their tag name.
          element.filterChildren();
          if(containsNothingButSpaces(element)) {
            element.setTagName(null)
          }
        }else {
          // Namespaced tags other than our own "ke:" ones.
          if(tagName.indexOf(":") !== -1 && tagName.indexOf("ke") === -1) {
            element.filterChildren();
            // v:imagedata becomes an <img>, taking its src from o:href when present.
            if(tagName === "v:imagedata") {
              var href = element.getAttribute("o:href");
              if(href) {
                element.setAttribute("src", href)
              }
              element.setTagName("img");
              return
            }
            element.setTagName(null)
          }
        }
      }
      // Elements that may contain an <ol>: assemble lists among their children.
      if(tagName in listDtdParents) {
        element.filterChildren();
        assembleList(element)
      }
    }, style:function(element) {
      // Gecko only: read the "Style Definitions" section of the <style> text and
      // build a tagName -> (className -> declarations) table, then install
      // filters.applyStyleFilter to inline those declarations. Always returns false.
      if(UA.gecko) {
        var styleDefSection = onlyChild(element).nodeValue.match(/\/\* Style Definitions \*\/([\s\S]*?)\/\*/), styleDefText = styleDefSection && styleDefSection[1], rules = {};
        if(styleDefText) {
          // Strip line breaks, then visit each "selectors { declarations }" rule.
          styleDefText.replace(/[\n\r]/g, "").replace(/(.+?)\{(.+?)\}/g, function(rule, selectors, styleBlock) {
            selectors = selectors.split(",");
            var length = selectors.length;
            for(var i = 0;i < length;i++) {
              // Only simple "tag" or "tag.class" selectors are recognised.
              S.trim(selectors[i]).replace(/^(\w+)(\.[\w-]+)?$/g, function(match, tagName, className) {
                tagName = tagName || "*";
                // Drop the leading "." of the class part.
                // NOTE(review): for a selector without a class the optional group
                // may be undefined, in which case substring would throw - confirm.
                className = className.substring(1, className.length);
                if(className.match(/MsoNormal/)) {
                  return
                }
                if(!rules[tagName]) {
                  rules[tagName] = {}
                }
                if(className) {
                  rules[tagName][className] = styleBlock
                }else {
                  rules[tagName] = styleBlock
                }
              })
            }
          });
          filters.applyStyleFilter = function(element) {
            // Look up the declarations for this element (by tag, then class) and
            // prepend them to its style attribute.
            var name = rules["*"] ? "*" : element.nodeName, className = element.getAttribute("class"), style;
            if(name in rules) {
              style = rules[name];
              if(typeof style === "object") {
                style = style[className]
              }
              if(style) {
                addStyle(element, style, true)
              }
            }
          }
        }
      }
      return false
    }, p:function(element) {
      // MsoListParagraph: the parent of the first text node that is not
      // whitespace-only is taken to be the bullet and, if it has no style, is
      // flagged with "mso-list: Ignore" so the span rule turns it into a marker.
      if(/MsoListParagraph/.exec(element.getAttribute("class"))) {
        var bulletText = firstChild(element, function(node) {
          return node.nodeType === 3 && !containsNothingButSpaces(node.parentNode)
        });
        var bullet = bulletText && bulletText.parentNode;
        if(bullet && !bullet.getAttribute("style")) {
          bullet.setAttribute("style", "mso-list: Ignore;")
        }
      }
      element.filterChildren();
      resolveListItem(element)
    }, div:function(element) {
      // A <div> wrapping a single <table>: copy the div's attributes and style
      // onto the table, append a clearing <div>, and clear the wrapper's tag name.
      var singleChild = onlyChild(element);
      if(singleChild && singleChild.nodeName === "table") {
        var attrs = element.attributes;
        S.each(attrs, function(attr) {
          singleChild.setAttribute(attr.name, attr.value)
        });
        if(element.getAttribute("style")) {
          addStyle(singleChild, element.getAttribute("style"))
        }
        var clearFloatDiv = new HtmlParser.Tag("div");
        addStyle(clearFloatDiv, "clear", "both");
        element.appendChild(clearFloatDiv);
        element.setTagName(null)
      }
    }, td:function(element) {
      // Cells with a <thead> ancestor become <th>.
      if(getAncestor(element, "thead")) {
        element.setTagName("th")
      }
    }, ol:flattenList, ul:flattenList, dl:flattenList, font:function(element) {
      // <font> inside a list bullet indicator only loses its tag name.
      if(isListBulletIndicator(element.parentNode)) {
        element.setTagName(null);
        return
      }
      element.filterChildren();
      var styleText = element.getAttribute("style"), parent = element.parentNode;
      // Nested <font>: merge attributes and style into the parent <font>.
      // NOTE(review): the rest of this file reads `nodeName`, not `name`; if nodes
      // have no `name` property this branch never runs - confirm.
      if("font" === parent.name) {
        S.each(element.attributes, function(attr) {
          parent.setAttribute(attr.name, attr.value)
        });
        if(styleText) {
          addStyle(parent, styleText)
        }
        element.setTagName(null)
      }else {
        // Convert <font color/face/size> into a styled <span>.
        styleText = styleText || "";
        if(element.getAttribute("color")) {
          // Plain black is not carried over.
          if(element.getAttribute("color") !== "#000000") {
            styleText += "color:" + element.getAttribute("color") + ";"
          }
          element.removeAttribute("color")
        }
        if(element.getAttribute("face")) {
          styleText += "font-family:" + element.getAttribute("face") + ";";
          element.removeAttribute("face")
        }
        var size = element.getAttribute("size");
        if(size) {
          // size > 3 -> large, size < 3 -> small, otherwise medium.
          styleText += "font-size:" + (size > 3 ? "large" : size < 3 ? "small" : "medium") + ";";
          element.removeAttribute("size")
        }
        element.setTagName("span");
        addStyle(element, styleText)
      }
    }, span:function(element) {
      // A span inside a bullet indicator returns false.
      if(isListBulletIndicator(element.parentNode)) {
        return false
      }
      element.filterChildren();
      // Spans holding only spaces lose their tag name.
      if(containsNothingButSpaces(element)) {
        element.setTagName(null);
        return null
      }
      // The bullet indicator itself is replaced by a ke:listbullet marker.
      if(isListBulletIndicator(element)) {
        // First descendant with a nodeValue, or an <img>.
        var listSymbolNode = firstChild(element, function(node) {
          return node.nodeValue || node.nodeName === "img"
        });
        // A node without nodeValue (the <img> case) falls back to the symbol "l.".
        var listSymbol = listSymbolNode && (listSymbolNode.nodeValue || "l."), listType = listSymbol && listSymbol.match(/^(?:[(]?)([^\s]+?)([.)]?)$/);
        if(listType) {
          var marker = createListBulletMarker(listType, listSymbol);
          // Markers under a hidden span are flagged so assembleList drops the item.
          // NOTE(review): the pattern requires a literal space before "mso-hide"
          // and after "none" - confirm this is intended.
          var ancestor = getAncestor(element, "span");
          if(ancestor && / mso-hide:\s*all|display:\s*none /.test(ancestor.getAttribute("style"))) {
            marker.setAttribute("ke:ignored", 1)
          }
          return marker
        }
      }
      // Ordinary span: remove the listed style properties (their rules yield no
      // value), keep the rest.
      var styleText = element.getAttribute("style");
      if(styleText) {
        setStyle(element, stylesFilter([[/^line-height$/], [/^font-family$/], [/^font-size$/], [/^color$/], [/^background-color$/]])(styleText, element))
      }
    }, a:function(element) {
      // Named anchors without href lose their tag name; on WebKit the local
      // "file:///...#" prefix is stripped so only the fragment remains.
      var href;
      if(!(href = element.getAttribute("href")) && element.getAttribute("name")) {
        element.setTagName(null)
      }else {
        if(UA.webkit && href && href.match(/file:\/\/\/[\S]+#/i)) {
          element.setAttribute("href", href.replace(/file:\/\/\/[^#]+/i, ""))
        }
      }
    }, "ke:listbullet":function(element) {
      // List markers inside headings lose their tag name.
      if(getAncestor(element, /h\d/)) {
        element.setTagName(null)
      }
      // attributeNames: mouse/load handlers, v:/o: namespaced and lang* attributes
      // are renamed to "".
      // attributes.style: whitelist filter (second argument 1) - only declarations
      // matched by a rule below can survive.
    }}, attributeNames:[[/^onmouse(:?out|over)/, ""], [/^onload$/, ""], [/(?:v|o):\w+/, ""], [/^lang/, ""]], attributes:{style:stylesFilter([[/^list-style-type$/], [/^margin$|^margin-(?!bottom|top)/, null, function(value, element, name) {
      // For <p>/<div> keep horizontal indentation only, expressed as margin-left.
      if(element.nodeName in {p:1, div:1}) {
        var indentStyleName = "margin-left";
        if(name === "margin") {
          // Resolve the shorthand to its margin-left component.
          value = getStyleComponents(name, value, [indentStyleName])[indentStyleName]
        }else {
          if(name !== indentStyleName) {
            return null
          }
        }
        if(value && !emptyMarginRegex.test(value)) {
          return[indentStyleName, value]
        }
      }
      return null
    }], [/^clear$/], [/^border.*|margin.*|vertical-align|float$/, null, function(value, element) {
      // Kept for images only.
      if(element.nodeName === "img") {
        return value
      }
    }], [/^width|height$/, null, function(value, element) {
      // Kept for tables, cells and images only.
      if(element.nodeName in {table:1, td:1, th:1, img:1}) {
        return value
      }
    }]], 1), width:function(value, element) {
      // width attribute: returns false on table content elements.
      if(element.nodeName in dtd.$tableContent) {
        return false
      }
    }, border:function(value, element) {
      // border attribute: returns false on table content elements.
      if(element.nodeName in dtd.$tableContent) {
        return false
      }
    }, "class":falsyFilter, bgcolor:falsyFilter, valign:function(value, element) {
      // valign is translated to a vertical-align style.
      addStyle(element, "vertical-align", value);
      return false
      // comment rule: only installed when UA.ie is truthy, otherwise falsyFilter.
      // NOTE(review): the UA.gecko branch inside sits behind the UA.ie check and
      // looks unreachable - confirm whether the condition should be inverted.
    }}, comment:UA.ie ? function(value, node) {
      var imageInfo = value.match(/<img.*?>/), listInfo = value.match(/^\[if !supportLists\]([\s\S]*?)\[endif\]$/);
      // "[if !supportLists]...[endif]" comments carry the list marker.
      if(listInfo) {
        // NOTE(review): listType may be null when the marker pattern does not
        // match; createListBulletMarker reads bullet[0] and would throw then.
        var listSymbol = listInfo[1] || imageInfo && "l.", listType = listSymbol && listSymbol.match(/>(?:[(]?)([^\s]+?)([.)]?)</);
        return createListBulletMarker(listType, listSymbol)
      }
      // Image inside a conditional comment: rebuild the <img>, taking its src from
      // the o:href of a v:imagedata found in the previous sibling's HTML.
      if(UA.gecko && imageInfo) {
        var img = (new HtmlParser.Parser(imageInfo[0])).parse().childNodes[0], previousComment = node.previousSibling, imgSrcInfo = previousComment && previousComment.toHtml().match(/<v:imagedata[^>]*o:href=[''](.*?)['']/), imgSrc = imgSrcInfo && imgSrcInfo[1];
        if(imgSrc) {
          img.setAttribute("src", imgSrc)
        }
        return img
      }
      return false
    } : falsyFilter})
  })();
  return{toDataFormat:function(html, editor) {
    if(UA.gecko) {
      html = html.replace(/(<!--\[if[^<]*?\])--\>([\S\s]*?)<!--(\[endif\]--\>)/gi, "$1$2$3")
    }
    html = editor.htmlDataProcessor.toDataFormat(html, wordFilter);
    return html
  }}
});
Example #14
0
            (function () {

                /**
                 * Protect an element's source by replacing it with a comment.
                 *
                 * The element is serialized, URI-encoded (with "--" additionally
                 * escaped as "%2D%2D") and prefixed with protectedSourceMarker.
                 *
                 * @param {Object} element node to serialize
                 * @return {HtmlParser.Comment} comment node carrying the encoded source
                 */
                function wrapAsComment(element) {
                    var encoded = encodeURIComponent(HtmlParser.serialize(element));
                    var payload = protectedSourceMarker + encoded.replace(/--/g, '%2D%2D');
                    return new HtmlParser.Comment(payload);
                }

                // Rules applied to HTML coming into the editor from outside.
                var defaultDataFilterRules = {};

                // <?xml ...> and *namespace* tag names are renamed to ''.
                defaultDataFilterRules.tagNames = [
                    [/^\?xml.*$/i, ''],
                    [/^.*namespace.*$/i, '']
                ];

                defaultDataFilterRules.attributeNames = [
                    // Event attributes (onXYZ) must not be set directly: they could
                    // become active inside the editing area (IE|WebKit), so they are
                    // stored under a "ke_on" prefix instead.
                    [/^on/, 'ke_on'],
                    [/^lang$/, '']
                ];

                // Scripts are wrapped as protected comments; spans go through the
                // inline-element filter.
                defaultDataFilterRules.tags = {
                    script: wrapAsComment,
                    noscript: wrapAsComment,
                    span: filterInline
                };

                // Rules that finalize the HTML produced by the editing area
                // before it is handed back to the outside world.
                var defaultHTMLFilterRules = {
                    tagNames: [
                        // Remove the "ke:" namespace prefix.
                        [ ( /^ke:/ ), '' ],
                        // Ignore <?xml:namespace> tags.
                        [ ( /^\?xml:namespace$/ ), '' ]
                    ],
                    tags: {
                        /**
                         * Applied to every element: when a "_keSaved_" copy of
                         * name/href/src exists, drop the live attribute so the
                         * saved one can take its place (see attributeNames).
                         */
                        $: function (element) {
                            var attributes = element.attributes;

                            if (attributes.length) {
                                // Remove the real attribute first; the "_keSaved_"
                                // prefix is stripped later by attributeNames.
                                // Remove duplicated attributes - #3789.
                                var attributeNames = [ 'name', 'href', 'src' ],
                                    savedAttributeName;
                                for (var i = 0; i < attributeNames.length; i++) {
                                    savedAttributeName = '_keSaved_' + attributeNames[ i ];
                                    if (element.getAttribute(savedAttributeName)) {
                                        element.removeAttribute(attributeNames[i]);
                                    }
                                }
                            }

                            return element;
                        },
                        /**
                         * If the <embed> is a child of an <object>, copy the
                         * parent's width and height attributes onto it.
                         */
                        embed: function (element) {
                            var parent = element.parentNode;
                            if (parent && parent.nodeName === 'object') {
                                var parentWidth = parent.getAttribute('width'),
                                    parentHeight = parent.getAttribute('height');
                                if (parentWidth) {
                                    element.setAttribute('width', parentWidth);
                                }
                                if (parentHeight) {
                                    // Fixed: this used to write the parent's height
                                    // into the 'width' attribute.
                                    element.setAttribute('height', parentHeight);
                                }
                            }
                        },

                        // Remove empty link but not empty anchor.(#3829)
                        a: function (element) {
                            if (!(element.childNodes.length) && !(element.attributes.length)) {
                                return false;
                            }
                            return undefined;
                        },
                        span: filterInline,
                        strong: filterInline,
                        em: filterInline,
                        del: filterInline,
                        u: filterInline
                    },
                    attributes: {
                        // Remove style attributes that are empty or whitespace-only.
                        style: function (v) {
                            if (!S.trim(v)) {
                                return false;
                            }
                            return undefined;
                        }
                    },
                    attributeNames: [
                        // Promote the saved attributes to the real ones, replacing
                        // the originals: replace(/^_keSaved_/, ""),
                        // e.g. _keSaved_href -> href
                        [ ( /^_keSaved_/ ), '' ],
                        [ ( /^ke_on/ ), 'on' ],
                        [ ( /^_ke.*/ ), '' ],
                        [ ( /^ke:.*$/ ), '' ],
                        // KISSY-related attributes
                        [ ( /^_ks.*/ ), '' ]
                    ],
                    /**
                     * Restore protected source: a comment starting with
                     * protectedSourceMarker is decoded and parsed back into
                     * the node it was created from.
                     */
                    comment: function (contents) {
                        // If this is a comment for protected source.
                        if (contents.substr(0, protectedSourceMarker.length) === protectedSourceMarker) {
                            contents = S.trim(S.urlDecode(contents.substr(protectedSourceMarker.length)));
                            return HtmlParser.parse(contents).childNodes[0];
                        }
                        return undefined;
                    }
                };
                if (OLD_IE) {
                    // Old IE emits the style attribute in capital letters, so it has
                    // to be converted back to lower case. Lower-casing the whole
                    // value would be a bug, e.g.
                    //   style='background:url(www.G.cn)' => style='background:url(www.g.cn)'
                    // so only the property names (the text before each ':') are touched.
                    var lowerCaseMatch = function (match) {
                        return match.toLowerCase();
                    };
                    defaultHTMLFilterRules.attributes.style = function (value) {
                        return value.replace(/(^|;)([^:]+)/g, lowerCaseMatch);
                    };
                }

                // Register the default rule sets on their filters.
                htmlFilter.addRules(defaultHTMLFilterRules);
                dataFilter.addRules(defaultDataFilterRules);
            })();
Example #15
0
            (function () {
                // Matches a text node that consists of a single &nbsp; (optionally
                // surrounded by tabs, line breaks or spaces). Such a node at the end
                // of a block is a placeholder, not real content.
                // Safari transforms the &nbsp; to \xa0. (#4172)
                // The surrounding whitespace is allowed because the KISSY html-parser
                // auto-indents its output and adds \r \n at the end of lines.
                var tailNbspRegex = /^[\t\r\n ]*(?:&nbsp;|\xa0)[\t\r\n ]*$/;

                /**
                 * Return the last child of the block that is not a whitespace-only
                 * text node (#4344), or undefined when there is none.
                 *
                 * @param {Object} block node with a childNodes array
                 * @return {Object|undefined}
                 */
                function lastNoneSpaceChild(block) {
                    var nodes = block.childNodes,
                        idx,
                        node;
                    for (idx = nodes.length - 1; idx >= 0; idx--) {
                        node = nodes[idx];
                        // Skip text nodes (nodeType 3) that trim to nothing.
                        if (!(node && node.nodeType === 3 && !S.trim(node.nodeValue))) {
                            return node;
                        }
                    }
                    return undefined;
                }

                /**
                 * Remove a trailing filler from the block: either a <br> element or
                 * a text node holding just a non-breaking space placeholder.
                 * Only the last non-space child is considered.
                 *
                 * @param {Object} block node to clean up
                 */
                function trimFillers(block) {
                    var tail = lastNoneSpaceChild(block);
                    if (!tail) {
                        return;
                    }
                    var isTrailingBr = tail.nodeType === 1 && tail.nodeName === 'br';
                    var isNbspFiller = tail.nodeType === 3 && tailNbspRegex.test(tail.nodeValue);
                    if (isTrailingBr || isNbspFiller) {
                        block.removeChild(tail);
                    }
                }

                /**
                 * Tell whether the block needs a filler appended.
                 *
                 * True for an empty block (<p></p>, <td></td>), and for a <form>
                 * whose last non-space child is an <input>: some form controls need
                 * the extension too, so the cursor can be moved to the end of the
                 * form. (#4791)
                 *
                 * @param {Object} block node to inspect
                 * @return {boolean}
                 */
                function blockNeedsExtension(block) {
                    var tail = lastNoneSpaceChild(block);
                    if (!tail) {
                        return true;
                    }
                    return block.nodeName === 'form' && tail.nodeName === 'input';
                }

                /**
                 * External HTML -> editor HTML.
                 * Strips trailing fillers and, when the block is left needing an
                 * extension, appends a <br> - except on old IE, which does not
                 * need one; other browsers need it for the cursor and for height.
                 *
                 * @param {Object} block block-like node being filtered
                 */
                function extendBlockForDisplay(block) {
                    trimFillers(block);
                    if (!blockNeedsExtension(block)) {
                        return;
                    }
                    if (OLD_IE) {
                        return;
                    }
                    block.appendChild(new HtmlParser.Tag('br'));
                }

                // Editor html -> external html.
                // Strips any existing filler, then appends a \xa0 text node to
                // blocks that need one, because an empty <p></p> has no height.
                function extendBlockForOutput(block) {
                    trimFillers(block);
                    if (!blockNeedsExtension(block)) {
                        return;
                    }
                    block.appendChild(new HtmlParser.Text('\xa0'));
                }

                // Find out the list of block-like tags that can contain <br>.
                var dtd = Editor.XHTML_DTD;
                // Start from every tag named in the DTD's $block, $listItem and
                // $tableContent groups.
                var blockLikeTags = S.merge(
                    dtd.$block,
                    dtd.$listItem,
                    dtd.$tableContent), i;
                // Drop the tags whose own DTD entry does not list 'br'.
                for (i in blockLikeTags) {
                    if (!( 'br' in dtd[i] )) {
                        delete blockLikeTags[i];
                    }
                }

                // We just avoid filler in <pre> right now.
                // TODO: Support filler for <pre>, line break is also occupy line height.
                delete blockLikeTags.pre;
                // One rule set per direction: data (display) and html (output).
                var defaultDataBlockFilterRules = { tags: {} };
                var defaultHTMLBlockFilterRules = { tags: {} };

                // Register the matching filler handler for every remaining tag.
                for (i in blockLikeTags) {
                    defaultDataBlockFilterRules.tags[ i ] = extendBlockForDisplay;
                    defaultHTMLBlockFilterRules.tags[ i ] = extendBlockForOutput;
                }

                dataFilter.addRules(defaultDataBlockFilterRules);
                htmlFilter.addRules(defaultHTMLBlockFilterRules);
            })();
Example #16
0
        init: function (editor) {
            var Node = S.Node,
                UA = S.UA,
                htmlFilter = new HtmlParser.Filter(),
                dataFilter = new HtmlParser.Filter();

            // Filter rule for inline elements: returns false when the element
            // has no children, or only text nodes with an empty value, and
            // undefined otherwise.
            function filterInline(element) {
                var nodes = element.childNodes,
                    total = nodes.length,
                    index,
                    node;
                for (index = 0; index < total; index++) {
                    node = nodes[index];
                    // Anything other than an empty text node counts as content.
                    if (node.nodeType !== S.DOM.NodeType.TEXT_NODE || node.nodeValue) {
                        return undefined;
                    }
                }
                return false;
            }

            (function () {

                // Replace an element by a comment node that carries the protected
                // source marker followed by the element's URI-encoded html.
                function wrapAsComment(element) {
                    var serialized = HtmlParser.serialize(element),
                        // "--" is rewritten to its percent-encoded form.
                        encoded = encodeURIComponent(serialized).replace(/--/g,'%2D%2D');
                    return new HtmlParser.Comment(protectedSourceMarker + encoded);
                }

                // Rules that filter html coming into the editor from outside.
                var defaultDataFilterRules = {};

                // Tag names matching these patterns are rewritten to ''.
                defaultDataFilterRules.tagNames = [
                    [/^\?xml.*$/i, ''],
                    [/^.*namespace.*$/i, '']
                ];

                defaultDataFilterRules.attributeNames = [
                    // Event attributes (onXYZ) must not be directly set. They can become
                    // active in the editing area (IE|WebKit).
                    [/^on/, 'ke_on'],
                    [/^lang$/, '']
                ];

                // script/noscript are turned into protected comments; spans go
                // through the shared empty-inline rule.
                defaultDataFilterRules.tags = {
                    script: wrapAsComment,
                    noscript: wrapAsComment,
                    span: filterInline
                };

                // Rules that finalize the html generated in the editing area
                // before it is handed to the outside.
                var defaultHTMLFilterRules = {
                    tagNames: [
                        // Remove the "ke:" namespace prefix.
                        [ ( /^ke:/ ), '' ],
                        // Ignore <?xml:namespace> tags.
                        [ ( /^\?xml:namespace$/ ), '' ]
                    ],
                    tags: {
                        // Registered under the `$` key.
                        // NOTE(review): presumably run for every element - confirm
                        // against HtmlParser.Filter.
                        $: function (element) {
                            var attributes = element.attributes;

                            if (attributes.length) {
                                // Remove the real attribute first; the "_keSaved_"
                                // prefix of the saved copy is stripped later by the
                                // attributeNames rules below.
                                // Remove duplicated attributes - #3789.
                                var attributeNames = [ 'name', 'href', 'src' ],
                                    savedAttributeName;
                                for (var i = 0; i < attributeNames.length; i++) {
                                    savedAttributeName = '_keSaved_' + attributeNames[ i ];
                                    if (element.getAttribute(savedAttributeName)) {
                                        element.removeAttribute(attributeNames[i]);
                                    }
                                }
                            }

                            return element;
                        },
                        embed: function (element) {
                            var parent = element.parentNode;
                            // If the <embed> is child of a <object>, copy the width
                            // and height attributes from it.
                            if (parent && parent.nodeName === 'object') {
                                var parentWidth = parent.getAttribute('width'),
                                    parentHeight = parent.getAttribute('height');
                                if (parentWidth) {
                                    element.setAttribute('width', parentWidth);
                                }
                                if (parentHeight) {
                                    // Each dimension goes to its own attribute; the
                                    // height must not be written over the width.
                                    element.setAttribute('height', parentHeight);
                                }
                            }
                        },

                        // Remove empty link but not empty anchor.(#3829)
                        // Only an <a> with neither children nor attributes is dropped.
                        a: function (element) {
                            if (!(element.childNodes.length) && !(element.attributes.length)) {
                                return false;
                            }
                            return undefined;
                        },
                        span: filterInline,
                        strong: filterInline,
                        em: filterInline,
                        del: filterInline,
                        u: filterInline
                    },
                    attributes: {
                        // Remove empty style attributes.
                        style: function (v) {
                            if (!S.trim(v)) {
                                return false;
                            }
                            return undefined;
                        }
                    },
                    attributeNames: [
                        // Promote the saved attribute to the real one, replacing
                        // the original: replace(/^_keSaved_/,"")
                        // _keSaved_href -> href
                        [ ( /^_keSaved_/ ), '' ],
                        [ ( /^ke_on/ ), 'on' ],
                        [ ( /^_ke.*/ ), '' ],
                        [ ( /^ke:.*$/ ), '' ],
                        // KISSY-related attributes.
                        [ ( /^_ks.*/ ), '' ]
                    ],
                    // Turn a protected-source comment back into the node parsed
                    // from its decoded contents; other comments are left alone.
                    comment: function (contents) {
                        // If this is a comment for protected source.
                        if (contents.substr(0, protectedSourceMarker.length) === protectedSourceMarker) {
                            contents = S.trim(S.urlDecode(contents.substr(protectedSourceMarker.length)));
                            return HtmlParser.parse(contents).childNodes[0];
                        }
                        return undefined;
                    }
                };
                if (OLD_IE) {
                    // IE outputs the style attribute in capital letters, so it is
                    // converted back to lower case. Only the property names are
                    // lower-cased: lower-casing the whole value would turn
                    // style='background:url(www.G.cn)' into url(www.g.cn).
                    // NOTE(review): this assignment replaces the default `style`
                    // rule, so empty style values are no longer answered with
                    // `false` on old IE - confirm that is intended.
                    defaultHTMLFilterRules.attributes.style = function (value) {
                        var propertyNamePattern = /(^|;)([^:]+)/g;
                        return value.replace(propertyNamePattern, function (namePart) {
                            return namePart.toLowerCase();
                        });
                    };
                }

                // Register both default rule sets with their filters.
                htmlFilter.addRules(defaultHTMLFilterRules);
                dataFilter.addRules(defaultDataFilterRules);
            })();

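            /*
             Remove the <br/> that Firefox automatically appends at the end of the content,
             the &nbsp; that IE automatically adds,
             and the placeholders that other browsers add at the end of paragraphs.
             */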
            (function () {
                // Regex to scan for &nbsp; at the end of blocks,
                // which are actually placeholders.
                // Safari transforms the &nbsp; to \xa0. (#4172)
                // The KISSY html-parser auto-indents html and adds \r \n at the end
                // of lines, so surrounding tabs, CR/LF and spaces are tolerated.
                // The whole text must be one "&nbsp;" entity or one \xa0 character.
                var tailNbspRegex = /^[\t\r\n ]*(?:&nbsp;|\xa0)[\t\r\n ]*$/;

                // Find the last child of `block` that is not a whitespace-only
                // text node (#4344). Yields undefined when the block has no
                // children or when every child is a blank text node.
                function lastNoneSpaceChild(block) {
                    var children = block.childNodes,
                        position,
                        candidate;
                    // Walk backwards from the tail and stop at the first node that
                    // is not a text node (nodeType 3) with a blank value.
                    for (position = children.length - 1; position >= 0; position--) {
                        candidate = children[ position ];
                        if (!candidate || candidate.nodeType !== 3 || S.trim(candidate.nodeValue)) {
                            return candidate;
                        }
                    }
                    return undefined;
                }

                // Remove a trailing filler from `block`: either a closing <br>
                // element or a text node that holds nothing but one &nbsp;.
                function trimFillers(block) {
                    var tail = lastNoneSpaceChild(block),
                        isTrailingBr,
                        isNbspFiller;
                    if (!tail) {
                        return;
                    }
                    isTrailingBr = tail.nodeType === 1 && tail.nodeName === 'br';
                    // The text check only runs when the tail is not a <br>.
                    isNbspFiller = !isTrailingBr &&
                        tail.nodeType === 3 &&
                        tailNbspRegex.test(tail.nodeValue);
                    if (isTrailingBr || isNbspFiller) {
                        block.removeChild(tail);
                    }
                }

                // Tell whether `block` needs a filler appended.
                function blockNeedsExtension(block) {
                    var tail = lastNoneSpaceChild(block);
                    // Empty block such as <p></p> or <td></td>.
                    if (!tail) {
                        return true;
                    }
                    // Some of the controls in a form need extension too, so the
                    // cursor can be moved to the end of the form. (#4791)
                    return block.nodeName === 'form' && tail.nodeName === 'input';
                }

                // External html -> editor html.
                // Strips any existing filler, then appends a <br> to blocks that
                // need one. Non-IE browsers need the br for cursor and height;
                // old IE does not, so nothing is appended there.
                function extendBlockForDisplay(block) {
                    trimFillers(block);
                    if (!blockNeedsExtension(block) || OLD_IE) {
                        return;
                    }
                    block.appendChild(new HtmlParser.Tag('br'));
                }

                // Editor html -> external html.
                // Strips any existing filler, then appends a \xa0 text node to
                // blocks that need one, because an empty <p></p> has no height.
                function extendBlockForOutput(block) {
                    trimFillers(block);
                    if (!blockNeedsExtension(block)) {
                        return;
                    }
                    block.appendChild(new HtmlParser.Text('\xa0'));
                }

                // Register the filler handlers for every block-like tag that can
                // contain <br>. Candidates are the tags named in the DTD's $block,
                // $listItem and $tableContent groups.
                var dtd = Editor.XHTML_DTD,
                    candidateTags = S.merge(
                        dtd.$block,
                        dtd.$listItem,
                        dtd.$tableContent),
                    displayTagRules = {},
                    outputTagRules = {},
                    tagName;

                for (tagName in candidateTags) {
                    // Keep only tags whose DTD entry lists 'br'.
                    // We just avoid filler in <pre> right now.
                    // TODO: Support filler for <pre>, line break is also occupy line height.
                    if ('br' in dtd[tagName] && tagName !== 'pre') {
                        displayTagRules[ tagName ] = extendBlockForDisplay;
                        outputTagRules[ tagName ] = extendBlockForOutput;
                    }
                }

                dataFilter.addRules({ tags: displayTagRules });
                htmlFilter.addRules({ tags: outputTagRules });
            })();


            // Entity handling for html-parser fragments: every \xa0 character in
            // output text is written as the "&nbsp;" entity.
            // Background: el.innerHTML="&nbsp;"
            // http://yiminghe.javaeye.com/blog/788929
            htmlFilter.addRules({
                text: function (value) {
                    var nbspPattern = /\xa0/g;
                    return value.replace(nbspPattern, '&nbsp;');
                }
            });


            // Opening tags of the elements whose href/src/name need protecting,
            // and the attribute pattern applied inside each such tag.
            var protectElementRegex = /<(a|area|img|input)\b([^>]*)>/gi,
                protectAttributeRegex = /\b(href|src|name)\s*=\s*(?:(?:"[^"]*")|(?:'[^']*')|(?:[^ "'>]+))/gi,
                // Characters that can join a match onto a longer attribute name.
                attributeNameJoinerRegex = /[\-:.]/;

            // IE 6-7 replace url-related attribute content values with the
            // resolved DOM values:
            // #a -> http://xxx/#a
            // ../x.html -> http://xx/x.html
            // To keep the authored value, every href/src/name attribute of
            // <a>, <area>, <img> and <input> gets a "_keSaved_"-prefixed copy
            // inserted in front of it. Returns the rewritten html string.
            function protectAttributes(html) {
                return html.replace(protectElementRegex, function (element, tag, attributes) {
                    return '<' + tag + attributes.replace(protectAttributeRegex, function (fullAttr, attrName, offset) {
                        // "\b" also matches after "-", ":" and ".", so the tail of a
                        // longer name such as data-href or xlink:href would be hit.
                        // Leave those attributes untouched.
                        if (offset > 0 && attributeNameJoinerRegex.test(attributes.charAt(offset - 1))) {
                            return fullAttr;
                        }
                        // We should not rewrite the existed protected attributes,
                        // e.g. clipboard content from editor. (#5218)
                        if (attributes.indexOf('_keSaved_' + attrName) === -1) {
                            return ' _keSaved_' + fullAttr + ' ' + fullAttr;
                        }
                        return fullAttr;
                    }) + '>';
                });
            }

            // Prefix that marks a comment as carrying protected source.
            var protectedSourceMarker = '{ke_protected}';

            // protectElementsRegex matches whole <textarea>, <style> and <script>
            // elements plus <link>/<meta>/<base> tags. The bodies are matched
            // lazily so that two elements of the same kind are protected
            // separately instead of swallowing everything between them.
            // encodedElementsRegex matches the <ke:encoded> wrapper and captures
            // its payload.
            var protectElementsRegex = /(?:<textarea[^>]*>[\s\S]*?<\/textarea>)|(?:<style[^>]*>[\s\S]*?<\/style>)|(?:<script[^>]*>[\s\S]*?<\/script>)|(?:<(?:link|meta|base)[^>]*>)/gi,
                encodedElementsRegex = /<ke:encoded>([^<]*)<\/ke:encoded>/gi;

            // Element names that get (and later lose) the "ke:" prefix.
            var protectElementNamesRegex = /(<\/?)((?:object|embed|param|html|body|head|title|noscript)[^>]*>)/gi,
                unprotectElementNamesRegex = /(<\/?)ke:((?:object|embed|param|html|body|head|title|noscript)[^>]*>)/gi;

            // <ke:param>/<ke:embed> tags not directly followed by their closing tag.
            var protectSelfClosingRegex = /<ke:(param|embed)([^>]*?)\/?>(?!\s*<\/ke:\1)/gi;

            // Expand <ke:param>/<ke:embed> tags that have no closing tag into an
            // explicit open/close pair.
            function protectSelfClosingElements(html) {
                var openClosePair = '<ke:$1$2></ke:$1>';
                return html.replace(protectSelfClosingRegex, openClosePair);
            }

            // Replace every fragment matched by protectElementsRegex with a
            // <ke:encoded> wrapper holding the URI-encoded fragment.
            function protectElements(html) {
                function encodeFragment(fragment) {
                    return '<ke:encoded>' + encodeURIComponent(fragment) + '</ke:encoded>';
                }
                return html.replace(protectElementsRegex, encodeFragment);
            }

            // Replace every <ke:encoded> wrapper with its decoded payload.
            function unprotectElements(html) {
                function decodePayload(wrapper, payload) {
                    return S.urlDecode(payload);
                }
                return html.replace(encodedElementsRegex, decodePayload);
            }

            // Prefix the tag names matched by protectElementNamesRegex with "ke:".
            function protectElementsNames(html) {
                var prefixedTag = '$1ke:$2';
                return html.replace(protectElementNamesRegex, prefixedTag);
            }

            // Strip the "ke:" prefix from the tag names matched by
            // unprotectElementNamesRegex.
            function unprotectElementNames(html) {
                var plainTag = '$1$2';
                return html.replace(unprotectElementNamesRegex, plainTag);
            }

            // Processor exposed on the editor: converts html between the editing
            // area and the outside, and publishes the two filters it uses.
            editor.htmlDataProcessor = {
                dataFilter: dataFilter,
                htmlFilter: htmlFilter,

                // Editor html -> external html.
                // Parses the html, applies htmlFilter and writes it through
                // BeautifyWriter, which produces nicely formatted output (it adds
                // extra "\n" / "\t" whitespace text).
                toHtml: function (html) {
                    // Remove the filling char (\u200b) on webkit.
                    var source = UA.webkit ? html.replace(/\u200b/g, '') : html,
                        beautifier = new HtmlParser.BeautifyWriter(),
                        tree = new HtmlParser.Parser(source).parse();
                    tree.writeHtml(beautifier, htmlFilter);
                    return beautifier.getHtml();
                },

                // External html -> editor html.
                // `_dataFilter` is optional: a wordFilter or dataFilter may be
                // passed; the default dataFilter is used otherwise.
                toDataFormat: function (html, _dataFilter) {
                    var activeFilter = _dataFilter || dataFilter,
                        guarded,
                        sandbox,
                        normalized,
                        basicWriter,
                        tree;

                    // Protect elements than can't be set inside a DIV. E.g. IE removes
                    // style tags from innerHTML. (#3710)
                    // Also protect textarea and script, in case they hold un-encoded html:
                    // https://github.com/kissyteam/kissy/issues/420
                    guarded = protectElements(html);
                    guarded = protectAttributes(guarded);
                    // Certain elements has problem to go through Dom operation, protect
                    // them by prefixing 'ke' namespace. (#3591)
                    guarded = protectElementsNames(guarded);
                    // All none-IE browsers ignore self-closed custom elements,
                    // protecting them into open-close. (#3591)
                    guarded = protectSelfClosingElements(guarded);

                    // Invalid markup may break the parser, so call the browser to
                    // help us fixing a possibly invalid HTML structure first.
                    sandbox = new Node('<div>');
                    // Add fake character to workaround IE comments bug. (#3801)
                    sandbox.html('a' + guarded);
                    normalized = sandbox.html().substr(1);

                    // Unprotect "some" of the protected elements at this point.
                    normalized = unprotectElements(unprotectElementNames(normalized));

                    // bug qc #3710: BasicWriter is used here to drop useless text
                    // nodes such as runs of "\n" whitespace between tags.
                    basicWriter = new HtmlParser.BasicWriter();
                    tree = new HtmlParser.Parser(normalized).parse();
                    tree.writeHtml(basicWriter, activeFilter);
                    return basicWriter.getHtml();
                },

                // Most compact html, for sending to the server: same filter as
                // toHtml, written through MinifyWriter.
                toServer: function (html) {
                    var minifier = new HtmlParser.MinifyWriter(),
                        tree = new HtmlParser.Parser(html).parse();
                    tree.writeHtml(minifier, htmlFilter);
                    return minifier.getHtml();
                }
            };
        }