/**
 * @ignore
 * parse html string into Nodes
 * @author yiminghe@gmail.com
 */
KISSY.add("html-parser/lexer/lexer", function (S, Cursor, Page, TextNode, CData, Utils, Attribute, TagNode, CommentNode) {
    /**
     * Lexer for html parser.
     * Holds the page being read, the cursor into it, and the parse config.
     * @param {String} text html content
     * @param {Object} [cfg] config object; an empty object is used when omitted
     * @class KISSY.HtmlParser.Lexer
     */
    function Lexer(text, cfg) {
        var lexer = this;
        lexer.page = new Page(text);
        lexer.cursor = new Cursor();
        // node creation goes through nodeFactory, which is the lexer itself here
        lexer.nodeFactory = lexer;
        lexer.cfg = cfg ? cfg : {};
    }

    Lexer.prototype = {
        constructor: Lexer,

        setPosition: function (p) {
            this.cursor.position = p;
        },

        getPosition: function () {
            return this.cursor.position;
        },

        /**
         * get next node parsed from content
         * @param quoteSmart
         * @returns {KISSY.HtmlParse.Node}
         */
        nextNode: function (quoteSmart) {
            var start ,
                ch,
                ret,
                cursor = this.cursor,
                page = this.page;

            start = cursor.position;
            ch = page.getChar(cursor);

            switch (ch) {
                case -1:
                    ret = null;
                    break;
                case '<':
                    ch = page.getChar(cursor);
                    if (ch == -1) {
                        ret = this.makeString(start, cursor.position);
                    } else if (ch == '/' || Utils.isLetter(ch)) {
                        page.ungetChar(cursor);
                        ret = this.parseTag(start);
                    } else if ('!' == ch || '?' == ch) {
                        ch = page.getChar(cursor);
                        if (ch == -1) {
                            ret = this.makeString(start, cursor.position);
                        } else {
                            if ('>' == ch) {
                                ret = this.makeComment(start, cursor.position);
                            } else {
                                page.ungetChar(cursor); // remark/tag need this char
                                if ('-' == ch) {
                                    ret = this.parseComment(start, quoteSmart);
                                } else {
                                    // <!DOCTYPE html>
                                    // <?xml:namespace>
                                    page.ungetChar(cursor); // tag needs prior one too
                                    ret = this.parseTag(start);
                                }
                            }
                        }
                    } else {
                        page.ungetChar(cursor); // see bug #1547354 <<tag> parsed as text
                        ret = this.parseString(start, quoteSmart);
                    }
                    break;
                default:
                    page.ungetChar(cursor); // string needs to see leading fore slash
                    ret = this.parseString(start, quoteSmart);
                    break;
            }

            return (ret);
        },

        makeComment: function (start, end) {
            var length, ret;

            length = end - start;
            if (0 != length) {   // return tag based on second character, '/', '%', Letter (ch), '!'
                if (2 > length) {
                    // this is an error
                    return (this.makeString(start, end));
                }
                ret = this.nodeFactory.createCommentNode(this.page, start, end);
            }
            else
                ret = null;

            return (ret);
        },

        makeString: function (start, end) {
            var ret = null, l;
            l = end - start;
            if (l > 0) {
                ret = this.nodeFactory.createStringNode(this.page, start, end);
            }
            return ret;
        },

       // different from text node : space does matter
        makeCData: function (start, end) {
            var ret = null, l;
            l = end - start;
            if (l > 0) {
                ret = this.nodeFactory.createCDataNode(this.page, start, end);
            }
            return ret;
        },

        makeTag: function (start, end, attributes) {
            var length,
                ret;
            length = end - start;
            if (0 != length) {   // return tag based on second character, '/', '%', Letter (ch), '!'
                if (2 > length) {
                    // this is an error
                    return (this.makeString(start, end));
                }
                ret = this.nodeFactory.createTagNode(this.page, start, end, attributes);
            }
            else {
                ret = null;
            }
            return ret;
        },

        createTagNode: function (page, start, end, attributes) {
            return new TagNode(page, start, end, attributes);
        },

        createStringNode: function (page, start, end) {
            return new TextNode(page, start, end);
        },

        createCDataNode: function (page, start, end) {
            return new CData(page, start, end);
        },

        createCommentNode: function (page, start, end) {
            return new CommentNode(page, start, end);
        },

        /*
          parse tag node according to fsm
          state 0 - outside of any attribute
          state 1 - within attribute name
          state 2 - equals hit
          state 3 - within naked attribute value.
          state 4 - within single quoted attribute value
          state 5 - within double quoted attribute value
          state 6 - whitespaces after attribute name could lead to state 2 (=)or state 0
         */
        parseTag: function (start) {
            var done,
                bookmarks = [],
                attributes = [],
                ch,
                cfg = this.cfg,
                strict = cfg.strict,
                checkError = S.noop,
                page = this.page,
                state = 0,
                cursor = this.cursor;
            if (strict) {
                checkError = function () {
                    if (strict && ch === -1 && attributes.length) {
                        throw new Error(attributes[0].name + ' syntax error at row ' +
                            (page.row(cursor) + 1) + ' , col ' + (page.col(cursor) + 1));
                    }
                };
            }
            /*
              record state position

              states 0 -> bookmarks[1]
              states 1 -> bookmarks[2]
             */
            bookmarks[0] = cursor.position;
            while (!done) {
                // next possible end position for next state
                bookmarks[state + 1] = cursor.position;
                ch = page.getChar(cursor);
                // fsm go!
                switch (state) {
                    case 0:
                        // outside of any attribute
                        if (ch == -1 || '>' == ch || '<' == ch) {
                            if ('<' == ch) {
                                // don't consume the opening angle
                                page.ungetChar(cursor);
                                bookmarks[state + 1] = cursor.position;
                            }
                            done = true;
                        } else {
                            // tag name as a attribute
                            if (!attributes.length) {
                                // </div>
                                if (ch == "/" || Utils.isValidAttributeNameStartChar(ch)) {
                                    state = 1;
                                }
                            }
                            // <img />
                            else if (ch == "/" || Utils.isValidAttributeNameStartChar(ch)) {
                                state = 1;
                            }
                        }
                        break;

                    case 1:
                        // within attribute name
                        if ((-1 == ch) || ('>' == ch) || ('<' == ch)) {
                            if ('<' == ch) {
                                // don't consume the opening angle
                                page.ungetChar(cursor);
                                bookmarks[state + 1] = cursor.getPosition;
                            }
                            this.standalone(attributes, bookmarks);
                            done = true;
                        }
                        else if (Utils.isWhitespace(ch)) {
                            // whitespaces might be followed by next attribute or an equal sign
                            // see Bug #891058 Bug in lexer.
                            bookmarks[6] = bookmarks[2]; // setting the bookmark[0] is done in state 6 if applicable
                            state = 6;
                        }
                        else if ('=' == ch)
                            state = 2;
                        break;

                    case 2: // equals hit
                        if ((-1 == ch) || ('>' == ch)) {
                            this.standalone(attributes, bookmarks);
                            done = true;
                        }
                        else if ('\'' == ch) {
                            state = 4;
                            bookmarks[4] = bookmarks[3];
                        }
                        else if ('"' == ch) {
                            state = 5;
                            bookmarks[5] = bookmarks[3];
                        }
                        else if (Utils.isWhitespace(ch)) {
                            // collect white spaces after "=" into the assignment string;
                            // do nothing
                            // see Bug #891058 Bug in lexer.
                        }
                        else
                            state = 3;
                        break;
                    case 3: // within naked attribute value
                        if ((-1 == ch) || ('>' == ch)) {
                            this.naked(attributes, bookmarks);
                            done = true;
                        }
                        else if (Utils.isWhitespace(ch)) {
                            this.naked(attributes, bookmarks);
                            bookmarks[0] = bookmarks[4];
                            state = 0;
                        }
                        break;
                    case 4: // within single quoted attribute value
                        if (-1 == ch) {
                            this.single_quote(attributes, bookmarks);
                            done = true; // complain?
                        }
                        else if ('\'' == ch) {
                            this.single_quote(attributes, bookmarks);
                            bookmarks[0] = bookmarks[5] + 1;
                            state = 0;
                        }
                        break;
                    case 5: // within double quoted attribute value
                        if (-1 == ch) {
                            this.double_quote(attributes, bookmarks);
                            done = true; // complain?
                        }
                        else if ('"' == ch) {
                            this.double_quote(attributes, bookmarks);
                            bookmarks[0] = bookmarks[6] + 1;
                            state = 0;
                        }
                        break;
                    // patch for lexer state correction by
                    // Gernot Fricke
                    // See Bug # 891058 Bug in lexer.
                    case 6: // undecided for state 0 or 2
                        // we have read white spaces after an attribute name
                        if (-1 == ch) {
                            // same as last else clause
                            this.standalone(attributes, bookmarks);
                            bookmarks[0] = bookmarks[6];
                            page.ungetChar(cursor);
                            state = 0;
                        }
                        else if (Utils.isWhitespace(ch)) {
                            // proceed
                        }
                        else if ('=' == ch) // yepp. the white spaces belonged to the equal.
                        {
                            bookmarks[2] = bookmarks[6];
                            bookmarks[3] = bookmarks[7];
                            state = 2;
                        }
                        else {
                            // white spaces were not ended by equal
                            // meaning the attribute was a stand alone attribute
                            // now: create the stand alone attribute and rewind
                            // the cursor to the end of the white spaces
                            // and restart scanning as whitespace attribute.
                            this.standalone(attributes, bookmarks);
                            bookmarks[0] = bookmarks[6];
                            page.ungetChar(cursor);
                            state = 0;
                        }
                        break;
                    default:
                        throw new Error("how ** did we get in state " + state);
                }

                checkError();
            }

            return this.makeTag(start, cursor.position, attributes);
        },

        /*
          Parse a comment.
          state 0 - prior to the first open delimiter (first dash)
          state 1 - prior to the second open delimiter (second dash)
          state 2 - prior to the first closing delimiter (first dash)
          state 3 - prior to the second closing delimiter (second dash)
          state 4 - prior to the terminating
         */
        parseComment: function (start, quoteSmart) {
            var done,
                ch,
                page = this.page,
                cursor = this.cursor,
                state;

            done = false;
            state = 0;
            while (!done) {
                ch = page.getChar(cursor);
                if (-1 == ch) {
                    done = true;
                }
                else {
                    switch (state) {
                        case 0: // prior to the first open delimiter
                            if ('>' == ch)
                                done = true;
                            if ('-' == ch)
                                state = 1;
                            else
                                return this.parseString(start, quoteSmart);
                            break;
                        case 1: // prior to the second open delimiter
                            if ('-' == ch) {
                                // handle <!--> because netscape does
                                ch = page.getChar(cursor);
                                if (-1 == ch) {
                                    done = true;
                                }
                                else if ('>' == ch) {
                                    done = true;
                                }
                                else {
                                    page.ungetChar(cursor);
                                    state = 2;
                                }
                            }
                            else {
                                return this.parseString(start, quoteSmart);
                            }
                            break;
                        case 2: // prior to the first closing delimiter
                            if ('-' == ch) {
                                state = 3;
                            }
                            else if (-1 == ch) {
                                return this.parseString(start, quoteSmart); // no terminator
                            }
                            break;
                        case 3: // prior to the second closing delimiter
                            if ('-' == ch) {
                                state = 4;
                            }
                            else {
                                state = 2;
                            }
                            break;
                        case 4: // prior to the terminating >
                            if ('>' == ch) {
                                done = true;
                            }
                            else if (Utils.isWhitespace(ch)) {
                                // stay in state 4
                            }
                            else {
                                // bug #1345049 HtmlParser should not terminate a comment with --->
                                // should maybe issue a warning mentioning STRICT_REMARKS
                                state = 2;
                            }
                            break;
                        default:
                            throw new Error("how ** did we get in state " + state);
                    }
                }
            }

            return this.makeComment(start, cursor.position);
        },

        /**
         * Parse a string (text) node.
         *
         * Consumes chars until the end of input, or until a '<' that is
         * followed by '/', a letter, '!' or '?' is met outside of a quoted
         * section; in the latter case the cursor is rewound to just before
         * that '<'.
         *
         * With quoteSmart set, single/double quoted sections (with backslash
         * escapes) and script style comments (double slash and slash-star)
         * are skipped over, so a '<' inside them does not end the string.
         *
         * @private
         * @param start start position of the string, forwarded to makeString
         * @param quoteSmart strings ignore quoted contents
         * @returns result of makeString for the range from start to the cursor
         */
        parseString: function (start, quoteSmart) {
            var done = 0,
                ch,
                page = this.page,
                cursor = this.cursor,
                // 0 while outside quotes, otherwise the quote char that opened the section
                quote = 0;

            while (!done) {
                ch = page.getChar(cursor);
                if (-1 == ch) {
                    // end of input
                    done = 1;
                }
                else if (quoteSmart && (0 == quote)
                    && (('\'' == ch) || ('"' == ch))) {
                    quote = ch; // enter quoted state
                }
                // patch from Gernot Fricke to handle escaped closing quote
                else if (quoteSmart && (0 != quote) && ('\\' == ch)) {
                    ch = page.getChar(cursor); // try to consume escape
                    if ((-1 != ch)
                        && ('\\' != ch) // escaped backslash
                        && (ch != quote)) // escaped quote character
                    {
                        // ( reflects ["] or [']  whichever opened the quotation)
                        page.ungetChar(cursor); // unconsume char if char not an escape
                    }
                }
                else if (quoteSmart && (ch == quote)) {
                    quote = 0; // exit quoted state
                }
                else if (quoteSmart && (0 == quote) && (ch == '/')) {
                    // handle multiline and double slash comments (with a quote)
                    // in script like:
                    // I can't handle single quotations.
                    ch = page.getChar(cursor);
                    if (-1 == ch) {
                        done = 1;
                    }
                    else if ('/' == ch) {
                        // double slash comment: skip to the end of the line (or input)
                        do {
                            ch = page.getChar(cursor);
                        } while ((-1 != ch) && ('\n' != ch));
                    }
                    else if ('*' == ch) {
                        // slash-star comment: skip until the closing star-slash (or end of input)
                        do
                        {
                            do {
                                ch = page.getChar(cursor);
                            } while ((-1 != ch) && ('*' != ch));
                            ch = page.getChar(cursor);
                            if (ch == '*') {
                                // another star: let the inner loop see it again
                                page.ungetChar(cursor);
                            }
                        }
                        while ((-1 != ch) && ('/' != ch));
                    }
                    else {
                        // plain slash, not a comment
                        page.ungetChar(cursor);
                    }
                }
                else if ((0 == quote) && ('<' == ch)) {
                    // possible start of a tag or comment: peek at the next char
                    ch = page.getChar(cursor);
                    if (-1 == ch) {
                        done = 1;
                    }
                    // the order of these tests might be optimized for speed:
                    else if ('/' == ch ||
                        Utils.isLetter(ch) ||
                        '!' == ch ||
                        // <?xml:namespace
                        '?' == ch) {
                        done = 1;
                        // rewind both the peeked char and the '<'
                        page.ungetChar(cursor);
                        page.ungetChar(cursor);
                    }
                    else {
                        // it's not a tag, so keep going, but check for quotes
                        page.ungetChar(cursor);
                    }
                }
            }

            return this.makeString(start, cursor.position);

        },

        /**
         * Parse cdata such as code in script, starting at the cursor.
         *
         * Consumes chars until the end of input or until an end tag opener
         * is found, then rewinds the cursor to just before that end tag's '<'.
         * Text inside <!-- ... --> is skipped without looking for end tags.
         *
         * States:
         *  - 0: prior to ETAGO (plain content)
         *  - 1: '<' seen
         *  - 2: '</' seen
         *  - 3: inside <!-- , looking for -->
         *
         * @private
         * @param quoteSmart if set true end tag in quote
         * (but not in comment mode) does not end current tag ( <script>x="<a>taobao</a>"</script> )
         * @param tagName when given, only '</' directly followed by tagName
         * (and then a char not matching \w) can end the cdata; when falsy,
         * any '</' followed by a letter does
         * @returns result of makeCData for the range consumed
         */
        parseCDATA: function (quoteSmart, tagName) {
            var start,
                state,
                done,
                quote,
                ch,
                end,
                comment,
                mCursor = this.cursor,
                mPage = this.page;

            start = mCursor.position;
            state = 0;
            done = false;
            // '' while outside quotes, otherwise the quote char that opened the section
            quote = '';
            // true while inside a double slash line comment (quotes are ignored there)
            comment = false;

            while (!done) {
                ch = mPage.getChar(mCursor);
                switch (state) {
                    case 0: // prior to ETAGO
                        switch (ch) {
                            case -1:
                                done = true;
                                break;
                            case '\'':
                                if (quoteSmart && !comment) {
                                    if ('' == quote) {
                                        quote = '\''; // enter quoted state
                                    } else if ('\'' == quote) {
                                        quote = ''; // exit quoted state
                                    }
                                }
                                break;
                            case '"':
                                if (quoteSmart && !comment) {
                                    if ('' == quote) {
                                        quote = '"'; // enter quoted state
                                    } else if ('"' == quote) {
                                        quote = ''; // exit quoted state
                                    }
                                }
                                break;
                            case '\\':
                                if (quoteSmart) {
                                    if ('' != quote) {
                                        ch = mPage.getChar(mCursor); // try to consume escaped character
                                        if (-1 == ch) {
                                            done = true;
                                        } else if ((ch != '\\') && (ch != quote)) {
                                            // unconsume char if character was not an escapable char.
                                            mPage.ungetChar(mCursor);
                                        }
                                    }
                                }
                                break;
                            case '/':
                                if (quoteSmart) {
                                    if ('' == quote) {
                                        // handle multiline and double slash comments (with a quote)
                                        ch = mPage.getChar(mCursor);
                                        if (-1 == ch) {
                                            done = true;
                                        } else if ('/' == ch) {
                                            // line comment: flag stays set until the next '\n'
                                            comment = true;
                                        } else if ('*' == ch) {
                                            // slash-star comment: skip until the closing star-slash (or end of input)
                                            do {
                                                do
                                                    ch = mPage.getChar(mCursor);
                                                while ((-1 != ch) && ('*' != ch));
                                                ch = mPage.getChar(mCursor);
                                                if (ch == '*') {
                                                    // another star: let the inner loop see it again
                                                    mPage.ungetChar(mCursor);
                                                }
                                            } while ((-1 != ch) && ('/' != ch));
                                        }
                                        else {
                                            // plain slash, not a comment
                                            mPage.ungetChar(mCursor);
                                        }
                                    }
                                }
                                break;
                            case '\n':
                                // a line break ends a double slash comment
                                comment = false;
                                break;
                            case '<':
                                // in quoteSmart mode a '<' inside quotes is plain content
                                if (quoteSmart) {
                                    if ('' == quote) {
                                        state = 1;
                                    }
                                }
                                else {
                                    state = 1;
                                }
                                break;
                            default:
                                break;
                        }
                        break;
                    case 1: // <
                        switch (ch) {
                            case -1:
                                done = true;
                                break;
                            case '/':
                                // tagName = "textarea"
                                // <textarea><div></div></textarea>
                                /*
                                  8.1.2.6 Restrictions on the contents of raw text and RCDATA elements

                                    The text in raw text and RCDATA elements must not contain any occurrences
                                    of the string "</" (U+003C LESS-THAN SIGN, U+002F SOLIDUS)
                                    followed by characters that case-insensitively match the tag name of the element
                                    followed by one of U+0009 CHARACTER TABULATION (tab),
                                    U+000A LINE FEED (LF), U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR),
                                    U+0020 SPACE, U+003E GREATER-THAN SIGN (>), or U+002F SOLIDUS (/).
                                 */
                                // NOTE(review): the comparison below is case-sensitive (===) while the
                                // spec text above asks for a case-insensitive match; presumably callers
                                // pass tagName in the same case as the source - confirm.
                                if (!tagName || (mPage.getText(mCursor.position,
                                    mCursor.position + tagName.length) === tagName && !(mPage.getText(mCursor.position + tagName.length,
                                    mCursor.position + tagName.length + 1).match(/\w/))
                                    )) {
                                    state = 2;
                                } else {
                                    state = 0;
                                }

                                break;
                            case '!':
                                // only "<!--" switches to comment state, anything else is plain content
                                ch = mPage.getChar(mCursor);
                                if (-1 == ch) {
                                    done = true;
                                } else if ('-' == ch) {
                                    ch = mPage.getChar(mCursor);
                                    if (-1 == ch) {
                                        done = true;
                                    } else if ('-' == ch) {
                                        state = 3;
                                    } else {
                                        state = 0;
                                    }
                                }
                                else
                                    state = 0;
                                break;
                            default:
                                state = 0;
                                break;
                        }
                        break;
                    case 2: // </
                        comment = false;
                        if (-1 == ch) {
                            done = true;
                        } else if (Utils.isLetter(ch)) {
                            // for a strict parser the lexer ends immediately on </x
                            // browser implementations are more complex, they may mix lexer and parser
                            done = true;
                            // back up to the start of ETAGO
                            mPage.ungetChar(mCursor);
                            mPage.ungetChar(mCursor);
                            mPage.ungetChar(mCursor);
                        } else {
                            state = 0;
                        }
                        break;
                    case 3: // inside <!-- , looking for -->
                        comment = false;
                        if (-1 == ch) {
                            done = true;
                        } else if ('-' == ch) {
                            ch = mPage.getChar(mCursor);
                            if (-1 == ch) {
                                done = true;
                            } else if ('-' == ch) {
                                ch = mPage.getChar(mCursor);
                                if (-1 == ch) {
                                    done = true;
                                } else if ('>' == ch) {
                                    // closing --> found (e.g. <!---->): back to plain content
                                    state = 0;
                                } else {
                                    // retreat twice , still begin to check -->
                                    mPage.ungetChar(mCursor);
                                    mPage.ungetChar(mCursor);
                                }
                            } else {
                                // retreat once , still begin to check
                                mPage.ungetChar(mCursor);
                            }
                        } else {
                            // eat comment
                        }
                        break;
                    default:
                        throw new Error("unexpected " + state);
                }
            }
            end = mCursor.position;

            return this.makeCData(start, end);
        },

        /**
         * Generate an single quoted attribute
         * @param attributes The list so far.
         * @param bookmarks The array of positions.
         * @private
         */
        single_quote: function (attributes, bookmarks) {
            var page = this.page;
            attributes.push(new Attribute(page.getText(bookmarks[1], bookmarks[2]), "=", page.getText(bookmarks[4] + 1, bookmarks[5]), "'"));
        },

        /**
         * Generate an double quoted attribute
         * @param attributes The list so far.
         * @param bookmarks The array of positions.
         * @private
         */
        double_quote: function (attributes, bookmarks) {
            var page = this.page;
            attributes.push(new Attribute(page.getText(bookmarks[1], bookmarks[2]), "=", page.getText(bookmarks[5] + 1, bookmarks[6]), '"'));
        },


        /**
         * Generate a standalone attribute
         * @private
         * @param attributes The list so far.
         * @param bookmarks The array of positions.
         */
        standalone: function (attributes, bookmarks) {
            var page = this.page;
            attributes.push(new Attribute(page.getText(bookmarks[1], bookmarks[2])));
        },

        /**
         * Generate an unquoted attribute
         * @private
         * @param attributes The list so far.
         * @param bookmarks The array of positions.
         */
        naked: function (attributes, bookmarks) {
            var page = this.page;
            attributes.push(new Attribute(page.getText(bookmarks[1], bookmarks[2]), "=", page.getText(bookmarks[3], bookmarks[4])));
        }
    };

    return Lexer;
}, {
    requires: [
        './cursor',
        './page',
        '../nodes/text',
        '../nodes/cdata',
        '../utils',
        '../nodes/attribute',
        '../nodes/tag',
        '../nodes/comment'
    ]});