package ro.sync.lexer.js;
import ro.sync.lexer.AbstractLexer;

@SuppressWarnings("unused")
%%


// Scanner options: the generated class is the public JSLexer, extends
// AbstractLexer, reads Unicode input, counts characters (yychar) and returns
// ro.sync.lexer.Symbol instances from the scanning method.
%public
%class JSLexer
%extends AbstractLexer
%unicode
%char
%type ro.sync.lexer.Symbol

// No %ignorecase here: JavaScript is case-sensitive, so "Function", "New" or
// "NULL" are ordinary identifiers and must not be reported as keywords.
// Unmatched input is reported with this exception type.
%scanerror ro.sync.lexer.LexerException

%{
    // Token type used by each lexical rule. Every SYM_* constant is an alias for
    // one of the JSTokens types, so many distinct lexemes (for example all the
    // operators) share the same type.

    // XML comment markup
    private static final byte SYM_XML_COMMENT_MARKUP = JSTokens.MULTI_LINE_COMMENT;  
    // CDATA markup
    private static final byte SYM_XML_CDATA_MARKUP = JSTokens.MULTI_LINE_COMMENT;  
    
    // Operators and punctuation.
    private static final byte SYM_SLASH = JSTokens.OPERATOR;
    private static final byte SYM_BACKSLASH = JSTokens.OPERATOR;
    private static final byte SYM_SQUARE_BRACKET = JSTokens.SQUARE_BRACKET;
    private static final byte SYM_CURLY_BRACKET = JSTokens.CURLY_BRACKET;
    private static final byte SYM_BRACKET = JSTokens.BRACKET;
    private static final byte SYM_QUESTION = JSTokens.OPERATOR;
    private static final byte SYM_COLON = JSTokens.OPERATOR;
    private static final byte SYM_SEMICOLON = JSTokens.SEMICOLON;
    private static final byte SYM_ASTERISK = JSTokens.OPERATOR;
    private static final byte SYM_AT = JSTokens.OPERATOR;
    private static final byte SYM_AMPERSAND = JSTokens.OPERATOR;
    private static final byte SYM_PLUS = JSTokens.OPERATOR;
    private static final byte SYM_MINUS = JSTokens.OPERATOR;
    private static final byte SYM_DOT = JSTokens.OPERATOR;
    private static final byte SYM_COMMA = JSTokens.COMMA;
    private static final byte SYM_LT = JSTokens.OPERATOR;
    private static final byte SYM_GT = JSTokens.OPERATOR;
    private static final byte SYM_LTEQ = JSTokens.OPERATOR;
    private static final byte SYM_GTEQ = JSTokens.OPERATOR;
    private static final byte SYM_EQ = JSTokens.OPERATOR;
    private static final byte SYM_EQ2 = JSTokens.OPERATOR;
    private static final byte SYM_NOTEQ = JSTokens.OPERATOR;
    private static final byte SYM_EQ3 = JSTokens.OPERATOR;
    private static final byte SYM_NOTEQ2 = JSTokens.OPERATOR;
    private static final byte SYM_PERCENT = JSTokens.OPERATOR;
    private static final byte SYM_PLUS2 = JSTokens.OPERATOR;
    private static final byte SYM_MINUS2 = JSTokens.OPERATOR;
    private static final byte SYM_LSHIFT = JSTokens.OPERATOR;
    private static final byte SYM_RSHIFT = JSTokens.OPERATOR;
    private static final byte SYM_GT3 = JSTokens.OPERATOR;
    private static final byte SYM_PIPE = JSTokens.OPERATOR;
    private static final byte SYM_CAROT = JSTokens.OPERATOR;
    private static final byte SYM_EXCLAMATION = JSTokens.OPERATOR;
    private static final byte SYM_TILDE = JSTokens.OPERATOR;
    private static final byte SYM_AMPERSAND2 = JSTokens.OPERATOR;
    private static final byte SYM_PIPE2 = JSTokens.OPERATOR;
    private static final byte SYM_PLUSEQ = JSTokens.OPERATOR;
    private static final byte SYM_MINUSEQ = JSTokens.OPERATOR;
    private static final byte SYM_TIMESEQ = JSTokens.OPERATOR;
    private static final byte SYM_PERCENTEQ = JSTokens.OPERATOR;
    private static final byte SYM_LSHIFTEQ = JSTokens.OPERATOR;
    private static final byte SYM_RSHIFTEQ = JSTokens.OPERATOR;
    private static final byte SYM_GT3EQ = JSTokens.OPERATOR;
    private static final byte SYM_AMPERSANDEQ = JSTokens.OPERATOR;
    private static final byte SYM_PIPEEQ = JSTokens.OPERATOR;
    private static final byte SYM_CAROTEQ = JSTokens.OPERATOR;
    private static final byte SYM_DIVIDEEQ = JSTokens.OPERATOR;
    // Keywords, future reserved words and the literals null / true / false.
    private static final byte SYM_KEYWORD = JSTokens.KEYWORD;
    private static final byte SYM_RESERVED_WORD = JSTokens.RESERVED_WORD;
    private static final byte SYM_RESERVED_LITERAL = JSTokens.RESERVED_LITERAL;
    // Strings (double quoted, single quoted) and regular expressions.
    private static final byte SYM_STRING_DQ = JSTokens.STRING_DQ;
    private static final byte SYM_STRING_SQ = JSTokens.STRING_SQ;
    private static final byte SYM_REG_EXP = JSTokens.REG_EXP;
    // XML entity reference, such as &amp;
    private static final byte SYM_XMLENTITY = JSTokens.XML_ENTITY;
    // Comments
    private static final byte SYM_MULTI_LINE_COMMENT = JSTokens.MULTI_LINE_COMMENT;
    private static final byte SYM_SINGLE_LINE_COMMENT = JSTokens.SINGLE_LINE_COMMENT;
    // Other text; the rules use it for runs of blanks.
    private static final byte SYM_TEXT = JSTokens.TEXT;
    // Identifiers: properties, variable names, functions.
    private static final byte SYM_IDENTIFIER = JSTokens.IDENTIFIER;
    // Numbers
    private static final byte SYM_NUMBER = JSTokens.NUMBER;
    /**
     * If <code>true</code>, the XML comments will be emitted as comments entirely:
     * the rule for <code>&lt;!--</code> then switches the scanner to the
     * XML_COMMENT state.
     */
    private boolean emitXMLComments = false;
    
    /**
     * Text of the last character matched inside a string state. The string rules
     * compare it with a backslash to tell an escaped quote from a closing one;
     * it is set to the empty string after an escaped backslash and to
     * <code>null</code> when the end of the input is reached inside a string.
     */
    private String lastYYText = "";
    
    /**
     * Builds a lexer that has no input attached yet; the reader is supplied
     * afterwards through a reset call.
     *
     * @param emitXMLComments
     *          <code>true</code> to have XML comments emitted as comments entirely.
     */
    public JSLexer(boolean emitXMLComments) {
      // The no-argument super constructor is invoked implicitly.
      this.emitXMLComments = emitXMLComments;
    }
    
    /**
     * Returns the name of this lexer.
     *
     * @return The <code>JS_LEXER</code> constant. It is not declared in this
     *         specification (presumably inherited from AbstractLexer).
     */
    public String getName() {
      return JS_LEXER;
    }
    
    /**
     * Decides, from the last significant token that was emitted, whether a
     * <code>/</code> at the current position may start a regular expression
     * literal or has to be a division operator.
     * <p>
     * A regular expression cannot follow an operand: an identifier, a keyword,
     * a reserved word or literal, a number, a string, a closing round bracket
     * (end of a call or of a parenthesised expression) or a closing square
     * bracket (end of an index expression or of an array literal).
     *
     * @return <code>true</code> if a regular expression is allowed here.
     */
    private boolean areRegularExpressionsEnabled() {
        switch (lastSymbolType) {
            case JSTokens.IDENTIFIER:
            case JSTokens.KEYWORD:
            case JSTokens.RESERVED_LITERAL:
            case JSTokens.RESERVED_WORD:
            case JSTokens.NUMBER:
            case JSTokens.STRING_SQ:
            case JSTokens.STRING_DQ:
                return false;
            default:
                break;
        }
        if (lastSymbolType == JSTokens.BRACKET || lastSymbolType == JSTokens.SQUARE_BRACKET) {
            // Opening brackets start a new expression, closing ones end an operand.
            return !(")".equals(lastSymbolString) || "]".equals(lastSymbolString));
        }
        return true;
    }

    /**
     * Stores the last symbol type for further checking - for example to decide
     * if the regular expressions are active or not. TEXT symbols are not recorded.
     */
    private byte lastSymbolType = -1;

    /**
     * Stores the text of the last round or square bracket symbol for further
     * checking - for example to decide if the regular expressions are active
     * or not.
     */
    private String lastSymbolString = "";

    /**
     * Emits a symbol using the <code>yylength</code> and the type.
     * Stores the last symbol type for further checking - for example to decide
     * if the regular expressions are active or not.
     *
     * @param type
     *          The type of the symbol.
     * @return The created symbol.
     */
    @Override
    protected ro.sync.lexer.Symbol symbol(byte type) {
        // TEXT symbols are skipped so that blanks between an operand and a
        // slash do not hide the operand.
        if (type != JSTokens.TEXT) {
            this.lastSymbolType = type;
        }
        // The bracket text tells an opening bracket from a closing one.
        if (type == JSTokens.BRACKET || type == JSTokens.SQUARE_BRACKET) {
            this.lastSymbolString = yytext();
        }
        return super.symbol(type);
    }

    /**
     * Our reset method. Delegates to the inherited reset for the given reader
     * and then clears everything this lexer remembers about the previous input:
     * the last symbol type, the last bracket text and the last character seen
     * inside a string. Without that, a scan abandoned right after a backslash
     * would make the first closing quote of the new input look escaped.
     *
     * @param reader
     *          The reader to take the new input from.
     */
    public void reset(java.io.Reader reader) {
        super.reset(reader);
        lastSymbolType = -1;
        lastSymbolString = "";
        lastYYText = "";
    }
	
%}

// Exclusive states: while one of them is active only its own rules apply.
// SINGLE_LINE_COMMENT is declared, but no rule in the visible part of this
// specification uses it.
%xstate SQ_STRING, DQ_STRING, MULTI_LINE_COMMENT, SINGLE_LINE_COMMENT, XML_COMMENT

// Any character except \ and ; 
// These two characters can occur escaped.
/*
RegExpContent = (\\\/|[^\/;])*
RegExp1 = \/[^\*\/]{RegExpContent}\/[gimy]*
RegExp3 = \/\\\/{RegExpContent}\/[gimy]*
RegExp = {RegExp1} | {RegExp3} 
*/

// Regular expression literal, modelled on the ECMAScript grammar:
//  - a backslash escapes the character that follows it;
//  - a character class "[...]" may contain an unescaped "/" (as in /[^/]+/);
//  - the body cannot start with "*" (that would be a comment) and never
//    contains a line break;
//  - the closing "/" may be followed by flag letters.
BackslashSequence = \\.
RegularExpressionClassChar = [^\]\\\r\n] | {BackslashSequence}
RegularExpressionClass = \[ {RegularExpressionClassChar}* \]
RegularExpressionFirstChar = [^\*\/\\\[\r\n] | {BackslashSequence} | {RegularExpressionClass}
RegularExpressionChar = [^\/\\\[\r\n] | {BackslashSequence} | {RegularExpressionClass}
RegExp = \/ {RegularExpressionFirstChar} {RegularExpressionChar}* \/ [dgimsuvy]*


// XML name without colon (NCName) and the entity reference built from it,
// for example &amp; - needed when the script is embedded in an XML document.
NCNameStartChar = [A-Za-z_\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF]
                | [\u200C-\u200D\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD]

// Characters allowed after the first one: additionally "-", ".", digits,
// the middle dot and the combining / connector ranges.
NCNameChar = {NCNameStartChar} | [-\.0-9\u00B7\u0300-\u036F\u203F-\u2040]
NCName = {NCNameStartChar} {NCNameChar}*
XmlEntity = "&" {NCName} ";"

// Word lists. Each list is matched by its own rule in YYINITIAL and gets its
// own token type: KEYWORD, RESERVED_WORD and RESERVED_LITERAL respectively.
Keyword = "break" | "case" | "catch" | "continue" | "default" | "delete" | "do" | "else" | "finally"
            | "for" | "function" | "if" | "in" | "instanceof" | "new" | "return" | "switch" | "this"
            | "throw" | "try" | "typeof" | "var" | "void" | "while" | "with"
             
// Words reserved for future use.
FutureReservedWord = "abstract" | "boolean" | "byte" | "char" | "class" | "const" | "debugger"
            | "double" | "enum" | "export" | "extends" | "final" | "float" | "goto" | "implements"
            | "import" | "int" | "interface" | "long" | "native" | "package" | "private"
            | "protected" | "public" | "short" | "static" | "super" | "synchronized" | "throws"
            | "transient" | "volatile" 

// Literal values written as words.
ReservedLiteral = "null" | "true" | "false"


// Numeric literals: decimal integers, hexadecimal integers (0xFF) and
// floating point numbers with an optional exponent.
Digit = [0-9]
HexDigit = [0-9a-fA-F]
Integer = {Digit}+
// Without this alternative a literal such as 0xFF is never a NUMBER: only its
// leading "0" is numeric, so the longer identifier match takes all of it.
HexInteger = 0 [xX] {HexDigit}+

/* floating point literals */
F1 = {Digit}+ \. {Digit}*
F2 = \. {Digit}+
F3 = {Digit}+
Exponent = [eE] [+-]? {Digit}+
Double = ({F1}|{F2}|{F3}) {Exponent}?
Number = {HexInteger} | {Integer} | {Double}

// One character of an identifier-like run: any character except the listed
// punctuation, operator, quote, bracket and blank (space, tab) characters.
// Line breaks are not excluded. "#" is excluded.
GeneralChar = [^<>\\/,;:{}\-+~|\'\"\[\]\@ \t=*\.#\(\)?!\^%&]

%%


// Default state: ordinary script code. When several rules match, the longest
// match wins and, for equal lengths, the rule listed first - which is why the
// word lists and the number rule come before the general identifier rule.
<YYINITIAL> {
    // Compatible with JS code embedded in XML.
    "<!--"                      {
                                    if (emitXMLComments) {
                                        // The comment body is scanned by the XML_COMMENT rules.
                                        yybegin(XML_COMMENT);
                                    }
                                    return symbol(SYM_XML_COMMENT_MARKUP);
                                }
    "-->"                       {   return symbol(SYM_XML_COMMENT_MARKUP);  }
    "<![CDATA[" | "]]>"         {   return symbol(SYM_XML_CDATA_MARKUP);    }

    // Keywords.
    {Keyword}                   {   return symbol(SYM_KEYWORD);             }
    {FutureReservedWord}        {   return symbol(SYM_RESERVED_WORD);       }
    {ReservedLiteral}           {   return symbol(SYM_RESERVED_LITERAL);    }

    // Operators and punctuation marks.
    "\\"                        {   return symbol(SYM_BACKSLASH);           }
    "[" | "]"                   {   return symbol(SYM_SQUARE_BRACKET);      }
    "{" | "}"                   {   return symbol(SYM_CURLY_BRACKET);       }
    "(" | ")"                   {   return symbol(SYM_BRACKET);             }
    "?"                         {   return symbol(SYM_QUESTION);            }
    ":"                         {   return symbol(SYM_COLON);               }
    ";"                         {   return symbol(SYM_SEMICOLON);           }
    "*"                         {   return symbol(SYM_ASTERISK);            }
    "@"                         {   return symbol(SYM_AT);                  }
    "&"                         {   return symbol(SYM_AMPERSAND);           }
    "+"                         {   return symbol(SYM_PLUS);                }
    "-"                         {   return symbol(SYM_MINUS);               }
    "."                         {   return symbol(SYM_DOT);                 }
    ","                         {   return symbol(SYM_COMMA);               }
    // The comparison operators may also be written as XML entities.
    "<" | "&lt;"                {   return symbol(SYM_LT);                  }
    ">" | "&gt;"                {   return symbol(SYM_GT);                  }
    "<=" | "&le;"               {   return symbol(SYM_LTEQ);                }
    ">=" | "&ge;"               {   return symbol(SYM_GTEQ);                }
    "="                         {   return symbol(SYM_EQ);                  }
    "=="                        {   return symbol(SYM_EQ2);                 }
    "!="                        {   return symbol(SYM_NOTEQ);               }
    "==="                       {   return symbol(SYM_EQ3);                 }
    "!=="                       {   return symbol(SYM_NOTEQ2);              }
    "%"                         {   return symbol(SYM_PERCENT);             }
    "++"                        {   return symbol(SYM_PLUS2);               }
    "--"                        {   return symbol(SYM_MINUS2);              }
    "<<"                        {   return symbol(SYM_LSHIFT);              }
    ">>"                        {   return symbol(SYM_RSHIFT);              }
    ">>>"                       {   return symbol(SYM_GT3);                 }
    "|"                         {   return symbol(SYM_PIPE);                }
    "^"                         {   return symbol(SYM_CAROT);               }
    "!"                         {   return symbol(SYM_EXCLAMATION);         }
    "~"                         {   return symbol(SYM_TILDE);               }
    "&&"                        {   return symbol(SYM_AMPERSAND2);          }
    "||"                        {   return symbol(SYM_PIPE2);               }
    "+="                        {   return symbol(SYM_PLUSEQ);              }
    "-="                        {   return symbol(SYM_MINUSEQ);             }
    "*="                        {   return symbol(SYM_TIMESEQ);             }
    "%="                        {   return symbol(SYM_PERCENTEQ);           }
    "<<="                       {   return symbol(SYM_LSHIFTEQ);            }
    ">>="                       {   return symbol(SYM_RSHIFTEQ);            }
    ">>>="                      {   return symbol(SYM_GT3EQ);               }
    "&="                        {   return symbol(SYM_AMPERSANDEQ);         }
    "|="                        {   return symbol(SYM_PIPEEQ);              }
    "^="                        {   return symbol(SYM_CAROTEQ);             }
    "/="                        {   return symbol(SYM_DIVIDEEQ);            }

    // XML entity for embedded lexers.
    {XmlEntity}                 {   return symbol(SYM_XMLENTITY);           }

    // Comments
    "/*"                        {
                                    // Switch to comments.
                                    yybegin(MULTI_LINE_COMMENT);
                                    // Start the char counter
                                    cLen = yylength();
                                }
    "//" .*                     {   return symbol(SYM_SINGLE_LINE_COMMENT); }

    // Strings: the opening quote is counted, the token is emitted by the
    // rules of the string state.
    "\""                        {
                                    cLen = 1;
                                    yybegin(DQ_STRING);
                                }
    "\'"                        {
                                    cLen = 1;
                                    yybegin(SQ_STRING);
                                }

    // The mighty regular expression.
    {RegExp}                    {
                                    if (areRegularExpressionsEnabled()) {
                                        return symbol(SYM_REG_EXP);
                                    } else {
                                        // Emit only the slash. This is not a RE.
                                        yypushback(yylength() - 1);
                                        return symbol(SYM_SLASH);
                                    }
                                }

    "/"                         {   return symbol(SYM_SLASH);               }

    // White spaces are emitted separately.
    [ \t]+                      {   return symbol(SYM_TEXT);                }
    {Number}                    {   return symbol(SYM_NUMBER);              }

    // "#" is not a general character and is used by no rule above. It gets
    // its own rule so that every input character can be consumed; without
    // it the scanner could only answer "#" with an empty match.
    "#"                         {   return symbol(SYM_TEXT);                }

    // Anything else: a run of general characters (at least one, so that the
    // rule can never match the empty string).
    {GeneralChar}+              {   return symbol(SYM_IDENTIFIER);          }
}

// Inside a double quoted string. Characters are only counted in cLen; the
// token is emitted by flush() at the closing quote or at the end of the input.
<DQ_STRING> {
    // A quote closes the string unless an unescaped backslash precedes it.
    "\""                         {
                                     boolean escaped = "\\".equals(lastYYText);
                                     cLen++;
                                     lastYYText = yytext();
                                     if (!escaped) {
                                         yybegin(YYINITIAL);
                                         return flush(SYM_STRING_DQ);
                                     }
                                 }
    // A backslash that is itself escaped must not escape what follows.
    "\\"                         {
                                     lastYYText = "\\".equals(lastYYText) ? "" : yytext();
                                     cLen++;
                                 }
    // Any other character.
    [^\"\\]                      {
                                     lastYYText = yytext();
                                     cLen++;
                                 }
    // End of input: a trailing backslash keeps the string state, otherwise
    // a non-empty string is closed. The remembered character is dropped.
    <<EOF>>                      {
                                     boolean continued = "\\".equals(lastYYText);
                                     lastYYText = null;
                                     if (!continued && cLen > 0) {
                                         yybegin(YYINITIAL);
                                     }
                                     return flush(SYM_STRING_DQ);
                                 }
}

// Inside a single quoted string. Characters are only counted in cLen; the
// token is emitted by flush() at the closing quote or at the end of the input.
<SQ_STRING> {
    // A quote closes the string unless an unescaped backslash precedes it.
    // Only an escaped quote is remembered as the last character.
    "'"                          {
                                     cLen++;
                                     if (!"\\".equals(lastYYText)) {
                                         yybegin(YYINITIAL);
                                         return flush(SYM_STRING_SQ);
                                     }
                                     lastYYText = yytext();
                                 }
    // A backslash that is itself escaped must not escape what follows.
    "\\"                         {
                                     lastYYText = "\\".equals(lastYYText) ? "" : yytext();
                                     cLen++;
                                 }
    // Any other character.
    [^\'\\]                      {
                                     lastYYText = yytext();
                                     cLen++;
                                 }
    // End of input: a trailing backslash keeps the string state, otherwise
    // a non-empty string is closed. The remembered character is dropped.
    <<EOF>>                      {
                                     boolean continued = "\\".equals(lastYYText);
                                     lastYYText = null;
                                     if (!continued && cLen > 0) {
                                         yybegin(YYINITIAL);
                                     }
                                     return flush(SYM_STRING_SQ);
                                 }
}

// Inside a /* ... */ comment. Characters are only counted in cLen; the token
// is emitted by flush() at the terminator or at the end of the input, in which
// case the state is kept.
<MULTI_LINE_COMMENT> {
  "*/"                          {
                                    cLen += yylength();
                                    yybegin(YYINITIAL);
                                    return flush(SYM_MULTI_LINE_COMMENT);
                                }
  // Comment text. A negated class is used instead of "." because "." does not
  // match line terminators, and an unmatched line break would be a scan error
  // in this exclusive state.
  [^*]+                         {   cLen += yylength();                     }
  // An asterisk that does not start the terminator (the longer "*/" wins).
  "*"                           {   cLen ++;                                }
  <<EOF>>                       {   return flush(SYM_MULTI_LINE_COMMENT);   }
}

// Inside an XML comment; entered from the "<!--" rule when emitXMLComments
// is set.
<XML_COMMENT> {
  // Text without dashes is only counted.
  [^-]*                          {
                                     cLen += yylength();
                                 }                    
  // A single dash is only counted.
  "-"                            {
                                     cLen ++;
                                 }                                           
  // The terminator right at the current position: emit it and go back to
  // the script code.
  "-->"                          {
                                     yybegin(YYINITIAL);
                                     return symbol(SYM_XML_COMMENT_MARKUP);                                     
                                 }
  // Everything up to and including the next terminator. The three characters
  // of the terminator are pushed back, so the text before it is emitted here
  // and the terminator is left for the rule above.
  ~"-->"                         {
                                     yypushback(3);
                                     return symbol(SYM_XML_COMMENT_MARKUP);
                                 }
  // No terminator in the rest of the input: emit what was counted and stay
  // in this state.
  <<EOF>>                        {
                                    return flush(SYM_XML_COMMENT_MARKUP);
                                 }
}