package ro.sync.lexer.sql;
import ro.sync.lexer.AbstractLexer;

@SuppressWarnings("unused")
%%

// Generated class: public, named SQLLexer, derived from AbstractLexer.
%public 
%class SQLLexer
%extends AbstractLexer
// Use the Unicode input character set.
%unicode
// Maintain the yychar character counter.
%char
// Return type of the generated scanning method.
%type ro.sync.lexer.Symbol
 
// Match all patterns case-insensitively.
%caseless
// Exception type thrown by the generated scanner on a scan error.
%scanerror ro.sync.lexer.LexerException

%{
    // Short local aliases for the token type codes declared in SQLTokens.

    /** Plain text. */
    private static final byte SYM_TEXT = SQLTokens.TEXT;

    /** SQL keyword. */
    private static final byte SYM_KEYWORD = SQLTokens.KEYWORD;

    /** SQL identifier. */
    private static final byte SYM_IDENTIFIER = SQLTokens.IDENTIFIER;

    /** Operator. */
    private static final byte SYM_OPERATOR = SQLTokens.OPERATOR;

    /** The semicolon. */
    private static final byte SYM_SEMICOLON = SQLTokens.SEMICOLON;

    /** The comma. */
    private static final byte SYM_COMMA = SQLTokens.COMMA;

    /** The star. */
    private static final byte SYM_STAR = SQLTokens.STAR;

    /** Round bracket. */
    private static final byte SYM_BRACKET = SQLTokens.BRACKET;

    /** Square bracket. */
    private static final byte SYM_SQUARE_BRACKET = SQLTokens.SQUARE_BRACKET;

    /** Curly bracket. */
    private static final byte SYM_CURLY_BRACKET = SQLTokens.CURLY_BRACKET;

    /** Double-quoted string literal. */
    private static final byte SYM_STRING_DQ = SQLTokens.STRING_DQ;

    /** Single-quoted string literal. */
    private static final byte SYM_STRING_SQ = SQLTokens.STRING_SQ;

    /** Numeric literal. */
    private static final byte SYM_NUMBER = SQLTokens.NUMBER;

    /** Block comment. */
    private static final byte SYM_COMMENT = SQLTokens.COMMENT;

    /** Line comment. */
    private static final byte SYM_LINE_COMMENT = SQLTokens.LINE_COMMENT;

    /**
     * Creates a lexer that has no reader yet; yyreset is expected to be
     * called afterwards to reset the scanner and assign the reader.
     */
    public SQLLexer() {
        // The superclass no-argument constructor is invoked implicitly.
    }

    /**
     * @return the SQL_LEXER name constant.
     */
    public String getName() {
        return SQL_LEXER;
    }
%}

// Exclusive lexical states: while one of them is active, only the rules
// listed under that state are considered.
%xstate COMMENT, UNICODE_IDENTIFIER, DQ_STRING, SQ_STRING

// Operators.
// The single characters "-", "/", "|" and "&" are listed explicitly because
// GeneralChar excludes them: without these alternatives no YYINITIAL rule
// matches them on their own (e.g. "a - b", "x / 2") and the scanner raises
// a scan error. Longest-match still gives precedence to the longer tokens
// "--", "//", "/*", "||", "->" and the U&" identifier prefix.
Operator = "." | "@" | "=" | ">" | "<" | "!" | "~" | "?" | ":" | "+"
         | "-" | "/" | "|" | "&"
         | "<>" | ">=" | "<=" | "||" | "->" | "::" | "%" | "^"  
             
// Words reported as SYM_KEYWORD. Because of %caseless they match in any
// letter case. In YYINITIAL the Keyword rule is listed before Identifier, so
// it wins when both match the same text; a longer identifier that merely
// starts with a keyword still lexes as an identifier (longest match).
Keyword = "add" | "all" | "allocate" | "alter" | "analyze" | "and" | "any" | "are" | "array" | "as"
        | "asc" | "asensitive" | "asymmetric" | "at" | "atomic" | "authorization" | "before" 
        | "begin" | "between" | "bigint" | "binary" | "blob" | "boolean" | "both" | "by" | "call" 
        | "called" | "cascade" | "cascaded" | "case" | "cast" | "change" | "char" | "character" 
        | "check" | "clob" | "close" | "collate" | "column" | "commit" | "condition" | "connect" 
        | "connection" | "constraint" | "constructor" | "continue" | "convert" | "corresponding" 
        | "create" | "cross" | "cube" | "current" | "current_date" | "current_default_transform_group" 
        | "current_path" | "current_role" | "current_time" | "current_timestamp" 
        | "current_transform_group_for_type" | "current_user" | "cursor" | "cycle" | "database" 
        | "databases" | "date" | "day" | "deallocate" | "dec" | "decimal" | "declare" | "default" 
        | "delayed" | "delete" | "deref" | "desc" | "describe" | "deterministic" | "disconnect" 
        | "distinct" | "distinctrow" | "div" | "double" | "drop" | "dual" | "dynamic" | "each" 
        | "element" | "else" | "elseif" | "enclosed" | "end" | "end_exec" | "escape" | "escaped" 
        | "except" | "exec" | "execute" | "exists" | "exit" | "explain" | "external" | "false" 
        | "fetch" | "filter" | "float" | "float4" | "float8" | "for" | "force" | "foreign" | "free" 
        | "from" | "full" | "fulltext" | "function" | "generated" | "get" | "global" | "grant" 
        | "group" | "grouping" | "handler" | "having" | "hold" | "hour" | "identity" | "if" 
        | "ignore" | "immediate" | "in" | "index" | "indicator" | "infile" | "inner" | "inout" 
        | "input" | "insensitive" | "insert" | "int" | "integer" | "intersect" | "interval" | "into" 
        | "is" | "isolation" | "iterate" | "join" | "key" | "keys" | "kill" | "language" | "large" 
        | "lateral" | "leading" | "leave" | "left" | "like" | "limit" | "lines" | "load" | "local" 
        | "localtime" | "localtimestamp" | "lock" | "long" | "loop" | "match" | "member" | "merge" 
        | "method" | "minute" | "mod" | "modifies" | "module" | "month" | "multiset" | "national" 
        | "natural" | "nchar" | "nchar_varying" | "nclob" | "new" | "no" | "none" | "not" 
        | "notfound" | "null" | "numeric" | "of" | "old" | "on" | "only" | "open" | "optimize" 
        | "option" | "optionally" | "or" | "order" | "out" | "outer" | "outfile" | "output" | "over" 
        | "overlaps" | "parameter" | "partition" | "precision" | "prepare" | "primary" | "procedure" 
        | "purge" | "range" | "read" | "reads" | "real" | "recursive" | "ref" | "references" 
        | "referencing" | "regexp" | "release" | "rename" | "repeat" | "replace" | "require" 
        | "restrict" | "return" | "returns" | "revoke" | "right" | "rlike" | "rollback" | "rollup" 
        | "row" | "rows" | "savepoint" | "schema" | "schemas" | "scope" | "scroll" | "search" 
        | "second" | "select" | "sensitive" | "separator" | "session_user" | "set" | "show" 
        | "similar" | "smalldatetime" | "smallint" | "some" | "soname" | "spatial" | "specific" 
        | "specifictype" | "sql" | "sqlexception" | "sqlstate" | "sqlwarning" | "start" | "starting"
        | "static" | "submultiset" | "symmetric" | "system" | "system_user" | "table" | "terminated" 
        | "then" | "time" | "timestamp" | "timezone_hour" | "timezone_minute" | "to" | "trailing" 
        | "translation" | "treat" | "trigger" | "true" | "truncate" | "undo" | "union" | "unique" 
        | "unknown" | "unlock" | "unnest" | "unsigned" | "update" | "usage" | "use" | "user" 
        | "using" | "value" | "values" | "varbinary" | "varchar" | "varcharacter" | "varying" 
        | "when" | "whenever" | "where" | "while" | "window" | "with" | "within" | "without" 
        | "write" | "xor" | "year" | "zerofill" 

////////////////////////////////////////////// NUMBERS ////////////////////////////////////////////

// Decimal integer: one or more decimal digits.
Digit = [0-9]
Integer = {Digit}+

// Hexadecimal integer: one or more hex digits; no "0x" prefix is part of
// the pattern.
HexDigit = {Digit} | [A-Fa-f]
HexInt = {HexDigit}+

/* floating point literals */
// F1: digits, a dot, optional fraction digits (e.g. "1." or "1.5").
F1 = {Digit}+ \. {Digit}* 
// F2: a leading dot followed by digits (e.g. ".5").
F2 = \. {Digit}+ 
// F3: digits only; combined with the optional exponent/suffix below.
F3 = {Digit}+ 
// Exponent: "e", optional sign, digits.
Exponent = [eE] [+-]? {Digit}+
        
// A float is one of the three mantissa forms, then an optional exponent and
// an optional "f" suffix.
Float = ({F1}|{F2}|{F3}) {Exponent}? [fF]?


// Optional single-letter suffix allowed after an integer or hex integer.
Multiplier = [kmg]

// NOTE: in YYINITIAL the Identifier rule is listed before the Number rule,
// so text that both can match at the same length (e.g. "10k", "1e5", "ff")
// is reported as an identifier, not as a number.
Number = (({Integer} | {HexInt}) {Multiplier}?) | {Float}


/////////////////////////////////////////// IDENTIFIERS //////////////////////////////////////////
// Unicode identifiers for SQL 2003
// Characters accepted inside a U&"..." identifier, listed as explicit code
// point ranges. This macro is used by the UNICODE_IDENTIFIER state.
UnicodeIdentifierChar = [\u0020-\u007F] | [\u00A0-\u00FF] | [\u0100-\u017F] | [\u0180-\u024F] 
		              | [\u0250-\u02AF] | [\u0300-\u036F] | [\u0370-\u03FF] | [\u0400-\u04FF] 
		              | [\u0500-\u052F] | [\u0530-\u058F] | [\u0590-\u05FF] | [\u0600-\u06FF] 
		              | [\u0700-\u074F] | [\u0780-\u07BF] | [\u0900-\u097F] | [\u0980-\u09FF] 
		              | [\u0A00-\u0A7F] | [\u0A80-\u0AFF] | [\u0B00-\u0B7F] | [\u0B80-\u0BFF] 
		              | [\u0C00-\u0C7F] | [\u0C80-\u0CFF] | [\u0D00-\u0D7F] | [\u0D80-\u0DFF] 
		              | [\u0E00-\u0E7F] | [\u0E80-\u0EFF] | [\u0F00-\u0FFF] | [\u1000-\u109F] 
		              | [\u10A0-\u10FF] | [\u1100-\u11FF] | [\u1200-\u137F] | [\u13A0-\u13FF] 
		              | [\u1400-\u167F] | [\u1680-\u169F] | [\u16A0-\u16FF] | [\u1700-\u171F] 
		              | [\u1720-\u173F] | [\u1740-\u175F] | [\u1760-\u177F] | [\u1780-\u17FF] 
		              | [\u1800-\u18AF] | [\u1900-\u194F] | [\u1950-\u197F] | [\u19E0-\u19FF] 
		              | [\u1D00-\u1D7F] | [\u1E00-\u1EFF] | [\u1F00-\u1FFF] | [\u2070-\u209F] 
		              | [\u20A0-\u20CF] | [\u20D0-\u20FF] | [\u2100-\u214F] | [\u2150-\u218F] 
		              | [\u2460-\u24FF] | [\u2E80-\u2EFF] | [\u2F00-\u2FDF] | [\u2FF0-\u2FFF] 
		              | [\u3000-\u303F] | [\u3040-\u309F] | [\u30A0-\u30FF] | [\u3100-\u312F] 
		              | [\u3130-\u318F] | [\u3190-\u319F] | [\u31A0-\u31BF] | [\u31F0-\u31FF] 
		              | [\u3200-\u32FF] | [\u3300-\u33FF] | [\u3400-\u4DBF] | [\u4E00-\u9FFF] 
		              | [\uA000-\uA48F] | [\uA490-\uA4CF] | [\uAC00-\uD7AF] | [\uD800-\uDB7F] 
		              | [\uDB80-\uDBFF] | [\uDC00-\uDFFF] | [\uE000-\uF8FF] | [\uF900-\uFAFF] 
		              | [\uFB00-\uFB4F] | [\uFB50-\uFDFF] | [\uFE00-\uFE0F] | [\uFE20-\uFE2F] 
		              | [\uFE30-\uFE4F] | [\uFE50-\uFE6F] | [\uFE70-\uFEFF] | [\uFF00-\uFFEF]

// A backslash, an optional "+", then one or more hex digits.
UnicodeEscapeValue = "\\" "+"? {HexDigit}+
UnicodeIdentifierPart = {UnicodeEscapeValue} | {UnicodeIdentifierChar}
// Whole-identifier pattern. It is not referenced by any rule in this file:
// the Identifier macro leaves it commented out, and U&"..." input is
// scanned character by character in the UNICODE_IDENTIFIER state instead.
// NOTE(review): "+?" reads like a reluctant quantifier, which JFlex does not
// provide - confirm the intent before re-enabling this macro.
UnicodeIdentifier = "U&\"" {UnicodeIdentifierPart}+? "\"" ("escape\\")?
        
// Regular identifiers (SQL92, Sybase ASA, Sybase ASE, SQLServer, Teradata,
// MySQL, Informix, PostgreSQL, Oracle, Interbase, etc)

// Characters other than digits that may appear in a regular identifier:
// letters, '#', '_' and '$'.
NonDigit = [a-zA-Z#_$]

// Identifiers that start with a digit must contain
// at least one non-digit char (otherwise they are considered to be numbers)
RegularIdentifierStart = ({Digit}+ {NonDigit}) | {NonDigit} 
// After the start, any mix of digits and non-digit identifier characters.
RegularIndentifierPart = {Digit} | {NonDigit}
RegularIndentifier = {RegularIdentifierStart} {RegularIndentifierPart}*

// All identifiers
// Only regular identifiers are matched by this macro; the Unicode form is
// disabled below.
Identifier = {RegularIndentifier}
// | {UnicodeIdentifier} 

/////////////////////////////////////////// STRING LITERALS ///////////////////////////////////////

// Double quote strings
// Content is one or more characters other than '"' and backslash, so an
// empty string "" is not matched by DQString.
DQStringContent =  [^\"\\]+
// Opening quote plus content, with no closing quote matched (yet).
DQUnclosedString =  \" {DQStringContent}
DQString =  {DQUnclosedString} \"

// Single quote strings
// Same structure as above, with "'" as the delimiter.
SQStringContent =  [^\'\\]+
SQUnclosedString =  \' {SQStringContent}
SQString =  {SQUnclosedString} \'

// Line comment: "--" or "//" followed by the rest of the line.
LineComment = ("--" | "//") {Char}*

// All characters
// "." matches any character except line terminators.
Char = .
// Any character that is not a blank, a tab or one of the listed punctuation
// characters. Quotes, backslash, letters and digits are all included.
GeneralChar = [^ \t\-\^\[\]{};=.~\+|<>()*/?:@,&]

%%

<YYINITIAL> {
    // Default state. The longest match wins; when several rules match the
    // same length the one listed first wins, so the order below matters
    // (Keyword before Identifier, Identifier before Number and the
    // GeneralChar fallback).
    {Keyword}                   {   return symbol(SYM_KEYWORD);         }
    {Identifier}                {   return symbol(SYM_IDENTIFIER);      }
    {Operator}                  {   return symbol(SYM_OPERATOR);        }
    {LineComment}               {   return symbol(SYM_LINE_COMMENT);    }
    // Block comment opener: push it back and switch state, so that the
    // COMMENT rules re-read the two characters and count them in cLen.
    "/*"                        {
                                    yypushback(2);
                                    yybegin(COMMENT);
                                }
    // Unicode identifier opener: reset the length counter, push the prefix
    // back and let the UNICODE_IDENTIFIER rules consume it.
    "U&\""                      {
                                    cLen = 0;
                                    yypushback(3);
                                    yybegin(UNICODE_IDENTIFIER);
                                }
                                
    {DQString}                  {   return symbol(SYM_STRING_DQ);       }
    
    // No closing quote was matched: emit what was read as a string token
    // and continue the string in the DQ_STRING state.
    {DQUnclosedString}          {   
                                    yybegin(DQ_STRING);
                                    return symbol(SYM_STRING_DQ);
                                }
    
    {SQString}                  {   return symbol(SYM_STRING_SQ);       }
    
    // Same as above for single-quoted strings.
    {SQUnclosedString}          {
                                    yybegin(SQ_STRING);
                                    return symbol(SYM_STRING_SQ);
                                }
    
    {Number}                    {   return symbol(SYM_NUMBER);          }
    
    "(" | ")"                   {   return symbol(SYM_BRACKET);         }
    "[" | "]"                   {   return symbol(SYM_SQUARE_BRACKET);  }
    "{" | "}"                   {   return symbol(SYM_CURLY_BRACKET);   }
    ";"                         {   return symbol(SYM_SEMICOLON);       }
    ","                         {   return symbol(SYM_COMMA);           }
    "*"                         {   return symbol(SYM_STAR);           }
    
    // Runs of blanks/tabs, and any other run of general characters, are
    // reported as plain text.
    [ \t]+                      {   return symbol(SYM_TEXT);            }
    {GeneralChar}+              {   return symbol(SYM_TEXT);            }
}

<COMMENT> {
    // Closing delimiter: count it, go back to the default state and emit
    // the accumulated text as one comment token.
    "*/"                        {
                                    cLen += 2;
                                    yybegin(YYINITIAL);
                                    return flush(SYM_COMMENT);
                                }
    // Any other character, line terminators included. "[^]" is used instead
    // of "." because "." does not match line terminators: a line break
    // inside a block comment would otherwise match no rule in this
    // exclusive state and raise a scan error.
    [^]                         {   cLen++;                             }
    // Input ended inside the comment: emit what was accumulated. The state
    // is deliberately left unchanged here.
    <<EOF>>                     {   
                                    return flush(SYM_COMMENT);
                                }
}

<UNICODE_IDENTIFIER> {
    // The U&" prefix (pushed back by the YYINITIAL rule) is counted here.
    "U&\""                      {
                                    cLen += 3;
                                }
    // Closing quote, optionally followed by escape\ : count it, return to
    // the default state and emit the identifier.
    "\"" | "\"escape\\"         {
                                    cLen += yylength();
                                    yybegin(YYINITIAL);
                                    return flush(SYM_IDENTIFIER); 
                                }
    {UnicodeIdentifierChar}     {
                                    cLen++;
                                }
    // Fallback: a character that cannot be part of the identifier ends it.
    // The character is pushed back so YYINITIAL scans it again. "[^]" is
    // used instead of "." so that line terminators are covered as well;
    // with "." a line break in this exclusive state matched no rule and
    // raised a scan error.
    [^]                         {
                                    yypushback(1);
                                    yybegin(YYINITIAL);
                                    return flush(SYM_IDENTIFIER); 
                                }              
    // Input ended before the closing quote: emit what was accumulated.
    <<EOF>>                     { 
                                    yybegin(YYINITIAL);  
                                    return flush(SYM_IDENTIFIER);
                                }                    
}

<DQ_STRING> {
    // Continuation of a double-quoted string whose closing quote has not
    // been seen yet: more content, still open.
    {DQStringContent}           {
                                    return symbol(SYM_STRING_DQ);
                                }
    // Optional content followed by the closing quote: the string ends.
    {DQStringContent}* "\""     {
                                    yybegin(YYINITIAL);
                                    return symbol(SYM_STRING_DQ);
                                }
    // Backslash, together with the character it escapes (if any on this
    // line). DQStringContent excludes the backslash, so without this rule
    // any backslash inside a continued string matched nothing in this
    // exclusive state and raised a scan error. An escaped quote (\") stays
    // part of the string and does not close it.
    "\\" .?                     {
                                    return symbol(SYM_STRING_DQ);
                                }
}

<SQ_STRING> {
    // Continuation of a single-quoted string whose closing quote has not
    // been seen yet: more content, still open.
    {SQStringContent}           {
                                    return symbol(SYM_STRING_SQ);
                                }
    // Optional content followed by the closing quote: the string ends.
    {SQStringContent}* "\'"     {
                                    yybegin(YYINITIAL);
                                    return symbol(SYM_STRING_SQ);
                                }
    // Backslash, together with the character it escapes (if any on this
    // line). SQStringContent excludes the backslash, so without this rule
    // input such as 'C:\temp' matched nothing in this exclusive state once
    // the backslash was reached and raised a scan error. An escaped quote
    // (\') stays part of the string and does not close it.
    "\\" .?                     {
                                    return symbol(SYM_STRING_SQ);
                                }
}