PartiQL ANTLR Grammar

Parser Grammar
// Parser rules for PartiQL. No tokens are declared in this grammar; the token
// vocabulary is imported from the PartiQLTokens lexer grammar via `tokenVocab`.
grammar PartiQL;

options {
    tokenVocab=PartiQLTokens;
    caseInsensitive = true;
}

/**
 *
 * TOP LEVEL
 *
 */

// Entry rule: an optional EXPLAIN prefix (itself with an optional parenthesized,
// comma-separated option list) followed by exactly one statement.
root
    : (EXPLAIN (PAREN_LEFT explainOption (COMMA explainOption)* PAREN_RIGHT)? )? statement;

// A single DQL, DML, DDL or EXEC statement. Every alternative allows one
// optional trailing semicolon and must then reach EOF.
statement
    : dql COLON_SEMI? EOF          # QueryDql
    | dml COLON_SEMI? EOF          # QueryDml
    | ddl COLON_SEMI? EOF          # QueryDdl
    | execCommand COLON_SEMI? EOF  # QueryExec
    ;

/**
 *
 * COMMON STRUCTURES
 *
 */

// One EXPLAIN option: a parameter name followed by its value, both unquoted identifiers.
explainOption
    : param=IDENTIFIER value=IDENTIFIER;

// `AS <symbol>` alias.
asIdent
    : AS symbolPrimitive;

// `AT <symbol>` alias.
atIdent
    : AT symbolPrimitive;

// `BY <symbol>` alias.
byIdent
    : BY symbolPrimitive;

// An identifier token, either unquoted (IDENTIFIER) or double-quoted (IDENTIFIER_QUOTED).
symbolPrimitive
    : ident=( IDENTIFIER | IDENTIFIER_QUOTED )
    ;

/**
 *
 * DATA QUERY LANGUAGE (DQL)
 *
 */

// A query statement is simply any expression (SELECT-FROM-WHERE queries are expressions too).
dql
    : expr;

/**
 *
 * EXECUTE
 *
 */

// EXEC <name> [<arg> (, <arg>)*]. The name is parsed as a full expression and
// the argument list is NOT parenthesized.
execCommand
    : EXEC name=expr ( args+=expr ( COMMA args+=expr )* )?;

/**
 *
 * DATA DEFINITION LANGUAGE (DDL)
 *
 */

// Data definition statements: CREATE ... or DROP ...
ddl
    : createCommand
    | dropCommand
    ;

// CREATE TABLE <name>
// CREATE INDEX ON <table> ( <path> [, <path>]... )
createCommand
    : CREATE TABLE symbolPrimitive                                                              # CreateTable
    | CREATE INDEX ON symbolPrimitive PAREN_LEFT pathSimple ( COMMA pathSimple )* PAREN_RIGHT   # CreateIndex
    ;

// DROP TABLE <target>
// DROP INDEX <target> ON <on>
dropCommand
    : DROP TABLE target=symbolPrimitive                         # DropTable
    | DROP INDEX target=symbolPrimitive ON on=symbolPrimitive   # DropIndex
    ;

/**
 *
 * DATA MANIPULATION LANGUAGE (DML)
 *
 */

// Data manipulation statements. The first two alternatives share the
// DmlBaseWrapper label and differ only in clause order:
//   UPDATE <table> <commands>+ [WHERE ...] [RETURNING ...]
//   FROM <table> [WHERE ...] <commands>+ [RETURNING ...]
// The remaining alternatives are DELETE, the legacy INSERT that accepts a
// RETURNING clause, and a single stand-alone base command.
dml
    : updateClause dmlBaseCommand+ whereClause? returningClause?  # DmlBaseWrapper
    | fromClause whereClause? dmlBaseCommand+ returningClause?    # DmlBaseWrapper
    | deleteCommand                                               # DmlDelete
    | insertCommandReturning                                      # DmlInsertReturning
    | dmlBaseCommand                                              # DmlBase
    ;

// The individual mutation commands that can appear alone or inside an UPDATE/FROM wrapper.
dmlBaseCommand
    : insertCommand
    | setCommand
    | replaceCommand
    | removeCommand
    | upsertCommand
    ;

// A restricted path used as a DML/DDL target: a root symbol followed by zero or more simple steps.
pathSimple
    : symbolPrimitive pathSimpleSteps*;

// One step of a simple path: `[<literal>]`, `[<symbol>]` or `.<symbol>`.
// Arbitrary expressions and wildcards are not accepted here (compare pathStep).
pathSimpleSteps
    : BRACKET_LEFT key=literal BRACKET_RIGHT             # PathSimpleLiteral
    | BRACKET_LEFT key=symbolPrimitive BRACKET_RIGHT     # PathSimpleSymbol
    | PERIOD key=symbolPrimitive                         # PathSimpleDotSymbol
    ;

// REPLACE INTO <table> [AS <alias>] <value>
// Based on https://github.com/partiql/partiql-lang/blob/main/RFCs/0011-partiql-insert.md
replaceCommand
    : REPLACE INTO symbolPrimitive asIdent? value=expr;

// UPSERT INTO <table> [AS <alias>] <value>
// Based on https://github.com/partiql/partiql-lang/blob/main/RFCs/0011-partiql-insert.md
upsertCommand
    : UPSERT INTO symbolPrimitive asIdent? value=expr;

// REMOVE <path>
removeCommand
    : REMOVE pathSimple;

// Same shape as the InsertLegacy alternative of insertCommand, plus an optional
// trailing RETURNING clause. Referenced directly from `dml`.
insertCommandReturning
    : INSERT INTO pathSimple VALUE value=expr ( AT pos=expr )? onConflictClause? returningClause?;

// INSERT in its two forms:
//   InsertLegacy: INSERT INTO <path> VALUE <value> [AT <pos>] [ON CONFLICT ...]
//   Insert:       INSERT INTO <table> [AS <alias>] <value> [ON CONFLICT ...]
insertCommand
    : INSERT INTO pathSimple VALUE value=expr ( AT pos=expr )? onConflictClause?  # InsertLegacy
    // See the Grammar at https://github.com/partiql/partiql-lang/blob/main/RFCs/0011-partiql-insert.md#2-proposed-grammar-and-semantics
    | INSERT INTO symbolPrimitive asIdent? value=expr onConflictClause?           # Insert
    ;

// ON CONFLICT handling:
//   OnConflictLegacy: ON CONFLICT WHERE <expr> DO NOTHING
//   OnConflict:       ON CONFLICT [<conflict target>] <conflict action>
onConflictClause
    : ON CONFLICT WHERE expr DO NOTHING                                           # OnConflictLegacy
    | ON CONFLICT conflictTarget? conflictAction                                  # OnConflict
    ;

/**
    <conflict target> ::=
        ( <index target> [, <index target>]... )
        | ( { <primary key> | <composite primary key> } )
        | ON CONSTRAINT <constraint name>
*/
// Both parenthesized forms above are parsed by the same first alternative:
// a parenthesized, comma-separated list of symbols.
conflictTarget
    : PAREN_LEFT symbolPrimitive (COMMA symbolPrimitive)* PAREN_RIGHT
    | ON CONSTRAINT constraintName;

// Name of a constraint; any quoted or unquoted symbol.
constraintName
    : symbolPrimitive;

// What to do on conflict: DO NOTHING, DO REPLACE ... or DO UPDATE ...
conflictAction
    : DO NOTHING
    | DO REPLACE doReplace
    | DO UPDATE doUpdate;

/*
<do replace> ::= EXCLUDED
    | SET <attr values> [, <attr values>]...
    | VALUE <tuple value>
   [ WHERE <condition> ]
*/
// Only the EXCLUDED form of the production above is currently accepted.
doReplace
    : EXCLUDED;
    // :TODO add the rest of the grammar

/*
<do update> ::= EXCLUDED
    | SET <attr values> [, <attr values>]...
    | VALUE <tuple value>
   [ WHERE <condition> ]
*/
// Only the EXCLUDED form of the production above is currently accepted.
doUpdate
    : EXCLUDED;
    // :TODO add the rest of the grammar

// UPDATE <table reference>
updateClause
    : UPDATE tableBaseReference;

// SET <path> = <expr> [, <path> = <expr>]...
setCommand
    : SET setAssignment ( COMMA setAssignment )*;

// A single `<path> = <expr>` assignment.
setAssignment
    : pathSimple EQ expr;

// DELETE FROM <path> [aliases] [WHERE ...] [RETURNING ...]
deleteCommand
    : DELETE fromClauseSimple whereClause? returningClause?;

// RETURNING <column> [, <column>]...
returningClause
    : RETURNING returningColumn ( COMMA returningColumn )*;

// (MODIFIED | ALL) (OLD | NEW) followed by either `*` or an expression.
returningColumn
    : status=(MODIFIED|ALL) age=(OLD|NEW) ASTERISK
    | status=(MODIFIED|ALL) age=(OLD|NEW) col=expr
    ;

// FROM clause restricted to a simple path, used by DELETE.
//   Explicit: optional AS / AT / BY aliases, in that order.
//   Implicit: a bare symbol directly after the path (no AS keyword).
fromClauseSimple
    : FROM pathSimple asIdent? atIdent? byIdent?   # FromClauseSimpleExplicit
    | FROM pathSimple symbolPrimitive              # FromClauseSimpleImplicit
    ;

// WHERE <expr>, as used by DML statements and graph patterns.
// SELECT queries use whereClauseSelect instead.
whereClause
    : WHERE arg=expr;

/**
 *
 * SELECT AND PROJECTION
 *
 */

// Projection part of a query:
//   SELECT [DISTINCT|ALL] *
//   SELECT [DISTINCT|ALL] <item> [, <item>]...
//   SELECT [DISTINCT|ALL] VALUE <expr>
//   PIVOT <expr> AT <expr>
selectClause
    : SELECT setQuantifierStrategy? ASTERISK          # SelectAll
    | SELECT setQuantifierStrategy? projectionItems   # SelectItems
    | SELECT setQuantifierStrategy? VALUE expr        # SelectValue
    | PIVOT pivot=expr AT at=expr                     # SelectPivot
    ;

// Comma-separated list of one or more projection items.
projectionItems
    : projectionItem ( COMMA projectionItem )* ;

// An expression with an optional alias; the AS keyword itself is optional.
projectionItem
    : expr ( AS? symbolPrimitive )? ;

// DISTINCT or ALL.
setQuantifierStrategy
    : DISTINCT
    | ALL
    ;

/**
 * LET CLAUSE
 */

// LET <binding> [, <binding>]...
letClause
    : LET letBinding ( COMMA letBinding )*;

// `<expr> AS <symbol>`; the AS keyword is mandatory here.
letBinding
    : expr AS symbolPrimitive;

/**
 *
 * ORDER BY CLAUSE
 *
 */

// ORDER BY <sort spec> [, <sort spec>]...
orderByClause
    : ORDER BY orderSortSpec ( COMMA orderSortSpec )*;

// <expr> [ASC|DESC] [NULLS (FIRST|LAST)]
orderSortSpec
    : expr dir=(ASC|DESC)? (NULLS nulls=(FIRST|LAST))?;

/**
 *
 * GROUP CLAUSE
 *
 */

// GROUP [PARTIAL] BY <key> [, <key>]... [GROUP AS <symbol>]
groupClause
    : GROUP PARTIAL? BY groupKey ( COMMA groupKey )* groupAlias?;

// GROUP AS <symbol>
groupAlias
    : GROUP AS symbolPrimitive;

// A grouping key expression with an optional `AS <symbol>` alias.
groupKey
    : key=exprSelect (AS symbolPrimitive)?;

/**
 *
 * Window Function
 */

// OVER ( [PARTITION BY ...] [ORDER BY ...] ); both parts are optional at the grammar level.
over
   : OVER PAREN_LEFT windowPartitionList? windowSortSpecList? PAREN_RIGHT
   ;

// PARTITION BY <expr> [, <expr>]...
windowPartitionList
   : PARTITION BY expr (COMMA expr)*
   ;

// ORDER BY <sort spec> [, <sort spec>]... inside a window specification.
windowSortSpecList
   : ORDER BY orderSortSpec (COMMA orderSortSpec)*
   ;

/**
 *
 * SIMPLE CLAUSES
 *
 */

// HAVING <expr>
havingClause
    : HAVING arg=exprSelect;

// FROM <table reference>, possibly containing joins.
fromClause
    : FROM tableReference;

// WHERE <expr> as used inside a SELECT query. The argument is an exprSelect,
// so bag operators (UNION etc.) are not accepted at the top level of the argument.
whereClauseSelect
    : WHERE arg=exprSelect;

// OFFSET <expr>
offsetByClause
    : OFFSET arg=exprSelect;

// LIMIT <expr>
limitClause
    : LIMIT arg=exprSelect;

/**
 *
 * GRAPH PATTERN MATCHING LANGUAGE (GPML)
 *
 */

// A single graph pattern with an optional selector.
gpmlPattern
    : selector=matchSelector? matchPattern;

// One or more comma-separated graph patterns sharing one optional leading selector.
gpmlPatternList
    : selector=matchSelector? matchPattern ( COMMA matchPattern )*;

// [restrictor] [<var> =] followed by zero or more nodes, edges or nested patterns.
matchPattern
    : restrictor=patternRestrictor? variable=patternPathVariable? graphPart*;

// One element of a pattern: a node, an edge, or a nested (grouped) pattern.
graphPart
    : node
    | edge
    | pattern
    ;

// Path selectors:
//   (ANY|ALL) SHORTEST
//   ANY [k]
//   SHORTEST k [GROUP]
matchSelector
    : mod=(ANY|ALL) SHORTEST                  # SelectorBasic
    | ANY k=LITERAL_INTEGER?                  # SelectorAny
    | SHORTEST k=LITERAL_INTEGER GROUP?       # SelectorShortest
    ;

// `<symbol> =` binding a variable to the path that follows.
patternPathVariable
    : symbolPrimitive EQ;

// Any unquoted identifier is accepted syntactically; the grammar does not
// restrict it to the values named in the trailing comment.
patternRestrictor    // Should be TRAIL / ACYCLIC / SIMPLE
    : restrictor=IDENTIFIER;

// ( [<var>] [:<label>] [WHERE <expr>] )
node
    : PAREN_LEFT symbolPrimitive? patternPartLabel? whereClause? PAREN_RIGHT;

// An edge, either with a bracketed specification or abbreviated, optionally quantified.
edge
    : edgeWSpec quantifier=patternQuantifier?    # EdgeWithSpec
    | edgeAbbrev quantifier=patternQuantifier?   # EdgeAbbreviated
    ;

// A nested pattern grouped in parentheses or square brackets. It requires at
// least one graph part and may carry a WHERE clause and a trailing quantifier.
pattern
    : PAREN_LEFT restrictor=patternRestrictor? variable=patternPathVariable? graphPart+ where=whereClause? PAREN_RIGHT quantifier=patternQuantifier?
    | BRACKET_LEFT restrictor=patternRestrictor? variable=patternPathVariable? graphPart+ where=whereClause? BRACKET_RIGHT quantifier=patternQuantifier?
    ;

// `+`, `*`, or `{lower,[upper]}`. The comma is mandatory; the upper bound is optional.
patternQuantifier
    : quant=( PLUS | ASTERISK )
    | BRACE_LEFT lower=LITERAL_INTEGER COMMA upper=LITERAL_INTEGER? BRACE_RIGHT
    ;

// Edges carrying a bracketed specification. Written out, the alternatives are:
//   -[..]->    EdgeSpecRight
//   ~[..]~     EdgeSpecUndirected
//   <-[..]-    EdgeSpecLeft
//   ~[..]~>    EdgeSpecUndirectedRight
//   <~[..]~    EdgeSpecUndirectedLeft
//   <-[..]->   EdgeSpecBidirectional
//   -[..]-     EdgeSpecUndirectedBidirectional
edgeWSpec
    : MINUS edgeSpec MINUS ANGLE_RIGHT             # EdgeSpecRight
    | TILDE edgeSpec TILDE                         # EdgeSpecUndirected
    | ANGLE_LEFT MINUS edgeSpec MINUS              # EdgeSpecLeft
    | TILDE edgeSpec TILDE ANGLE_RIGHT             # EdgeSpecUndirectedRight
    | ANGLE_LEFT TILDE edgeSpec TILDE              # EdgeSpecUndirectedLeft
    | ANGLE_LEFT MINUS edgeSpec MINUS ANGLE_RIGHT  # EdgeSpecBidirectional
    | MINUS edgeSpec MINUS                         # EdgeSpecUndirectedBidirectional
    ;

// [ [<var>] [:<label>] [WHERE <expr>] ]
edgeSpec
    : BRACKET_LEFT symbolPrimitive? patternPartLabel? whereClause? BRACKET_RIGHT;

// `:<label>` attached to a node or edge.
patternPartLabel
    : COLON symbolPrimitive;

// Edges without a specification: `~`, `~>`, `<~`, and `-` with optional `<` / `>` on either side.
edgeAbbrev
    : TILDE
    | TILDE ANGLE_RIGHT
    | ANGLE_LEFT TILDE
    | ANGLE_LEFT? MINUS ANGLE_RIGHT?
    ;

/**
 *
 * TABLES & JOINS
 *
 */

// A FROM source, left-recursive so that joins chain to the left.
//   TableCrossJoin:     <lhs> [join type] CROSS JOIN <rhs>   or   <lhs> , <rhs>
//   TableQualifiedJoin: <lhs> [join type] JOIN <rhs> ON <expr>
//   TableRefBase:       a single non-join source
//   TableWrapped:       a parenthesized table reference
tableReference
    : lhs=tableReference joinType? CROSS JOIN rhs=joinRhs     # TableCrossJoin
    | lhs=tableReference COMMA rhs=joinRhs                    # TableCrossJoin
    | lhs=tableReference joinType? JOIN rhs=joinRhs joinSpec  # TableQualifiedJoin
    | tableNonJoin                                            # TableRefBase
    | PAREN_LEFT tableReference PAREN_RIGHT                   # TableWrapped
    ;

// A source that is not itself a join.
tableNonJoin
    : tableBaseReference
    | tableUnpivot
    ;

// A source expression with its aliases:
//   TableBaseRefSymbol:  <expr> <symbol>            (implicit alias, no AS)
//   TableBaseRefClauses: <expr> [AS ..] [AT ..] [BY ..]
//   TableBaseRefMatch:   <graph match> [AS ..] [AT ..] [BY ..]
tableBaseReference
    : source=exprSelect symbolPrimitive              # TableBaseRefSymbol
    | source=exprSelect asIdent? atIdent? byIdent?   # TableBaseRefClauses
    | source=exprGraphMatchOne asIdent? atIdent? byIdent?   # TableBaseRefMatch
    ;

// UNPIVOT <expr> [AS ..] [AT ..] [BY ..]
tableUnpivot
    : UNPIVOT expr asIdent? atIdent? byIdent?;

// Right-hand side of a join: a non-join source, or a parenthesized table reference.
joinRhs
    : tableNonJoin                           # JoinRhsBase
    | PAREN_LEFT tableReference PAREN_RIGHT  # JoinRhsTableJoined
    ;

// ON <expr>
joinSpec
    : ON expr;

// INNER | LEFT [OUTER] | RIGHT [OUTER] | FULL [OUTER] | OUTER
joinType
    : mod=INNER
    | mod=LEFT OUTER?
    | mod=RIGHT OUTER?
    | mod=FULL OUTER?
    | mod=OUTER
    ;

/**
 *
 * EXPRESSIONS & PRECEDENCE
 *
 * Precedence Table:
 * 1. Primary Expressions: Functions, Literals, Paths, Identifiers, etc (ex: a, f(a), 1, a.b, "a")
 * 2. Unary plus, minus (ex: -a, +a)
 * 3. Multiplication, Division, Modulo (ex: a * b)
 * 4. Addition, Subtraction (ex: a + b)
 * 5. Other operators (ex: a || b)
 * 6. Predicates (ex: a LIKE b, a < b, a IN b, a = b)
 * 7. IS true/false. Not yet implemented in PartiQL, but defined in SQL-92. (ex: a IS TRUE)
 * 8. NOT (ex: NOT a)
 * 9. AND (ex: a AND b)
 * 10. OR (ex: a OR b)
 *
 */

// Top of the expression hierarchy; delegates to the bag-operator level.
expr
    : exprBagOp
    ;

// [OUTER] EXCEPT / UNION / INTERSECT with an optional DISTINCT|ALL quantifier.
// Left-recursive on the left operand; the right operand is an exprSelect.
exprBagOp
    : lhs=exprBagOp OUTER? EXCEPT (DISTINCT|ALL)? rhs=exprSelect           # Except
    | lhs=exprBagOp OUTER? UNION (DISTINCT|ALL)? rhs=exprSelect            # Union
    | lhs=exprBagOp OUTER? INTERSECT (DISTINCT|ALL)? rhs=exprSelect        # Intersect
    | exprSelect                                                           # QueryBase
    ;

// Either a full select-from-where query (SfwQuery) or a plain expression (SfwBase).
// In a query the select and FROM clauses are mandatory; the remaining clauses are
// optional and must appear in the order listed (note LIMIT precedes OFFSET).
exprSelect
    : select=selectClause
        from=fromClause
        let=letClause?
        where=whereClauseSelect?
        group=groupClause?
        having=havingClause?
        order=orderByClause?
        limit=limitClause?
        offset=offsetByClause? # SfwQuery
    | exprOr            # SfwBase
    ;

// Logical OR; left-recursive, operands are exprAnd.
exprOr
    : lhs=exprOr OR rhs=exprAnd     # Or
    | parent=exprAnd                # ExprOrBase
    ;

// Logical AND; left-recursive, operands are exprNot.
exprAnd
    : lhs=exprAnd op=AND rhs=exprNot  # And
    | parent=exprNot                  # ExprAndBase
    ;

// Prefix NOT; may be stacked (NOT NOT a).
exprNot
    : <assoc=right> op=NOT rhs=exprNot  # Not
    | parent=exprPredicate              # ExprNotBase
    ;

// Predicates, all left-recursive on the left operand:
//   comparison operators, IS [NOT] <type>, [NOT] IN, [NOT] LIKE ... [ESCAPE ...],
//   [NOT] BETWEEN ... AND ...
// IN has two alternatives sharing the PredicateIn label: a parenthesized
// expression, and an unparenthesized mathOp00 operand.
exprPredicate
    : lhs=exprPredicate op=(LT_EQ|GT_EQ|ANGLE_LEFT|ANGLE_RIGHT|NEQ|EQ) rhs=mathOp00  # PredicateComparison
    | lhs=exprPredicate IS NOT? type                                                 # PredicateIs
    | lhs=exprPredicate NOT? IN PAREN_LEFT expr PAREN_RIGHT                          # PredicateIn
    | lhs=exprPredicate NOT? IN rhs=mathOp00                                         # PredicateIn
    | lhs=exprPredicate NOT? LIKE rhs=mathOp00 ( ESCAPE escape=expr )?               # PredicateLike
    | lhs=exprPredicate NOT? BETWEEN lower=mathOp00 AND upper=mathOp00               # PredicateBetween
    | parent=mathOp00                                                                # PredicateBase
    ;

// String concatenation (`||`); left-recursive.
mathOp00
    : lhs=mathOp00 op=CONCAT rhs=mathOp01
    | parent=mathOp01
    ;

// Addition and subtraction; left-recursive.
mathOp01
    : lhs=mathOp01 op=(PLUS|MINUS) rhs=mathOp02
    | parent=mathOp02
    ;

// Modulo, multiplication and division; left-recursive.
mathOp02
    : lhs=mathOp02 op=(PERCENT|ASTERISK|SLASH_FORWARD) rhs=valueExpr
    | parent=valueExpr
    ;

// Unary plus/minus, which may be stacked, or a primary expression.
valueExpr
    : sign=(PLUS|MINUS) rhs=valueExpr
    | parent=exprPrimary
    ;

// Primary expressions. Every alternative is labeled ExprPrimaryBase except the
// left-recursive path alternative (ExprPrimaryPath), which applies one or more
// path steps to another primary expression.
exprPrimary
    : exprTerm                   # ExprPrimaryBase
    | cast                       # ExprPrimaryBase
    | sequenceConstructor        # ExprPrimaryBase
    | substring                  # ExprPrimaryBase
    | canCast                    # ExprPrimaryBase
    | canLosslessCast            # ExprPrimaryBase
    | extract                    # ExprPrimaryBase
    | coalesce                   # ExprPrimaryBase
    | dateFunction               # ExprPrimaryBase
    | aggregate                  # ExprPrimaryBase
    | trimFunction               # ExprPrimaryBase
    | functionCall               # ExprPrimaryBase
    | nullIf                     # ExprPrimaryBase
    | exprPrimary pathStep+      # ExprPrimaryPath
    | exprGraphMatchMany         # ExprPrimaryBase
    | caseExpr                   # ExprPrimaryBase
    | valueList                  # ExprPrimaryBase
    | values                     # ExprPrimaryBase
    | windowFunction             # ExprPrimaryBase
    ;

/**
 *
 * PRIMARY EXPRESSIONS
 *
 */

// Atomic terms: a parenthesized expression (which may be a whole query), a `?`
// parameter, a variable reference, a literal, a collection or a tuple.
exprTerm
    : PAREN_LEFT expr PAREN_RIGHT    # ExprTermWrappedQuery
    | parameter                      # ExprTermBase
    | varRefExpr                     # ExprTermBase
    | literal                        # ExprTermBase
    | collection                     # ExprTermBase
    | tuple                          # ExprTermBase
    ;

// NULLIF(<expr>, <expr>)
nullIf
    : NULLIF PAREN_LEFT expr COMMA expr PAREN_RIGHT;

// COALESCE(<expr> [, <expr>]...) with at least one argument.
coalesce
    : COALESCE PAREN_LEFT expr ( COMMA expr )* PAREN_RIGHT;

// CASE [<expr>] (WHEN <expr> THEN <expr>)+ [ELSE <expr>] END
// The optional leading expression distinguishes the "simple" from the "searched" form.
caseExpr
    : CASE case=expr? (WHEN whens+=expr THEN thens+=expr)+ (ELSE else=expr)? END;

// VALUES <row> [, <row>]...
values
    : VALUES valueRow ( COMMA valueRow )*;

// A parenthesized row of one or more expressions.
valueRow
    : PAREN_LEFT expr ( COMMA expr )* PAREN_RIGHT;

// A parenthesized list of TWO or more expressions. A single parenthesized
// expression is matched by exprTerm (ExprTermWrappedQuery) instead.
valueList
    : PAREN_LEFT expr ( COMMA expr )+ PAREN_RIGHT;

// LIST(...) or SEXP(...) with zero or more comma-separated elements.
sequenceConstructor
    : datatype=(LIST|SEXP) PAREN_LEFT (expr ( COMMA expr )* )? PAREN_RIGHT;

// SUBSTRING in comma form, SUBSTRING(s [, start [, length]]), or in keyword
// form, SUBSTRING(s [FROM start [FOR length]]).
substring
    : SUBSTRING PAREN_LEFT expr ( COMMA expr ( COMMA expr )? )? PAREN_RIGHT
    | SUBSTRING PAREN_LEFT expr ( FROM expr ( FOR expr )? )? PAREN_RIGHT
    ;

// COUNT(*) and the one-argument aggregates COUNT/MAX/MIN/SUM/AVG with an
// optional DISTINCT|ALL quantifier.
aggregate
    : func=COUNT PAREN_LEFT ASTERISK PAREN_RIGHT                                        # CountAll
    | func=(COUNT|MAX|MIN|SUM|AVG) PAREN_LEFT setQuantifierStrategy? expr PAREN_RIGHT   # AggregateBase
    ;

/**
*
* Supported Window Functions:
* 1. LAG(expr, [offset [, default]]) OVER([window_partition] window_ordering)
* 2. LEAD(expr, [offset [, default]]) OVER([window_partition] window_ordering)
*
*/
// LAG/LEAD with one to three arguments, followed by a mandatory OVER clause.
windowFunction
    : func=(LAG|LEAD) PAREN_LEFT expr ( COMMA expr (COMMA expr)?)? PAREN_RIGHT over #LagLeadFunction
    ;

// CAST(<expr> AS <type>)
cast
    : CAST PAREN_LEFT expr AS type PAREN_RIGHT;

// CAN_LOSSLESS_CAST(<expr> AS <type>)
canLosslessCast
    : CAN_LOSSLESS_CAST PAREN_LEFT expr AS type PAREN_RIGHT;

// CAN_CAST(<expr> AS <type>)
canCast
    : CAN_CAST PAREN_LEFT expr AS type PAREN_RIGHT;

// EXTRACT(<field> FROM <expr>). The field is any unquoted identifier; its
// validity is not checked by the grammar.
extract
    : EXTRACT PAREN_LEFT IDENTIFIER FROM rhs=expr PAREN_RIGHT;

// TRIM([[<mod>] [<sub>] FROM] <target>). The modifier is any unquoted
// identifier; the grammar does not restrict its value.
trimFunction
    : func=TRIM PAREN_LEFT ( mod=IDENTIFIER? sub=expr? FROM )? target=expr PAREN_RIGHT;

// DATE_ADD / DATE_DIFF (<datetime part>, <expr>, <expr>). The part is any unquoted identifier.
dateFunction
    : func=(DATE_ADD|DATE_DIFF) PAREN_LEFT dt=IDENTIFIER COMMA expr COMMA expr PAREN_RIGHT;

// Generic function call with zero or more arguments.
//   FunctionCallReserved: the function name is one of the listed keyword tokens.
//   FunctionCallIdent:    the function name is a quoted or unquoted identifier.
functionCall
    : name=( CHAR_LENGTH | CHARACTER_LENGTH | OCTET_LENGTH |
        BIT_LENGTH | UPPER | LOWER | SIZE | EXISTS | COUNT )
        PAREN_LEFT ( expr ( COMMA expr )* )? PAREN_RIGHT                         # FunctionCallReserved
    | name=symbolPrimitive PAREN_LEFT ( expr ( COMMA expr )* )? PAREN_RIGHT      # FunctionCallIdent
    ;

// One step of a general path: `[<expr>]`, `[*]`, `.<symbol>` or `.*`.
pathStep
    : BRACKET_LEFT key=expr BRACKET_RIGHT        # PathStepIndexExpr
    | BRACKET_LEFT all=ASTERISK BRACKET_RIGHT    # PathStepIndexAll
    | PERIOD key=symbolPrimitive                 # PathStepDotExpr
    | PERIOD all=ASTERISK                        # PathStepDotAll
    ;

// Parenthesized graph match against a list of patterns: ( <expr> MATCH <pattern list> )
exprGraphMatchMany
    :  PAREN_LEFT exprPrimary MATCH gpmlPatternList PAREN_RIGHT ;

// Unparenthesized graph match against a single pattern; used only by tableBaseReference.
exprGraphMatchOne
    :   exprPrimary MATCH gpmlPattern ;


// Positional parameter placeholder `?`.
parameter
    : QUESTION_MARK;

// Variable reference: an identifier optionally prefixed with `@`.
varRefExpr
    : qualifier=AT_SIGN? ident=(IDENTIFIER|IDENTIFIER_QUOTED);

/**
 *
 * LITERALS & TYPES
 *
 */

// A collection literal: array or bag.
collection
    : array
    | bag
    ;

// [ <expr>, ... ] with zero or more elements.
array
    : BRACKET_LEFT ( expr ( COMMA expr )* )? BRACKET_RIGHT;

// << <expr>, ... >> with zero or more elements.
bag
    : ANGLE_DOUBLE_LEFT ( expr ( COMMA expr )* )? ANGLE_DOUBLE_RIGHT;

// { <key>: <value>, ... } with zero or more pairs.
tuple
    : BRACE_LEFT ( pair ( COMMA pair )* )? BRACE_RIGHT;

// `<expr> : <expr>`; both key and value are arbitrary expressions.
pair
    : lhs=expr COLON rhs=expr;

// Literal values. DATE and TIME literals are a keyword followed by a string;
// TIME additionally accepts an optional parenthesized integer and an optional
// WITH TIME ZONE before the string. LiteralIon is a backtick-delimited
// ION_CLOSURE token produced by the lexer's ION mode.
literal
    : NULL                                                                                # LiteralNull
    | MISSING                                                                             # LiteralMissing
    | TRUE                                                                                # LiteralTrue
    | FALSE                                                                               # LiteralFalse
    | LITERAL_STRING                                                                      # LiteralString
    | LITERAL_INTEGER                                                                     # LiteralInteger
    | LITERAL_DECIMAL                                                                     # LiteralDecimal
    | ION_CLOSURE                                                                         # LiteralIon
    | DATE LITERAL_STRING                                                                 # LiteralDate
    | TIME ( PAREN_LEFT LITERAL_INTEGER PAREN_RIGHT )? (WITH TIME ZONE)? LITERAL_STRING   # LiteralTime
    ;

// Type names, as used by CAST-style functions and the IS predicate.
//   TypeAtomic:    keyword-only types, plus DOUBLE PRECISION
//   TypeArgSingle: CHARACTER/CHAR/FLOAT/VARCHAR with an optional single integer argument
//   TypeVarChar:   CHARACTER VARYING with an optional length
//   TypeArgDouble: DECIMAL/DEC/NUMERIC with optional (arg0 [, arg1])
//   TypeTimeZone:  TIME [(precision)] [WITH TIME ZONE]
//   TypeCustom:    any other identifier
// CHAR and CHARACTER appear in both the first TypeAtomic alternative and
// TypeArgSingle, so an unparameterized CHAR/CHARACTER matches both alternatives.
type
    : datatype=(
        NULL | BOOL | BOOLEAN | SMALLINT | INTEGER2 | INT2 | INTEGER | INT | INTEGER4 | INT4
        | INTEGER8 | INT8 | BIGINT | REAL | TIMESTAMP | CHAR | CHARACTER | MISSING
        | STRING | SYMBOL | BLOB | CLOB | DATE | STRUCT | TUPLE | LIST | SEXP | BAG | ANY
      )                                                                                                                # TypeAtomic
    | datatype=DOUBLE PRECISION                                                                                        # TypeAtomic
    | datatype=(CHARACTER|CHAR|FLOAT|VARCHAR) ( PAREN_LEFT arg0=LITERAL_INTEGER PAREN_RIGHT )?                         # TypeArgSingle
    | CHARACTER VARYING ( PAREN_LEFT arg0=LITERAL_INTEGER PAREN_RIGHT )?                                               # TypeVarChar
    | datatype=(DECIMAL|DEC|NUMERIC) ( PAREN_LEFT arg0=LITERAL_INTEGER ( COMMA arg1=LITERAL_INTEGER )? PAREN_RIGHT )?  # TypeArgDouble
    | TIME ( PAREN_LEFT precision=LITERAL_INTEGER PAREN_RIGHT )? (WITH TIME ZONE)?                                     # TypeTimeZone
    | symbolPrimitive                                                                                                  # TypeCustom
    ;
Lexer Grammar
// Token definitions for PartiQL; imported by the parser grammar through
// `tokenVocab=PartiQLTokens`.
lexer grammar PartiQLTokens;

// With caseInsensitive enabled, the upper-case literals and character sets
// below also match their lower-case forms.
options {
    caseInsensitive = true;
}

/**
 *
 * KEYWORDS
 *
 */

// Reserved words. All of these rules are declared before IDENTIFIER, so a word
// that matches both a keyword and IDENTIFIER with the same length is emitted as
// the keyword token. Not every keyword listed here is referenced by the parser
// grammar.
ABSOLUTE: 'ABSOLUTE';
ACTION: 'ACTION';
ADD: 'ADD';
ALL: 'ALL';
ALLOCATE: 'ALLOCATE';
ALTER: 'ALTER';
AND: 'AND';
ANY: 'ANY';
ARE: 'ARE';
AS: 'AS';
ASC: 'ASC';
ASSERTION: 'ASSERTION';
AT: 'AT';
AUTHORIZATION: 'AUTHORIZATION';
AVG: 'AVG';
BEGIN: 'BEGIN';
BETWEEN: 'BETWEEN';
BIT: 'BIT';
BIT_LENGTH: 'BIT_LENGTH';
BY: 'BY';
CASCADE: 'CASCADE';
CASCADED: 'CASCADED';
CASE: 'CASE';
CAST: 'CAST';
CATALOG: 'CATALOG';
CHAR: 'CHAR';
CHARACTER: 'CHARACTER';
CHARACTER_LENGTH: 'CHARACTER_LENGTH';
CHAR_LENGTH: 'CHAR_LENGTH';
CHECK: 'CHECK';
CLOSE: 'CLOSE';
COALESCE: 'COALESCE';
COLLATE: 'COLLATE';
COLLATION: 'COLLATION';
COLUMN: 'COLUMN';
COMMIT: 'COMMIT';
CONNECT: 'CONNECT';
CONNECTION: 'CONNECTION';
CONSTRAINT: 'CONSTRAINT';
CONSTRAINTS: 'CONSTRAINTS';
CONTINUE: 'CONTINUE';
CONVERT: 'CONVERT';
CORRESPONDING: 'CORRESPONDING';
COUNT: 'COUNT';
CREATE: 'CREATE';
CROSS: 'CROSS';
CURRENT: 'CURRENT';
CURRENT_DATE: 'CURRENT_DATE';
CURRENT_TIME: 'CURRENT_TIME';
CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
CURRENT_USER: 'CURRENT_USER';
CURSOR: 'CURSOR';
DATE: 'DATE';
DEALLOCATE: 'DEALLOCATE';
DEC: 'DEC';
DECIMAL: 'DECIMAL';
DECLARE: 'DECLARE';
DEFAULT: 'DEFAULT';
DEFERRABLE: 'DEFERRABLE';
DEFERRED: 'DEFERRED';
DELETE: 'DELETE';
DESC: 'DESC';
DESCRIBE: 'DESCRIBE';
DESCRIPTOR: 'DESCRIPTOR';
DIAGNOSTICS: 'DIAGNOSTICS';
DISCONNECT: 'DISCONNECT';
DISTINCT: 'DISTINCT';
DOMAIN: 'DOMAIN';
DOUBLE: 'DOUBLE';
DROP: 'DROP';
ELSE: 'ELSE';
END: 'END';
END_EXEC: 'END-EXEC';
ESCAPE: 'ESCAPE';
EXCEPT: 'EXCEPT';
EXCEPTION: 'EXCEPTION';
EXCLUDED: 'EXCLUDED';
EXEC: 'EXEC';
EXECUTE: 'EXECUTE';
EXISTS: 'EXISTS';
EXPLAIN: 'EXPLAIN';
EXTERNAL: 'EXTERNAL';
EXTRACT: 'EXTRACT';
DATE_ADD: 'DATE_ADD';
DATE_DIFF: 'DATE_DIFF';
FALSE: 'FALSE';
FETCH: 'FETCH';
FIRST: 'FIRST';
FLOAT: 'FLOAT';
FOR: 'FOR';
FOREIGN: 'FOREIGN';
FOUND: 'FOUND';
FROM: 'FROM';
FULL: 'FULL';
GET: 'GET';
GLOBAL: 'GLOBAL';
GO: 'GO';
GOTO: 'GOTO';
GRANT: 'GRANT';
GROUP: 'GROUP';
HAVING: 'HAVING';
IDENTITY: 'IDENTITY';
IMMEDIATE: 'IMMEDIATE';
IN: 'IN';
INDICATOR: 'INDICATOR';
INITIALLY: 'INITIALLY';
INNER: 'INNER';
INPUT: 'INPUT';
INSENSITIVE: 'INSENSITIVE';
INSERT: 'INSERT';
INT: 'INT';
INTEGER: 'INTEGER';
INTERSECT: 'INTERSECT';
INTERVAL: 'INTERVAL';
INTO: 'INTO';
IS: 'IS';
ISOLATION: 'ISOLATION';
JOIN: 'JOIN';
KEY: 'KEY';
LANGUAGE: 'LANGUAGE';
LAST: 'LAST';
LATERAL: 'LATERAL';
LEFT: 'LEFT';
LEVEL: 'LEVEL';
LIKE: 'LIKE';
LOCAL: 'LOCAL';
LOWER: 'LOWER';
MATCH: 'MATCH';
MAX: 'MAX';
MIN: 'MIN';
MODULE: 'MODULE';
NAMES: 'NAMES';
NATIONAL: 'NATIONAL';
NATURAL: 'NATURAL';
NCHAR: 'NCHAR';
NEXT: 'NEXT';
NO: 'NO';
NOT: 'NOT';
NULL: 'NULL';
NULLS: 'NULLS';
NULLIF: 'NULLIF';
NUMERIC: 'NUMERIC';
OCTET_LENGTH: 'OCTET_LENGTH';
OF: 'OF';
ON: 'ON';
ONLY: 'ONLY';
OPEN: 'OPEN';
OPTION: 'OPTION';
OR: 'OR';
ORDER: 'ORDER';
OUTER: 'OUTER';
OUTPUT: 'OUTPUT';
OVERLAPS: 'OVERLAPS';
OVERLAY: 'OVERLAY';
PAD: 'PAD';
PARTIAL: 'PARTIAL';
PLACING: 'PLACING';
POSITION: 'POSITION';
PRECISION: 'PRECISION';
PREPARE: 'PREPARE';
PRESERVE: 'PRESERVE';
PRIMARY: 'PRIMARY';
PRIOR: 'PRIOR';
PRIVILEGES: 'PRIVILEGES';
PROCEDURE: 'PROCEDURE';
PUBLIC: 'PUBLIC';
READ: 'READ';
REAL: 'REAL';
REFERENCES: 'REFERENCES';
RELATIVE: 'RELATIVE';
REPLACE: 'REPLACE';
RESTRICT: 'RESTRICT';
REVOKE: 'REVOKE';
RIGHT: 'RIGHT';
ROLLBACK: 'ROLLBACK';
ROWS: 'ROWS';
SCHEMA: 'SCHEMA';
SCROLL: 'SCROLL';
SECTION: 'SECTION';
SELECT: 'SELECT';
SESSION: 'SESSION';
SESSION_USER: 'SESSION_USER';
SET: 'SET';
SHORTEST: 'SHORTEST';
SIZE: 'SIZE';
SMALLINT: 'SMALLINT';
SOME: 'SOME';
SPACE: 'SPACE';
SQL: 'SQL';
SQLCODE: 'SQLCODE';
SQLERROR: 'SQLERROR';
SQLSTATE: 'SQLSTATE';
SUBSTRING: 'SUBSTRING';
SUM: 'SUM';
SYSTEM_USER: 'SYSTEM_USER';
TABLE: 'TABLE';
TEMPORARY: 'TEMPORARY';
THEN: 'THEN';
TIME: 'TIME';
TIMESTAMP: 'TIMESTAMP';
TO: 'TO';
TRANSACTION: 'TRANSACTION';
TRANSLATE: 'TRANSLATE';
TRANSLATION: 'TRANSLATION';
TRIM: 'TRIM';
TRUE: 'TRUE';
UNION: 'UNION';
UNIQUE: 'UNIQUE';
UNKNOWN: 'UNKNOWN';
UPDATE: 'UPDATE';
UPPER: 'UPPER';
UPSERT: 'UPSERT';
USAGE: 'USAGE';
USER: 'USER';
USING: 'USING';
VALUE: 'VALUE';
VALUES: 'VALUES';
VARCHAR: 'VARCHAR';
VARYING: 'VARYING';
VIEW: 'VIEW';
WHEN: 'WHEN';
WHENEVER: 'WHENEVER';
WHERE: 'WHERE';
WITH: 'WITH';
WORK: 'WORK';
WRITE: 'WRITE';
ZONE: 'ZONE';


/**
 * window related
 */
// Keywords used by the parser's windowFunction, over and windowPartitionList rules.
LAG: 'LAG';
LEAD: 'LEAD';
OVER: 'OVER';
PARTITION: 'PARTITION';


/**
 * OTHER
 */
// Additional keywords kept in their own group, separate from the main keyword list above.
CAN_CAST: 'CAN_CAST';
CAN_LOSSLESS_CAST: 'CAN_LOSSLESS_CAST';
MISSING: 'MISSING';
PIVOT: 'PIVOT';
UNPIVOT: 'UNPIVOT';
LIMIT: 'LIMIT';
OFFSET: 'OFFSET';
REMOVE: 'REMOVE';
INDEX: 'INDEX';
LET: 'LET';
CONFLICT: 'CONFLICT';
DO: 'DO';
RETURNING: 'RETURNING';
MODIFIED: 'MODIFIED';
NEW: 'NEW';
OLD: 'OLD';
NOTHING: 'NOTHING';

/**
 *
 * DATA TYPES
 *
 */

// Type-name keywords; all are referenced by the parser's `type` rule
// (LIST and SEXP also by sequenceConstructor).
TUPLE: 'TUPLE';
INTEGER2: 'INTEGER2';
INT2: 'INT2';
INTEGER4: 'INTEGER4';
INT4: 'INT4';
INTEGER8: 'INTEGER8';
INT8: 'INT8';
BIGINT: 'BIGINT';
BOOL: 'BOOL';
BOOLEAN: 'BOOLEAN';
STRING: 'STRING';
SYMBOL: 'SYMBOL';
CLOB: 'CLOB';
BLOB: 'BLOB';
STRUCT: 'STRUCT';
LIST: 'LIST';
SEXP: 'SEXP';
BAG: 'BAG';

/**
 *
 * OPERATORS AND LITERALS
 *
 */

// Operator and punctuation tokens. Multi-character operators (`<=`, `<<`, ...)
// are chosen over their one-character prefixes because the lexer prefers the
// longest match, so the declaration order of `<` and `<<` does not matter.
CARET: '^';
COMMA: ',';
PLUS: '+';
MINUS: '-';
SLASH_FORWARD: '/';
PERCENT: '%';
AT_SIGN: '@';
TILDE: '~';
ASTERISK: '*';
LT_EQ: '<=';
GT_EQ: '>=';
EQ: '=';
// Both spellings of "not equal" produce the same token.
NEQ: '<>' | '!=';
CONCAT: '||';
ANGLE_LEFT: '<';
ANGLE_RIGHT: '>';
ANGLE_DOUBLE_LEFT: '<<';
ANGLE_DOUBLE_RIGHT: '>>';
BRACKET_LEFT: '[';
BRACKET_RIGHT: ']';
BRACE_LEFT: '{';
BRACE_RIGHT: '}';
PAREN_LEFT: '(';
PAREN_RIGHT: ')';
// An opening backtick is not emitted as a token of its own: `more` keeps its
// text for the token in progress and the lexer switches to the ION mode.
BACKTICK: '`' -> more, pushMode(ION);
COLON: ':';
// Despite its name, this token is the semicolon `;`.
COLON_SEMI: ';';
QUESTION_MARK: '?';
PERIOD: '.';

/**
 *
 * LITERALS & IDENTIFIERS
 *
 */

// Single-quoted string. A quote inside the string is written as two
// consecutive single quotes; any other character, including newlines, is allowed.
LITERAL_STRING
    : '\'' ( ('\'\'') | ~('\'') )* '\'';

// One or more decimal digits; no sign.
LITERAL_INTEGER
    : DIGIT DIGIT*;

// Decimal literal, unsigned. Accepted shapes:
//   digits '.' [digits] [exponent]     e.g. 1.   1.5   1.5e10
//   '.' digits [exponent]              e.g. .5   .5e-3
//   digits exponent                    e.g. 1e10
// Every alternative requires a decimal point or an exponent, so this rule never
// matches a plain run of digits. Those are tokenized by LITERAL_INTEGER alone,
// without relying on LITERAL_INTEGER being declared first to win a tie.
LITERAL_DECIMAL:
    DIGIT+ '.' DIGIT* ([e] [+-]? DIGIT+)?
    | '.' DIGIT+ ([e] [+-]? DIGIT+)?
    | DIGIT+ [e] [+-]? DIGIT+
    ;

// Unquoted identifier: a letter, `$` or `_`, followed by letters, digits, `$` or `_`.
// Declared after all keyword rules, so keywords take priority on equal-length matches.
IDENTIFIER
    : [A-Z$_][A-Z0-9$_]*;

// Double-quoted identifier. A double quote inside it is written as two
// consecutive double quotes.
IDENTIFIER_QUOTED
    : '"' ( ('""') | ~('"') )* '"';

/**
 *
 * TO IGNORE
 *
 */

// Runs of whitespace are sent to the hidden channel, so the parser never sees them.
WS
    : WHITESPACE+ -> channel(HIDDEN);

// `--` comment running to the end of the line; the line terminator is optional
// so a comment on the last line of the input is still matched.
COMMENT_SINGLE
    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN);

// `/* ... */` comment, matched non-greedily (ends at the first `*/`; no nesting).
COMMENT_BLOCK
    : '/*' .*? '*/' -> channel(HIDDEN);

// Catch-all for any single character no other rule matches, so that invalid
// input still produces a token. No parser rule references this token.
UNRECOGNIZED
    : . ;

/**
 *
 * FRAGMENTS
 *
 */

// A single decimal digit.
fragment DIGIT
    : [0-9];

// A single letter. Not referenced by any rule visible in this grammar.
fragment LETTER
    : [A-Z];

// Any single character that is not a letter. Not referenced by any rule visible in this grammar.
fragment LETTER_NOT
    : ~[A-Z];

// Space, carriage return, line feed or tab.
fragment WHITESPACE
    : [ \r\n\t];

/**
 *
 * ION MODE
 * Note: This is largely copied from Ion's public ANTLR grammar, but this is used in a very specific manner in PartiQL's
 *  grammar. We use a Lexer Grammar because it allows multiple modes (languages), and whenever we find a straggling
 *  backtick, we use pushMode() to enter Ion's grammar/mode. From there, since we don't necessarily care about the semantics of
 *  Ion, we need to capture *everything* until we see a standalone backtick. So, the only tokens we need to watch out for
 *  are tokens that *may* include a backtick in its contents (such as comments, strings, and quoted symbols). We use the
 *  `-> more` annotation to accumulate all of the received Ion text into a single PartiQL token (ION_CLOSURE), which is
 *  emitted once we see a standalone backtick and pop back out of Ion's mode.
 *
 */

// Rules below apply only while the lexer is in ION mode, which is entered by
// BACKTICK in the default mode.
mode ION;

// `//` comment up to a newline or end of input; a backtick inside it does not end the Ion value.
ION_INLINE_COMMENT
    : '//' .*? (ION_NEWLINE | EOF) -> more;

// `/* ... */` comment; a backtick inside it does not end the Ion value.
ION_BLOCK_COMMENT
    : '/*' .*? '*/' -> more;

// `{{ ... }}` containing base-64 quartets, optional padding and whitespace.
ION_BLOB
    : LOB_START (BASE_64_QUARTET | WS)* BASE_64_PAD? WS* LOB_END -> more;

// Double-quoted Ion string.
SHORT_QUOTED_STRING
    : SHORT_QUOTE STRING_SHORT_TEXT SHORT_QUOTE -> more
    ;

// Triple-single-quoted Ion string.
LONG_QUOTED_STRING
    : LONG_QUOTE STRING_LONG_TEXT LONG_QUOTE -> more
    ;

// Single-quoted Ion symbol.
QUOTED_SYMBOL
    : SYMBOL_QUOTE SYMBOL_TEXT SYMBOL_QUOTE -> more;

// The closing backtick. This is the only rule in this mode without `more`, so
// it emits the token (carrying all text accumulated since the opening backtick)
// and returns to the previous mode.
ION_CLOSURE: '`' -> popMode;

// Any other single character is accumulated into the token in progress.
ION_ANY: . -> more;

// CR LF, CR alone, or LF alone.
fragment ION_NEWLINE
    : '\u000D\u000A'
    | '\u000D'
    | '\u000A'
    ;

// Delimiter of a short (double-quoted) Ion string.
fragment SHORT_QUOTE
    : '"';

// Delimiter of a long Ion string: three single quotes.
fragment LONG_QUOTE
    : '\'\'\'';

// Body of a short string: escapes and allowed characters, matched greedily.
fragment STRING_SHORT_TEXT
    : (TEXT_ESCAPE | STRING_SHORT_TEXT_ALLOWED)*;

// Body of a long string, matched non-greedily so it stops at the first closing delimiter.
fragment STRING_LONG_TEXT
    : (TEXT_ESCAPE | STRING_LONG_TEXT_ALLOWED)*?;

// non-control Unicode and not double quote or backslash
fragment STRING_SHORT_TEXT_ALLOWED
    : '\u0020'..'\u0021' // no C0 control characters (below U+0020) and no U+0022 double quote
    | '\u0023'..'\u005B' // no U+005C backslash
    | '\u005D'..'\uFFFF' // FIXME should be up to U+10FFFF
    | WS_NOT_NL
    ;

// non-control Unicode (newlines are OK)
// NOTE(review): WS below is the default-mode token rule (it carries a
// `-> channel(HIDDEN)` command), not a fragment; confirm that referencing it
// from inside a fragment is intended.
fragment STRING_LONG_TEXT_ALLOWED
    : '\u0020'..'\u005B' // no C0 control characters (below U+0020) and no U+005C backslash
    | '\u005D'..'\uFFFF' // FIXME should be up to U+10FFFF
    | WS
    ;

// Any backslash escape allowed inside Ion strings and quoted symbols.
fragment TEXT_ESCAPE
    : COMMON_ESCAPE | HEX_ESCAPE | UNICODE_ESCAPE;

// Opening delimiter of an Ion blob/clob.
fragment LOB_START
    : '{{';

// Closing delimiter of an Ion blob/clob.
fragment LOB_END
    : '}}';

// Final, padded base-64 group: three characters and one `=`, or two characters and two `=`.
fragment BASE_64_PAD
    : BASE_64_PAD1
    | BASE_64_PAD2
    ;

// Four base-64 characters, optionally separated by whitespace.
fragment BASE_64_QUARTET
    : BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR;

// Three base-64 characters followed by one `=`.
fragment BASE_64_PAD1
    : BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR WS* '=';

// Two base-64 characters followed by two `=`.
fragment BASE_64_PAD2
    : BASE_64_CHAR WS* BASE_64_CHAR WS* '=' WS* '=';

// One base-64 character. Lower-case letters are covered by the grammar-level
// caseInsensitive option.
fragment BASE_64_CHAR
    : [0-9A-Z+/];

// Body of a quoted symbol: escapes and allowed characters.
fragment SYMBOL_TEXT
    : (TEXT_ESCAPE | SYMBOL_TEXT_ALLOWED)*;

// non-control Unicode and not single quote or backslash
fragment SYMBOL_TEXT_ALLOWED
    : '\u0020'..'\u0026' // no C0 control characters (below U+0020) and no U+0027 single quote
    | '\u0028'..'\u005B' // no U+005C backslash
    | '\u005D'..'\uFFFF' // should be up to U+10FFFF
    | WS_NOT_NL
    ;

// Backslash followed by one of the single-character escape codes.
fragment COMMON_ESCAPE
    : '\\' COMMON_ESCAPE_CODE;

// Characters that may follow a backslash; a newline here is also accepted.
fragment COMMON_ESCAPE_CODE
    : 'a'
    | 'b'
    | 't'
    | 'n'
    | 'f'
    | 'r'
    | 'v'
    | '?'
    | '0'
    | '\''
    | '"'
    | '/'
    | '\\'
    | ION_NEWLINE
    ;

// \x followed by exactly two hex digits.
fragment HEX_ESCAPE
    : '\\x' HEX_DIGIT HEX_DIGIT;

// \u with four hex digits, or \U with eight hex digits restricted to the
// prefixes 000 (then five digits) and 0010 (then four digits).
fragment UNICODE_ESCAPE
    : '\\u'     HEX_DIGIT_QUARTET
    | '\\U000'  HEX_DIGIT_QUARTET HEX_DIGIT
    | '\\U0010' HEX_DIGIT_QUARTET
    ;

// Exactly four hex digits.
fragment HEX_DIGIT_QUARTET
    : HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT;

// One hexadecimal digit.
fragment HEX_DIGIT
    : [0-9A-F];

// Whitespace characters other than line terminators.
fragment WS_NOT_NL
    : '\u0009' // tab
    | '\u000B' // vertical tab
    | '\u000C' // form feed
    | '\u0020' // space
    ;

// Delimiter of a quoted Ion symbol.
fragment SYMBOL_QUOTE : '\'';