PartiQL ANTLR Grammar
Parser Grammar
// Parser grammar for PartiQL. Token types are not declared here; they are taken
// from the PartiQLTokens vocabulary named by the tokenVocab option below.
grammar PartiQL;
options {
tokenVocab=PartiQLTokens;
caseInsensitive = true;
}
/**
*
* TOP LEVEL
*
*/
root
: (EXPLAIN (PAREN_LEFT explainOption (COMMA explainOption)* PAREN_RIGHT)? )? statement;
// One DQL, DML, DDL or EXEC command. Each form accepts an optional trailing
// semicolon and must be followed by EOF.
statement
    : dql COLON_SEMI? EOF            # QueryDql
    | dml COLON_SEMI? EOF            # QueryDml
    | ddl COLON_SEMI? EOF            # QueryDdl
    | execCommand COLON_SEMI? EOF    # QueryExec
    ;
/**
*
* COMMON STRUCTURES
*
*/
// A single EXPLAIN option: a parameter name followed by its value, both
// written as unquoted identifiers.
explainOption
    : param=IDENTIFIER value=IDENTIFIER
    ;
// Alias clauses: the keyword AS, AT or BY followed by a symbol.
asIdent
    : AS symbolPrimitive
    ;

atIdent
    : AT symbolPrimitive
    ;

byIdent
    : BY symbolPrimitive
    ;
// A symbol: either an unquoted or a double-quoted identifier token.
symbolPrimitive
    : ident=( IDENTIFIER | IDENTIFIER_QUOTED )
    ;
/**
*
* DATA QUERY LANGUAGE (DQL)
*
*/
// A query is simply an expression (SELECT-FROM-WHERE is itself an expression form).
dql
    : expr
    ;
/**
*
* EXECUTE
*
*/
// EXEC <name> [ <arg> { , <arg> } ]  -- the argument list is not parenthesized.
execCommand
    : EXEC name=expr ( args+=expr ( COMMA args+=expr )* )?
    ;
/**
*
* DATA DEFINITION LANGUAGE (DDL)
*
*/
// DDL is limited to CREATE and DROP commands.
ddl
    : createCommand
    | dropCommand
    ;
// CREATE TABLE <name>
// CREATE INDEX ON <name> ( <path> { , <path> } )
createCommand
    : CREATE TABLE symbolPrimitive
        # CreateTable
    | CREATE INDEX ON symbolPrimitive PAREN_LEFT pathSimple ( COMMA pathSimple )* PAREN_RIGHT
        # CreateIndex
    ;
// DROP TABLE <target>
// DROP INDEX <target> ON <on>
dropCommand
    : DROP TABLE target=symbolPrimitive
        # DropTable
    | DROP INDEX target=symbolPrimitive ON on=symbolPrimitive
        # DropIndex
    ;
/**
*
* DATA MANIPULATION LANGUAGE (DML)
*
*/
// Top-level DML forms. The first two alternatives share the DmlBaseWrapper
// label: one or more base commands wrapped by either an UPDATE clause or a
// FROM clause, with optional WHERE and RETURNING clauses.
dml
    : updateClause dmlBaseCommand+ whereClause? returningClause?
        # DmlBaseWrapper
    | fromClause whereClause? dmlBaseCommand+ returningClause?
        # DmlBaseWrapper
    | deleteCommand
        # DmlDelete
    | insertCommandReturning
        # DmlInsertReturning
    | dmlBaseCommand
        # DmlBase
    ;
// The individual data-modifying commands that can appear bare or wrapped.
dmlBaseCommand
    : insertCommand
    | setCommand
    | replaceCommand
    | removeCommand
    | upsertCommand
    ;
// A restricted path used by DML/DDL: a root symbol followed by zero or more
// steps. A step is a bracketed literal, a bracketed symbol, or `.symbol`.
pathSimple
    : symbolPrimitive pathSimpleSteps*
    ;

pathSimpleSteps
    : BRACKET_LEFT key=literal BRACKET_RIGHT
        # PathSimpleLiteral
    | BRACKET_LEFT key=symbolPrimitive BRACKET_RIGHT
        # PathSimpleSymbol
    | PERIOD key=symbolPrimitive
        # PathSimpleDotSymbol
    ;
// REPLACE INTO <target> [AS <alias>] <value>
// Follows https://github.com/partiql/partiql-lang/blob/main/RFCs/0011-partiql-insert.md
replaceCommand
    : REPLACE INTO symbolPrimitive asIdent? value=expr
    ;

// UPSERT INTO <target> [AS <alias>] <value>
// Follows https://github.com/partiql/partiql-lang/blob/main/RFCs/0011-partiql-insert.md
upsertCommand
    : UPSERT INTO symbolPrimitive asIdent? value=expr
    ;
// REMOVE <path>
removeCommand
    : REMOVE pathSimple
    ;
// Legacy `INSERT INTO <path> VALUE <expr> [AT <pos>]` form that may additionally
// carry a RETURNING clause.
insertCommandReturning
    : INSERT INTO pathSimple VALUE value=expr ( AT pos=expr )? onConflictClause? returningClause?
    ;

// INSERT without RETURNING. The first alternative is the legacy VALUE form;
// the second follows the grammar proposed in
// https://github.com/partiql/partiql-lang/blob/main/RFCs/0011-partiql-insert.md#2-proposed-grammar-and-semantics
insertCommand
    : INSERT INTO pathSimple VALUE value=expr ( AT pos=expr )? onConflictClause?
        # InsertLegacy
    | INSERT INTO symbolPrimitive asIdent? value=expr onConflictClause?
        # Insert
    ;
// ON CONFLICT handling. The legacy form only supports
// `ON CONFLICT WHERE <expr> DO NOTHING`; the newer form takes an optional
// conflict target followed by a conflict action.
onConflictClause
    : ON CONFLICT WHERE expr DO NOTHING
        # OnConflictLegacy
    | ON CONFLICT conflictTarget? conflictAction
        # OnConflict
    ;
/**
<conflict target> ::=
( <index target> [, <index target>]... )
| ( { <primary key> | <composite primary key> } )
| ON CONSTRAINT <constraint name>
*/
// Either a parenthesized, comma-separated list of symbols, or
// `ON CONSTRAINT <constraint name>`.
conflictTarget
    : PAREN_LEFT symbolPrimitive ( COMMA symbolPrimitive )* PAREN_RIGHT
    | ON CONSTRAINT constraintName
    ;

constraintName
    : symbolPrimitive
    ;
// What to do when a conflict is detected: nothing, replace, or update.
conflictAction
    : DO NOTHING
    | DO REPLACE doReplace
    | DO UPDATE doUpdate
    ;
/*
<do replace> ::= EXCLUDED
| SET <attr values> [, <attr values>]...
| VALUE <tuple value>
[ WHERE <condition> ]
*/
// Only the EXCLUDED form is accepted at present.
// TODO: add the rest of the grammar (SET / VALUE / WHERE forms).
doReplace
    : EXCLUDED
    ;
/*
<do update> ::= EXCLUDED
| SET <attr values> [, <attr values>]...
| VALUE <tuple value>
[ WHERE <condition> ]
*/
// Only the EXCLUDED form is accepted at present.
// TODO: add the rest of the grammar (SET / VALUE / WHERE forms).
doUpdate
    : EXCLUDED
    ;
// UPDATE <table reference>
updateClause
    : UPDATE tableBaseReference
    ;

// SET <path> = <expr> { , <path> = <expr> }
setCommand
    : SET setAssignment ( COMMA setAssignment )*
    ;

setAssignment
    : pathSimple EQ expr
    ;
// DELETE FROM <path> [WHERE ...] [RETURNING ...]
deleteCommand
    : DELETE fromClauseSimple whereClause? returningClause?
    ;
// RETURNING <column> { , <column> }
returningClause
    : RETURNING returningColumn ( COMMA returningColumn )*
    ;

// Each column is (MODIFIED|ALL) (OLD|NEW) followed by either `*` or an expression.
returningColumn
    : status=( MODIFIED | ALL ) age=( OLD | NEW ) ASTERISK
    | status=( MODIFIED | ALL ) age=( OLD | NEW ) col=expr
    ;
// FROM over a simple path. Aliases are given either with explicit AS/AT/BY
// clauses or as a single bare symbol after the path.
fromClauseSimple
    : FROM pathSimple asIdent? atIdent? byIdent?
        # FromClauseSimpleExplicit
    | FROM pathSimple symbolPrimitive
        # FromClauseSimpleImplicit
    ;
// WHERE <expr> as used by DML and graph patterns.
whereClause
    : WHERE arg=expr
    ;
/**
*
* SELECT AND PROJECTION
*
*/
// The projection part of a query: SELECT *, SELECT <items>, SELECT VALUE <expr>,
// or PIVOT <expr> AT <expr>. The SELECT forms accept an optional DISTINCT/ALL.
selectClause
    : SELECT setQuantifierStrategy? ASTERISK
        # SelectAll
    | SELECT setQuantifierStrategy? projectionItems
        # SelectItems
    | SELECT setQuantifierStrategy? VALUE expr
        # SelectValue
    | PIVOT pivot=expr AT at=expr
        # SelectPivot
    ;
// Comma-separated projection list.
projectionItems
    : projectionItem ( COMMA projectionItem )*
    ;

// An expression with an optional alias; the AS keyword itself is optional.
projectionItem
    : expr ( AS? symbolPrimitive )?
    ;

setQuantifierStrategy
    : DISTINCT
    | ALL
    ;
/**
* LET CLAUSE
*/
// LET <expr> AS <name> { , <expr> AS <name> }
letClause
    : LET letBinding ( COMMA letBinding )*
    ;

letBinding
    : expr AS symbolPrimitive
    ;
/**
*
* ORDER BY CLAUSE
*
*/
// ORDER BY <spec> { , <spec> }
orderByClause
    : ORDER BY orderSortSpec ( COMMA orderSortSpec )*
    ;

// A sort key with optional direction and optional NULLS FIRST/LAST placement.
orderSortSpec
    : expr dir=( ASC | DESC )? ( NULLS nulls=( FIRST | LAST ) )?
    ;
/**
*
* GROUP CLAUSE
*
*/
// GROUP [PARTIAL] BY <key> { , <key> } [GROUP AS <name>]
groupClause
    : GROUP PARTIAL? BY groupKey ( COMMA groupKey )* groupAlias?
    ;

groupAlias
    : GROUP AS symbolPrimitive
    ;

// A grouping key with an optional explicit alias.
groupKey
    : key=exprSelect ( AS symbolPrimitive )?
    ;
/**
*
* Window Function
*/
// OVER ( [PARTITION BY ...] [ORDER BY ...] )
over
    : OVER PAREN_LEFT windowPartitionList? windowSortSpecList? PAREN_RIGHT
    ;

windowPartitionList
    : PARTITION BY expr ( COMMA expr )*
    ;

windowSortSpecList
    : ORDER BY orderSortSpec ( COMMA orderSortSpec )*
    ;
/**
*
* SIMPLE CLAUSES
*
*/
// Single-keyword clauses of a SELECT query. Except for FROM, each takes one
// exprSelect argument labelled `arg`.
havingClause
    : HAVING arg=exprSelect
    ;

fromClause
    : FROM tableReference
    ;

whereClauseSelect
    : WHERE arg=exprSelect
    ;

offsetByClause
    : OFFSET arg=exprSelect
    ;

limitClause
    : LIMIT arg=exprSelect
    ;
/**
*
* GRAPH PATTERN MATCHING LANGUAGE (GPML)
*
*/
// A single graph pattern with an optional selector.
gpmlPattern
    : selector=matchSelector? matchPattern
    ;

// One or more comma-separated graph patterns sharing an optional selector.
gpmlPatternList
    : selector=matchSelector? matchPattern ( COMMA matchPattern )*
    ;

// Optional restrictor, optional `name =` path variable, then any number of parts.
matchPattern
    : restrictor=patternRestrictor? variable=patternPathVariable? graphPart*
    ;
// A pattern element: a node, an edge, or a nested sub-pattern.
graphPart
    : node
    | edge
    | pattern
    ;
// Path selectors: (ANY|ALL) SHORTEST, ANY [k], or SHORTEST k [GROUP].
matchSelector
    : mod=( ANY | ALL ) SHORTEST
        # SelectorBasic
    | ANY k=LITERAL_INTEGER?
        # SelectorAny
    | SHORTEST k=LITERAL_INTEGER GROUP?
        # SelectorShortest
    ;
// `name =` prefix binding a path variable.
patternPathVariable
    : symbolPrimitive EQ
    ;

// Accepted as any IDENTIFIER here; it should be TRAIL / ACYCLIC / SIMPLE, but
// this rule does not enforce that.
patternRestrictor
    : restrictor=IDENTIFIER
    ;
// ( [name] [:label] [WHERE ...] )
node
    : PAREN_LEFT symbolPrimitive? patternPartLabel? whereClause? PAREN_RIGHT
    ;

// An edge, written either with a bracketed spec or in abbreviated form, with an
// optional quantifier.
edge
    : edgeWSpec quantifier=patternQuantifier?
        # EdgeWithSpec
    | edgeAbbrev quantifier=patternQuantifier?
        # EdgeAbbreviated
    ;
// A nested sub-pattern delimited by parentheses or by square brackets. Inside:
// optional restrictor, optional path variable, one or more parts, and an
// optional WHERE; an optional quantifier may follow the closing delimiter.
pattern
    : PAREN_LEFT
        restrictor=patternRestrictor? variable=patternPathVariable? graphPart+ where=whereClause?
      PAREN_RIGHT quantifier=patternQuantifier?
    | BRACKET_LEFT
        restrictor=patternRestrictor? variable=patternPathVariable? graphPart+ where=whereClause?
      BRACKET_RIGHT quantifier=patternQuantifier?
    ;
// `+`, `*`, or `{lower,[upper]}` (the upper bound may be omitted).
patternQuantifier
    : quant=( PLUS | ASTERISK )
    | BRACE_LEFT lower=LITERAL_INTEGER COMMA upper=LITERAL_INTEGER? BRACE_RIGHT
    ;
// Edges carrying a bracketed spec, one alternative per arrow shape:
//   -[..]->   ~[..]~   <-[..]-   ~[..]~>   <~[..]~   <-[..]->   -[..]-
edgeWSpec
    : MINUS edgeSpec MINUS ANGLE_RIGHT
        # EdgeSpecRight
    | TILDE edgeSpec TILDE
        # EdgeSpecUndirected
    | ANGLE_LEFT MINUS edgeSpec MINUS
        # EdgeSpecLeft
    | TILDE edgeSpec TILDE ANGLE_RIGHT
        # EdgeSpecUndirectedRight
    | ANGLE_LEFT TILDE edgeSpec TILDE
        # EdgeSpecUndirectedLeft
    | ANGLE_LEFT MINUS edgeSpec MINUS ANGLE_RIGHT
        # EdgeSpecBidirectional
    | MINUS edgeSpec MINUS
        # EdgeSpecUndirectedBidirectional
    ;
// [ [name] [:label] [WHERE ...] ]
edgeSpec
    : BRACKET_LEFT symbolPrimitive? patternPartLabel? whereClause? BRACKET_RIGHT
    ;

// `:label`
patternPartLabel
    : COLON symbolPrimitive
    ;
// Abbreviated edges without a spec: ~  ~>  <~  and  [<]-[>]
edgeAbbrev
    : TILDE
    | TILDE ANGLE_RIGHT
    | ANGLE_LEFT TILDE
    | ANGLE_LEFT? MINUS ANGLE_RIGHT?
    ;
/**
*
* TABLES & JOINS
*
*/
// Left-recursive table reference. CROSS JOIN and the comma form share the
// TableCrossJoin label; a qualified JOIN requires a join specification.
tableReference
    : lhs=tableReference joinType? CROSS JOIN rhs=joinRhs
        # TableCrossJoin
    | lhs=tableReference COMMA rhs=joinRhs
        # TableCrossJoin
    | lhs=tableReference joinType? JOIN rhs=joinRhs joinSpec
        # TableQualifiedJoin
    | tableNonJoin
        # TableRefBase
    | PAREN_LEFT tableReference PAREN_RIGHT
        # TableWrapped
    ;
// A table source that is not itself a join.
tableNonJoin
    : tableBaseReference
    | tableUnpivot
    ;

// A source expression with aliases: a single bare symbol, explicit AS/AT/BY
// clauses, or a graph MATCH source with explicit clauses.
tableBaseReference
    : source=exprSelect symbolPrimitive
        # TableBaseRefSymbol
    | source=exprSelect asIdent? atIdent? byIdent?
        # TableBaseRefClauses
    | source=exprGraphMatchOne asIdent? atIdent? byIdent?
        # TableBaseRefMatch
    ;

// UNPIVOT <expr> [AS ..] [AT ..] [BY ..]
tableUnpivot
    : UNPIVOT expr asIdent? atIdent? byIdent?
    ;
// Right-hand side of a join: a non-join source, or a parenthesized table reference.
joinRhs
    : tableNonJoin
        # JoinRhsBase
    | PAREN_LEFT tableReference PAREN_RIGHT
        # JoinRhsTableJoined
    ;

// ON <condition>
joinSpec
    : ON expr
    ;
// Join modifier; the leading keyword is captured in `mod`. The OUTER keyword
// is optional after LEFT, RIGHT and FULL.
joinType
    : mod=INNER
    | mod=LEFT OUTER?
    | mod=RIGHT OUTER?
    | mod=FULL OUTER?
    | mod=OUTER
    ;
/**
*
* EXPRESSIONS & PRECEDENCE
*
* Precedence Table:
* 1. Primary Expressions: Functions, Literals, Paths, Identifiers, etc (ex: a, f(a), 1, a.b, "a")
* 2. Unary plus, minus (ex: -a, +a)
* 3. Multiplication, Division, Modulo (ex: a * b)
* 4. Addition, Subtraction (ex: a + b)
* 5. Other operators (ex: a || b)
* 6. Predicates (ex: a LIKE b, a < b, a IN b, a = b)
* 7. IS true/false. Not yet implemented in PartiQL, but defined in SQL-92. (ex: a IS TRUE)
* 8. NOT (ex: NOT a)
* 9. AND (ex: a AND b)
* 10. OR (ex: a OR b)
*
*/
// Root of the expression hierarchy.
expr
    : exprBagOp
    ;

// Bag operators: left-recursive, each with an optional leading OUTER and an
// optional DISTINCT/ALL quantifier.
exprBagOp
    : lhs=exprBagOp OUTER? EXCEPT ( DISTINCT | ALL )? rhs=exprSelect
        # Except
    | lhs=exprBagOp OUTER? UNION ( DISTINCT | ALL )? rhs=exprSelect
        # Union
    | lhs=exprBagOp OUTER? INTERSECT ( DISTINCT | ALL )? rhs=exprSelect
        # Intersect
    | exprSelect
        # QueryBase
    ;
// A SELECT-FROM-WHERE query, or any lower-precedence-level expression.
// Clause order is fixed: select, from, let, where, group, having, order,
// limit, offset. Only the select and from clauses are mandatory.
exprSelect
    : select=selectClause
      from=fromClause
      let=letClause?
      where=whereClauseSelect?
      group=groupClause?
      having=havingClause?
      order=orderByClause?
      limit=limitClause?
      offset=offsetByClause?
        # SfwQuery
    | exprOr
        # SfwBase
    ;
// Boolean connectives, loosest-binding first: OR, then AND, then prefix NOT.
exprOr
    : lhs=exprOr OR rhs=exprAnd
        # Or
    | parent=exprAnd
        # ExprOrBase
    ;

exprAnd
    : lhs=exprAnd op=AND rhs=exprNot
        # And
    | parent=exprNot
        # ExprAndBase
    ;

exprNot
    : <assoc=right> op=NOT rhs=exprNot
        # Not
    | parent=exprPredicate
        # ExprNotBase
    ;
// Predicates: comparison, IS [NOT] <type>, [NOT] IN, [NOT] LIKE [ESCAPE],
// [NOT] BETWEEN ... AND .... Both IN alternatives share the PredicateIn label;
// the parenthesized form is listed first.
exprPredicate
    : lhs=exprPredicate op=( LT_EQ | GT_EQ | ANGLE_LEFT | ANGLE_RIGHT | NEQ | EQ ) rhs=mathOp00
        # PredicateComparison
    | lhs=exprPredicate IS NOT? type
        # PredicateIs
    | lhs=exprPredicate NOT? IN PAREN_LEFT expr PAREN_RIGHT
        # PredicateIn
    | lhs=exprPredicate NOT? IN rhs=mathOp00
        # PredicateIn
    | lhs=exprPredicate NOT? LIKE rhs=mathOp00 ( ESCAPE escape=expr )?
        # PredicateLike
    | lhs=exprPredicate NOT? BETWEEN lower=mathOp00 AND upper=mathOp00
        # PredicateBetween
    | parent=mathOp00
        # PredicateBase
    ;
// Arithmetic/concatenation ladder, loosest-binding first:
//   mathOp00: ||      mathOp01: + -      mathOp02: % * /      valueExpr: unary + -
mathOp00
    : lhs=mathOp00 op=CONCAT rhs=mathOp01
    | parent=mathOp01
    ;

mathOp01
    : lhs=mathOp01 op=( PLUS | MINUS ) rhs=mathOp02
    | parent=mathOp02
    ;

mathOp02
    : lhs=mathOp02 op=( PERCENT | ASTERISK | SLASH_FORWARD ) rhs=valueExpr
    | parent=valueExpr
    ;

valueExpr
    : sign=( PLUS | MINUS ) rhs=valueExpr
    | parent=exprPrimary
    ;
// Primary expressions. Every alternative is labelled ExprPrimaryBase except
// the left-recursive path form (a primary followed by one or more path steps).
// The order of the alternatives is kept exactly as declared.
exprPrimary
    : exprTerm               # ExprPrimaryBase
    | cast                   # ExprPrimaryBase
    | sequenceConstructor    # ExprPrimaryBase
    | substring              # ExprPrimaryBase
    | canCast                # ExprPrimaryBase
    | canLosslessCast        # ExprPrimaryBase
    | extract                # ExprPrimaryBase
    | coalesce               # ExprPrimaryBase
    | dateFunction           # ExprPrimaryBase
    | aggregate              # ExprPrimaryBase
    | trimFunction           # ExprPrimaryBase
    | functionCall           # ExprPrimaryBase
    | nullIf                 # ExprPrimaryBase
    | exprPrimary pathStep+  # ExprPrimaryPath
    | exprGraphMatchMany     # ExprPrimaryBase
    | caseExpr               # ExprPrimaryBase
    | valueList              # ExprPrimaryBase
    | values                 # ExprPrimaryBase
    | windowFunction         # ExprPrimaryBase
    ;
/**
*
* PRIMARY EXPRESSIONS
*
*/
// Atomic terms: a parenthesized expression, a `?` parameter, a variable
// reference, a literal, a collection, or a tuple.
exprTerm
    : PAREN_LEFT expr PAREN_RIGHT    # ExprTermWrappedQuery
    | parameter                      # ExprTermBase
    | varRefExpr                     # ExprTermBase
    | literal                        # ExprTermBase
    | collection                     # ExprTermBase
    | tuple                          # ExprTermBase
    ;
// NULLIF(a, b)
nullIf
    : NULLIF PAREN_LEFT expr COMMA expr PAREN_RIGHT
    ;

// COALESCE(a [, b ...])
coalesce
    : COALESCE PAREN_LEFT expr ( COMMA expr )* PAREN_RIGHT
    ;

// CASE [operand] WHEN .. THEN .. { WHEN .. THEN .. } [ELSE ..] END
// The WHEN and THEN expressions are collected pairwise into `whens` and `thens`.
caseExpr
    : CASE case=expr? ( WHEN whens+=expr THEN thens+=expr )+ ( ELSE else=expr )? END
    ;
// VALUES (..) { , (..) }
values
    : VALUES valueRow ( COMMA valueRow )*
    ;

// A parenthesized row of one or more expressions.
valueRow
    : PAREN_LEFT expr ( COMMA expr )* PAREN_RIGHT
    ;

// A parenthesized list of at least two expressions.
valueList
    : PAREN_LEFT expr ( COMMA expr )+ PAREN_RIGHT
    ;

// LIST(...) or SEXP(...) with zero or more elements.
sequenceConstructor
    : datatype=( LIST | SEXP ) PAREN_LEFT ( expr ( COMMA expr )* )? PAREN_RIGHT
    ;
// SUBSTRING in comma form  (s [, start [, length]])
// or in keyword form       (s [FROM start [FOR length]]).
substring
    : SUBSTRING PAREN_LEFT expr ( COMMA expr ( COMMA expr )? )? PAREN_RIGHT
    | SUBSTRING PAREN_LEFT expr ( FROM expr ( FOR expr )? )? PAREN_RIGHT
    ;
// COUNT(*) and the single-argument aggregates, which take an optional DISTINCT/ALL.
aggregate
    : func=COUNT PAREN_LEFT ASTERISK PAREN_RIGHT
        # CountAll
    | func=( COUNT | MAX | MIN | SUM | AVG ) PAREN_LEFT setQuantifierStrategy? expr PAREN_RIGHT
        # AggregateBase
    ;
/**
*
* Supported Window Functions:
* 1. LAG(expr, [offset [, default]]) OVER([window_partition] window_ordering)
* 2. LEAD(expr, [offset [, default]]) OVER([window_partition] window_ordering)
*
*/
// LAG/LEAD with one to three arguments, followed by a mandatory OVER clause.
windowFunction
    : func=( LAG | LEAD ) PAREN_LEFT expr ( COMMA expr ( COMMA expr )? )? PAREN_RIGHT over
        # LagLeadFunction
    ;
// The three cast-style functions share the shape  NAME ( <expr> AS <type> ).
cast
    : CAST PAREN_LEFT expr AS type PAREN_RIGHT
    ;

canLosslessCast
    : CAN_LOSSLESS_CAST PAREN_LEFT expr AS type PAREN_RIGHT
    ;

canCast
    : CAN_CAST PAREN_LEFT expr AS type PAREN_RIGHT
    ;
// EXTRACT(<identifier> FROM <expr>)
extract
    : EXTRACT PAREN_LEFT IDENTIFIER FROM rhs=expr PAREN_RIGHT
    ;

// TRIM([[<mod>] [<sub>] FROM] <target>); the modifier is lexed as a plain IDENTIFIER.
trimFunction
    : func=TRIM PAREN_LEFT ( mod=IDENTIFIER? sub=expr? FROM )? target=expr PAREN_RIGHT
    ;

// DATE_ADD / DATE_DIFF (<identifier>, <expr>, <expr>)
dateFunction
    : func=( DATE_ADD | DATE_DIFF ) PAREN_LEFT dt=IDENTIFIER COMMA expr COMMA expr PAREN_RIGHT
    ;
// Generic call syntax. The first alternative covers function names that are
// keyword tokens; the second covers any symbol used as a function name.
functionCall
    : name=( CHAR_LENGTH | CHARACTER_LENGTH | OCTET_LENGTH
           | BIT_LENGTH | UPPER | LOWER | SIZE | EXISTS | COUNT )
      PAREN_LEFT ( expr ( COMMA expr )* )? PAREN_RIGHT
        # FunctionCallReserved
    | name=symbolPrimitive PAREN_LEFT ( expr ( COMMA expr )* )? PAREN_RIGHT
        # FunctionCallIdent
    ;
// One path step:  [expr]   [*]   .symbol   .*
pathStep
    : BRACKET_LEFT key=expr BRACKET_RIGHT
        # PathStepIndexExpr
    | BRACKET_LEFT all=ASTERISK BRACKET_RIGHT
        # PathStepIndexAll
    | PERIOD key=symbolPrimitive
        # PathStepDotExpr
    | PERIOD all=ASTERISK
        # PathStepDotAll
    ;
// ( <graph> MATCH <pattern list> )  -- parenthesized, multiple patterns allowed.
exprGraphMatchMany
    : PAREN_LEFT exprPrimary MATCH gpmlPatternList PAREN_RIGHT
    ;

// <graph> MATCH <pattern>  -- unparenthesized, single pattern.
exprGraphMatchOne
    : exprPrimary MATCH gpmlPattern
    ;

// Positional parameter placeholder `?`.
parameter
    : QUESTION_MARK
    ;

// A variable reference, optionally prefixed with `@`.
varRefExpr
    : qualifier=AT_SIGN? ident=( IDENTIFIER | IDENTIFIER_QUOTED )
    ;
/**
*
* LITERALS & TYPES
*
*/
// Collection constructors.
collection
    : array
    | bag
    ;

// [ e1, e2, ... ]  (may be empty)
array
    : BRACKET_LEFT ( expr ( COMMA expr )* )? BRACKET_RIGHT
    ;

// << e1, e2, ... >>  (may be empty)
bag
    : ANGLE_DOUBLE_LEFT ( expr ( COMMA expr )* )? ANGLE_DOUBLE_RIGHT
    ;

// { k1: v1, k2: v2, ... }  (may be empty)
tuple
    : BRACE_LEFT ( pair ( COMMA pair )* )? BRACE_RIGHT
    ;

pair
    : lhs=expr COLON rhs=expr
    ;
// Literal values. DATE and TIME literals are a keyword followed by a string;
// TIME also accepts an optional parenthesized integer and WITH TIME ZONE.
literal
    : NULL               # LiteralNull
    | MISSING            # LiteralMissing
    | TRUE               # LiteralTrue
    | FALSE              # LiteralFalse
    | LITERAL_STRING     # LiteralString
    | LITERAL_INTEGER    # LiteralInteger
    | LITERAL_DECIMAL    # LiteralDecimal
    | ION_CLOSURE        # LiteralIon
    | DATE LITERAL_STRING
        # LiteralDate
    | TIME ( PAREN_LEFT LITERAL_INTEGER PAREN_RIGHT )? ( WITH TIME ZONE )? LITERAL_STRING
        # LiteralTime
    ;
// Type names as used by CAST-style functions and IS. Alternatives are tried in
// the order written: CHAR and CHARACTER appear both in the atomic list and in
// the single-argument form, and any other symbol falls through to TypeCustom.
type
    : datatype=(
          NULL | BOOL | BOOLEAN | SMALLINT | INTEGER2 | INT2 | INTEGER | INT | INTEGER4 | INT4
        | INTEGER8 | INT8 | BIGINT | REAL | TIMESTAMP | CHAR | CHARACTER | MISSING
        | STRING | SYMBOL | BLOB | CLOB | DATE | STRUCT | TUPLE | LIST | SEXP | BAG | ANY
      )
        # TypeAtomic
    | datatype=DOUBLE PRECISION
        # TypeAtomic
    | datatype=( CHARACTER | CHAR | FLOAT | VARCHAR ) ( PAREN_LEFT arg0=LITERAL_INTEGER PAREN_RIGHT )?
        # TypeArgSingle
    | CHARACTER VARYING ( PAREN_LEFT arg0=LITERAL_INTEGER PAREN_RIGHT )?
        # TypeVarChar
    | datatype=( DECIMAL | DEC | NUMERIC ) ( PAREN_LEFT arg0=LITERAL_INTEGER ( COMMA arg1=LITERAL_INTEGER )? PAREN_RIGHT )?
        # TypeArgDouble
    | TIME ( PAREN_LEFT precision=LITERAL_INTEGER PAREN_RIGHT )? ( WITH TIME ZONE )?
        # TypeTimeZone
    | symbolPrimitive
        # TypeCustom
    ;
Lexer Grammar
// Lexer grammar supplying the token vocabulary for the PartiQL parser.
// caseInsensitive = true makes every literal and character set below match
// regardless of letter case.
lexer grammar PartiQLTokens;
options {
caseInsensitive = true;
}
/**
*
* KEYWORDS
*
*/
// Keyword tokens, one rule per keyword. They are declared before IDENTIFIER,
// so when a keyword and IDENTIFIER match the same text the keyword token wins
// (ANTLR prefers the earlier rule on equal-length matches).
ABSOLUTE: 'ABSOLUTE';
ACTION: 'ACTION';
ADD: 'ADD';
ALL: 'ALL';
ALLOCATE: 'ALLOCATE';
ALTER: 'ALTER';
AND: 'AND';
ANY: 'ANY';
ARE: 'ARE';
AS: 'AS';
ASC: 'ASC';
ASSERTION: 'ASSERTION';
AT: 'AT';
AUTHORIZATION: 'AUTHORIZATION';
AVG: 'AVG';
BEGIN: 'BEGIN';
BETWEEN: 'BETWEEN';
BIT: 'BIT';
BIT_LENGTH: 'BIT_LENGTH';
BY: 'BY';
CASCADE: 'CASCADE';
CASCADED: 'CASCADED';
CASE: 'CASE';
CAST: 'CAST';
CATALOG: 'CATALOG';
CHAR: 'CHAR';
CHARACTER: 'CHARACTER';
CHARACTER_LENGTH: 'CHARACTER_LENGTH';
CHAR_LENGTH: 'CHAR_LENGTH';
CHECK: 'CHECK';
CLOSE: 'CLOSE';
COALESCE: 'COALESCE';
COLLATE: 'COLLATE';
COLLATION: 'COLLATION';
COLUMN: 'COLUMN';
COMMIT: 'COMMIT';
CONNECT: 'CONNECT';
CONNECTION: 'CONNECTION';
CONSTRAINT: 'CONSTRAINT';
CONSTRAINTS: 'CONSTRAINTS';
CONTINUE: 'CONTINUE';
CONVERT: 'CONVERT';
CORRESPONDING: 'CORRESPONDING';
COUNT: 'COUNT';
CREATE: 'CREATE';
CROSS: 'CROSS';
CURRENT: 'CURRENT';
CURRENT_DATE: 'CURRENT_DATE';
CURRENT_TIME: 'CURRENT_TIME';
CURRENT_TIMESTAMP: 'CURRENT_TIMESTAMP';
CURRENT_USER: 'CURRENT_USER';
CURSOR: 'CURSOR';
DATE: 'DATE';
DEALLOCATE: 'DEALLOCATE';
DEC: 'DEC';
DECIMAL: 'DECIMAL';
DECLARE: 'DECLARE';
DEFAULT: 'DEFAULT';
DEFERRABLE: 'DEFERRABLE';
DEFERRED: 'DEFERRED';
DELETE: 'DELETE';
DESC: 'DESC';
DESCRIBE: 'DESCRIBE';
DESCRIPTOR: 'DESCRIPTOR';
DIAGNOSTICS: 'DIAGNOSTICS';
DISCONNECT: 'DISCONNECT';
DISTINCT: 'DISTINCT';
DOMAIN: 'DOMAIN';
DOUBLE: 'DOUBLE';
DROP: 'DROP';
ELSE: 'ELSE';
END: 'END';
END_EXEC: 'END-EXEC';
ESCAPE: 'ESCAPE';
EXCEPT: 'EXCEPT';
EXCEPTION: 'EXCEPTION';
EXCLUDED: 'EXCLUDED';
EXEC: 'EXEC';
EXECUTE: 'EXECUTE';
EXISTS: 'EXISTS';
EXPLAIN: 'EXPLAIN';
EXTERNAL: 'EXTERNAL';
EXTRACT: 'EXTRACT';
DATE_ADD: 'DATE_ADD';
DATE_DIFF: 'DATE_DIFF';
FALSE: 'FALSE';
FETCH: 'FETCH';
FIRST: 'FIRST';
FLOAT: 'FLOAT';
FOR: 'FOR';
FOREIGN: 'FOREIGN';
FOUND: 'FOUND';
FROM: 'FROM';
FULL: 'FULL';
GET: 'GET';
GLOBAL: 'GLOBAL';
GO: 'GO';
GOTO: 'GOTO';
GRANT: 'GRANT';
GROUP: 'GROUP';
HAVING: 'HAVING';
IDENTITY: 'IDENTITY';
IMMEDIATE: 'IMMEDIATE';
IN: 'IN';
INDICATOR: 'INDICATOR';
INITIALLY: 'INITIALLY';
INNER: 'INNER';
INPUT: 'INPUT';
INSENSITIVE: 'INSENSITIVE';
INSERT: 'INSERT';
INT: 'INT';
INTEGER: 'INTEGER';
INTERSECT: 'INTERSECT';
INTERVAL: 'INTERVAL';
INTO: 'INTO';
IS: 'IS';
ISOLATION: 'ISOLATION';
JOIN: 'JOIN';
KEY: 'KEY';
LANGUAGE: 'LANGUAGE';
LAST: 'LAST';
LATERAL: 'LATERAL';
LEFT: 'LEFT';
LEVEL: 'LEVEL';
LIKE: 'LIKE';
LOCAL: 'LOCAL';
LOWER: 'LOWER';
MATCH: 'MATCH';
MAX: 'MAX';
MIN: 'MIN';
MODULE: 'MODULE';
NAMES: 'NAMES';
NATIONAL: 'NATIONAL';
NATURAL: 'NATURAL';
NCHAR: 'NCHAR';
NEXT: 'NEXT';
NO: 'NO';
NOT: 'NOT';
NULL: 'NULL';
NULLS: 'NULLS';
NULLIF: 'NULLIF';
NUMERIC: 'NUMERIC';
OCTET_LENGTH: 'OCTET_LENGTH';
OF: 'OF';
ON: 'ON';
ONLY: 'ONLY';
OPEN: 'OPEN';
OPTION: 'OPTION';
OR: 'OR';
ORDER: 'ORDER';
OUTER: 'OUTER';
OUTPUT: 'OUTPUT';
OVERLAPS: 'OVERLAPS';
OVERLAY: 'OVERLAY';
PAD: 'PAD';
PARTIAL: 'PARTIAL';
PLACING: 'PLACING';
POSITION: 'POSITION';
PRECISION: 'PRECISION';
PREPARE: 'PREPARE';
PRESERVE: 'PRESERVE';
PRIMARY: 'PRIMARY';
PRIOR: 'PRIOR';
PRIVILEGES: 'PRIVILEGES';
PROCEDURE: 'PROCEDURE';
PUBLIC: 'PUBLIC';
READ: 'READ';
REAL: 'REAL';
REFERENCES: 'REFERENCES';
RELATIVE: 'RELATIVE';
REPLACE: 'REPLACE';
RESTRICT: 'RESTRICT';
REVOKE: 'REVOKE';
RIGHT: 'RIGHT';
ROLLBACK: 'ROLLBACK';
ROWS: 'ROWS';
SCHEMA: 'SCHEMA';
SCROLL: 'SCROLL';
SECTION: 'SECTION';
SELECT: 'SELECT';
SESSION: 'SESSION';
SESSION_USER: 'SESSION_USER';
SET: 'SET';
SHORTEST: 'SHORTEST';
SIZE: 'SIZE';
SMALLINT: 'SMALLINT';
SOME: 'SOME';
SPACE: 'SPACE';
SQL: 'SQL';
SQLCODE: 'SQLCODE';
SQLERROR: 'SQLERROR';
SQLSTATE: 'SQLSTATE';
SUBSTRING: 'SUBSTRING';
SUM: 'SUM';
SYSTEM_USER: 'SYSTEM_USER';
TABLE: 'TABLE';
TEMPORARY: 'TEMPORARY';
THEN: 'THEN';
TIME: 'TIME';
TIMESTAMP: 'TIMESTAMP';
TO: 'TO';
TRANSACTION: 'TRANSACTION';
TRANSLATE: 'TRANSLATE';
TRANSLATION: 'TRANSLATION';
TRIM: 'TRIM';
TRUE: 'TRUE';
UNION: 'UNION';
UNIQUE: 'UNIQUE';
UNKNOWN: 'UNKNOWN';
UPDATE: 'UPDATE';
UPPER: 'UPPER';
UPSERT: 'UPSERT';
USAGE: 'USAGE';
USER: 'USER';
USING: 'USING';
VALUE: 'VALUE';
VALUES: 'VALUES';
VARCHAR: 'VARCHAR';
VARYING: 'VARYING';
VIEW: 'VIEW';
WHEN: 'WHEN';
WHENEVER: 'WHENEVER';
WHERE: 'WHERE';
WITH: 'WITH';
WORK: 'WORK';
WRITE: 'WRITE';
ZONE: 'ZONE';
/**
* window related
*/
// Keywords used by window functions and the OVER clause.
LAG       : 'LAG' ;
LEAD      : 'LEAD' ;
OVER      : 'OVER' ;
PARTITION : 'PARTITION' ;
/**
* OTHER
*/
// Additional keywords declared separately from the main keyword list.
CAN_CAST          : 'CAN_CAST' ;
CAN_LOSSLESS_CAST : 'CAN_LOSSLESS_CAST' ;
MISSING           : 'MISSING' ;
PIVOT             : 'PIVOT' ;
UNPIVOT           : 'UNPIVOT' ;
LIMIT             : 'LIMIT' ;
OFFSET            : 'OFFSET' ;
REMOVE            : 'REMOVE' ;
INDEX             : 'INDEX' ;
LET               : 'LET' ;
CONFLICT          : 'CONFLICT' ;
DO                : 'DO' ;
RETURNING         : 'RETURNING' ;
MODIFIED          : 'MODIFIED' ;
NEW               : 'NEW' ;
OLD               : 'OLD' ;
NOTHING           : 'NOTHING' ;
/**
*
* DATA TYPES
*
*/
// Data type keywords not already covered by the main keyword list.
TUPLE    : 'TUPLE' ;
INTEGER2 : 'INTEGER2' ;
INT2     : 'INT2' ;
INTEGER4 : 'INTEGER4' ;
INT4     : 'INT4' ;
INTEGER8 : 'INTEGER8' ;
INT8     : 'INT8' ;
BIGINT   : 'BIGINT' ;
BOOL     : 'BOOL' ;
BOOLEAN  : 'BOOLEAN' ;
STRING   : 'STRING' ;
SYMBOL   : 'SYMBOL' ;
CLOB     : 'CLOB' ;
BLOB     : 'BLOB' ;
STRUCT   : 'STRUCT' ;
LIST     : 'LIST' ;
SEXP     : 'SEXP' ;
BAG      : 'BAG' ;
/**
*
* OPERATORS AND LITERALS
*
*/
// Operator and punctuation tokens. Declaration order is kept as-is.
// A backtick emits no token by itself: it starts accumulating text (`more`)
// and switches the lexer into ION mode.
CARET              : '^' ;
COMMA              : ',' ;
PLUS               : '+' ;
MINUS              : '-' ;
SLASH_FORWARD      : '/' ;
PERCENT            : '%' ;
AT_SIGN            : '@' ;
TILDE              : '~' ;
ASTERISK           : '*' ;
LT_EQ              : '<=' ;
GT_EQ              : '>=' ;
EQ                 : '=' ;
NEQ                : '<>' | '!=' ;
CONCAT             : '||' ;
ANGLE_LEFT         : '<' ;
ANGLE_RIGHT        : '>' ;
ANGLE_DOUBLE_LEFT  : '<<' ;
ANGLE_DOUBLE_RIGHT : '>>' ;
BRACKET_LEFT       : '[' ;
BRACKET_RIGHT      : ']' ;
BRACE_LEFT         : '{' ;
BRACE_RIGHT        : '}' ;
PAREN_LEFT         : '(' ;
PAREN_RIGHT        : ')' ;
BACKTICK           : '`' -> more, pushMode(ION) ;
COLON              : ':' ;
COLON_SEMI         : ';' ;
QUESTION_MARK      : '?' ;
PERIOD             : '.' ;
/**
*
* LITERALS & IDENTIFIERS
*
*/
// Single-quoted string; a quote inside the string is written as two quotes.
LITERAL_STRING
    : '\'' ( ('\'\'') | ~('\'') )* '\''
    ;

// One or more decimal digits.
LITERAL_INTEGER
    : DIGIT+
    ;

// Decimal forms: digits '.' [digits], '.' digits, or digits alone, each with an
// optional exponent. The last form also matches a bare digit run, but
// LITERAL_INTEGER is declared earlier and therefore wins that tie.
LITERAL_DECIMAL
    : DIGIT+ '.' DIGIT* ( [e] [+-]? DIGIT+ )?
    | '.' DIGIT+ ( [e] [+-]? DIGIT+ )?
    | DIGIT+ ( [e] [+-]? DIGIT+ )?
    ;

// Unquoted identifier: a letter, `$` or `_`, then letters, digits, `$` or `_`.
IDENTIFIER
    : [A-Z$_] [A-Z0-9$_]*
    ;

// Double-quoted identifier; a double quote inside is written as two double quotes.
IDENTIFIER_QUOTED
    : '"' ( ('""') | ~('"') )* '"'
    ;
/**
*
* TO IGNORE
*
*/
// Whitespace and comments are routed to the hidden channel.
WS
    : WHITESPACE+ -> channel(HIDDEN)
    ;

// `--` to end of line; the line terminator is optional so a comment may end the input.
COMMENT_SINGLE
    : '--' ~[\r\n]* '\r'? '\n'? -> channel(HIDDEN)
    ;

// Non-greedy block comment.
COMMENT_BLOCK
    : '/*' .*? '*/' -> channel(HIDDEN)
    ;

// Catch-all: any single character no other rule matched becomes its own token.
UNRECOGNIZED
    : .
    ;
/**
*
* FRAGMENTS
*
*/
// Character-class helpers for the default mode.
fragment DIGIT
    : [0-9]
    ;

fragment LETTER
    : [A-Z]
    ;

fragment LETTER_NOT
    : ~[A-Z]
    ;

fragment WHITESPACE
    : [ \r\n\t]
    ;
/**
*
* ION MODE
* Note: This is largely copied from Ion's public ANTLR grammar, but this is used in a very specific manner in PartiQL's
* grammar. We use a Lexer Grammar because it allows multiple modes (languages), and whenever we find a straggling
* backtick, we use pushMode() to enter Ion's grammar/mode. From there, since we don't necessarily care about the semantics of
* Ion, we need to capture *everything* until we see a standalone backtick. So, the only tokens we need to watch out for
* are tokens that *may* include a backtick in its contents (such as comments, strings, and quoted symbols). We use the
* `-> more` annotation to accumulate all of the received Ion tokens into a single PartiQL token (ION_CLOSURE). That
* indicates when we've received a standalone backtick and can pop out from Ion's grammar.
*
*/
mode ION;

// Every rule in this mode except ION_CLOSURE uses `more`, so the text keeps
// accumulating until the closing backtick emits one ION_CLOSURE token.
// Comments, blobs, strings and quoted symbols are matched as units.

ION_INLINE_COMMENT
    : '//' .*? ( ION_NEWLINE | EOF ) -> more
    ;

ION_BLOCK_COMMENT
    : '/*' .*? '*/' -> more
    ;

ION_BLOB
    : LOB_START ( BASE_64_QUARTET | WS )* BASE_64_PAD? WS* LOB_END -> more
    ;

SHORT_QUOTED_STRING
    : SHORT_QUOTE STRING_SHORT_TEXT SHORT_QUOTE -> more
    ;

LONG_QUOTED_STRING
    : LONG_QUOTE STRING_LONG_TEXT LONG_QUOTE -> more
    ;

QUOTED_SYMBOL
    : SYMBOL_QUOTE SYMBOL_TEXT SYMBOL_QUOTE -> more
    ;

// The closing backtick: emits the accumulated token and leaves ION mode.
ION_CLOSURE
    : '`' -> popMode
    ;

// Any other single character is simply accumulated.
ION_ANY
    : . -> more
    ;
// CR LF, a lone CR, or a lone LF.
fragment ION_NEWLINE
    : '\u000D\u000A'
    | '\u000D'
    | '\u000A'
    ;

fragment SHORT_QUOTE
    : '"'
    ;

// Three consecutive single quotes.
fragment LONG_QUOTE
    : '\'\'\''
    ;

fragment STRING_SHORT_TEXT
    : ( TEXT_ESCAPE | STRING_SHORT_TEXT_ALLOWED )*
    ;

// Non-greedy, so matching stops at the first closing LONG_QUOTE.
fragment STRING_LONG_TEXT
    : ( TEXT_ESCAPE | STRING_LONG_TEXT_ALLOWED )*?
    ;
// Characters allowed unescaped inside a short (double-quoted) Ion string:
// everything from U+0020 upward except the double quote (U+0022) and the
// backslash (U+005C), plus the non-newline whitespace in WS_NOT_NL.
// The range runs to U+10FFFF so that supplementary-plane characters do not
// break string matching when the lexer is fed a code-point based stream.
fragment STRING_SHORT_TEXT_ALLOWED
    : '\u0020'..'\u0021'        // skips the C0 controls below U+0020 and U+0022 double quote
    | '\u0023'..'\u005B'        // skips U+005C backslash
    | '\u005D'..'\u{10FFFF}'    // through the last Unicode code point
    | WS_NOT_NL
    ;
// Characters allowed unescaped inside a long (triple-quoted) Ion string:
// everything from U+0020 upward except the backslash (U+005C), plus whatever
// WS matches (which includes newlines).
// The range runs to U+10FFFF so that supplementary-plane characters do not
// break string matching when the lexer is fed a code-point based stream.
fragment STRING_LONG_TEXT_ALLOWED
    : '\u0020'..'\u005B'        // skips the C0 controls below U+0020 and U+005C backslash
    | '\u005D'..'\u{10FFFF}'    // through the last Unicode code point
    | WS
    ;
// Any backslash escape accepted inside Ion strings and quoted symbols.
fragment TEXT_ESCAPE
    : COMMON_ESCAPE
    | HEX_ESCAPE
    | UNICODE_ESCAPE
    ;

// Blob delimiters.
fragment LOB_START
    : '{{'
    ;

fragment LOB_END
    : '}}'
    ;

// Base64 building blocks; WS is allowed between the individual characters.
fragment BASE_64_PAD
    : BASE_64_PAD1
    | BASE_64_PAD2
    ;

// Four base64 characters.
fragment BASE_64_QUARTET
    : BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR
    ;

// Three base64 characters and one '='.
fragment BASE_64_PAD1
    : BASE_64_CHAR WS* BASE_64_CHAR WS* BASE_64_CHAR WS* '='
    ;

// Two base64 characters and two '='.
fragment BASE_64_PAD2
    : BASE_64_CHAR WS* BASE_64_CHAR WS* '=' WS* '='
    ;

fragment BASE_64_CHAR
    : [0-9A-Z+/]
    ;

fragment SYMBOL_TEXT
    : ( TEXT_ESCAPE | SYMBOL_TEXT_ALLOWED )*
    ;
// Characters allowed unescaped inside a single-quoted Ion symbol: everything
// from U+0020 upward except the single quote (U+0027) and the backslash
// (U+005C), plus the non-newline whitespace in WS_NOT_NL.
// The range runs to U+10FFFF so that supplementary-plane characters do not
// break symbol matching when the lexer is fed a code-point based stream.
fragment SYMBOL_TEXT_ALLOWED
    : '\u0020'..'\u0026'        // skips the C0 controls below U+0020 and U+0027 single quote
    | '\u0028'..'\u005B'        // skips U+005C backslash
    | '\u005D'..'\u{10FFFF}'    // through the last Unicode code point
    | WS_NOT_NL
    ;
// A backslash followed by one of the single-character escape codes below, or
// by a newline.
fragment COMMON_ESCAPE
    : '\\' COMMON_ESCAPE_CODE
    ;

fragment COMMON_ESCAPE_CODE
    : 'a'
    | 'b'
    | 't'
    | 'n'
    | 'f'
    | 'r'
    | 'v'
    | '?'
    | '0'
    | '\''
    | '"'
    | '/'
    | '\\'
    | ION_NEWLINE
    ;
// \x followed by two hex digits.
fragment HEX_ESCAPE
    : '\\x' HEX_DIGIT HEX_DIGIT
    ;

// \u plus four hex digits, or \U plus eight hex digits whose prefix is either
// 000 (then five digits) or 0010 (then four digits).
fragment UNICODE_ESCAPE
    : '\\u' HEX_DIGIT_QUARTET
    | '\\U000' HEX_DIGIT_QUARTET HEX_DIGIT
    | '\\U0010' HEX_DIGIT_QUARTET
    ;

fragment HEX_DIGIT_QUARTET
    : HEX_DIGIT HEX_DIGIT HEX_DIGIT HEX_DIGIT
    ;

fragment HEX_DIGIT
    : [0-9A-F]
    ;
// Whitespace characters other than line terminators.
fragment WS_NOT_NL
    : '\u0009'    // tab
    | '\u000B'    // vertical tab
    | '\u000C'    // form feed
    | '\u0020'    // space
    ;

// Delimiter of a quoted Ion symbol.
fragment SYMBOL_QUOTE
    : '\''
    ;