|
- package xsql
- import (
- "bufio"
- "bytes"
- "io"
- "strings"
- )
// Token identifies the lexical class of a unit produced by the Scanner.
type Token int

// The Token values. The order matters: the values are assigned with iota and
// are also used as indices into the tokens name table, and range checks such
// as operatorBeg < tok < operatorEnd rely on the relative positions.
const (
	// Special tokens
	ILLEGAL Token = iota
	EOF
	WS
	COMMENT
	AS

	// Literals
	IDENT     // main
	INTEGER   // 12345
	NUMBER    // 12345.67
	STRING    // "abc"
	BADSTRING // "abc

	// Operators. operatorBeg and operatorEnd are unexported markers that
	// bracket the operator range; they are not real tokens.
	operatorBeg
	ADD         // +
	SUB         // -
	MUL         // *
	DIV         // /
	MOD         // %
	BITWISE_AND // &
	BITWISE_OR  // |
	BITWISE_XOR // ^
	AND         // AND
	OR          // OR
	EQ          // =
	NEQ         // !=
	LT          // <
	LTE         // <=
	GT          // >
	GTE         // >=
	SUBSET      // [
	ARROW       // ->
	operatorEnd

	// Misc characters
	ASTERISK  // *
	COMMA     // ,
	LPAREN    // (
	RPAREN    // )
	LBRACKET  // [
	RBRACKET  // ]
	HASH      // #
	DOT       // .
	COLON     // :
	SEMICOLON // ;

	// Keywords
	SELECT
	FROM
	JOIN
	INNER
	LEFT
	RIGHT
	FULL
	CROSS
	ON
	WHERE
	GROUP
	ORDER
	HAVING
	BY
	ASC
	DESC
	TRUE
	FALSE
	CREATE
	DROP
	EXPLAIN
	DESCRIBE
	SHOW
	STREAM
	STREAMS
	WITH
	XBIGINT
	XFLOAT
	XSTRING
	XDATETIME
	XBOOLEAN
	XARRAY
	XSTRUCT
	DATASOURCE
	KEY
	FORMAT
	CONF_KEY
	TYPE
	STRICT_VALIDATION
	TIMESTAMP
	TIMESTAMP_FORMAT

	// Time unit literals; kept contiguous from DD to MS.
	DD
	HH
	MI
	SS
	MS
)

// tokens maps each Token to its printable form, indexed by the Token value.
// Tokens without an entry (SUBSET and the operatorBeg/operatorEnd markers)
// map to the empty string.
var tokens = []string{
	ILLEGAL: "ILLEGAL",
	EOF:     "EOF",
	WS:      "WS",
	COMMENT: "COMMENT",
	AS:      "AS",

	IDENT:     "IDENT",
	INTEGER:   "INTEGER",
	NUMBER:    "NUMBER",
	STRING:    "STRING",
	BADSTRING: "BADSTRING",

	ADD:         "+",
	SUB:         "-",
	MUL:         "*",
	DIV:         "/",
	MOD:         "%",
	BITWISE_AND: "&",
	BITWISE_OR:  "|",
	BITWISE_XOR: "^",
	AND:         "AND",
	OR:          "OR",
	EQ:          "=",
	NEQ:         "!=",
	LT:          "<",
	LTE:         "<=",
	GT:          ">",
	GTE:         ">=",
	ARROW:       "->",

	ASTERISK:  "*",
	COMMA:     ",",
	LPAREN:    "(",
	RPAREN:    ")",
	LBRACKET:  "[",
	RBRACKET:  "]",
	HASH:      "#",
	DOT:       ".",
	COLON:     ":",
	SEMICOLON: ";",

	SELECT:   "SELECT",
	FROM:     "FROM",
	JOIN:     "JOIN",
	INNER:    "INNER",
	LEFT:     "LEFT",
	RIGHT:    "RIGHT",
	FULL:     "FULL",
	CROSS:    "CROSS",
	ON:       "ON",
	WHERE:    "WHERE",
	GROUP:    "GROUP",
	ORDER:    "ORDER",
	HAVING:   "HAVING",
	BY:       "BY",
	ASC:      "ASC",
	DESC:     "DESC",
	TRUE:     "TRUE",
	FALSE:    "FALSE",
	CREATE:   "CREATE",
	DROP:     "DROP",
	EXPLAIN:  "EXPLAIN",
	DESCRIBE: "DESCRIBE",
	SHOW:     "SHOW",
	STREAM:   "STREAM",
	STREAMS:  "STREAMS",
	WITH:     "WITH",

	XBIGINT:   "BIGINT",
	XFLOAT:    "FLOAT",
	XSTRING:   "STRING",
	XDATETIME: "DATETIME",
	XBOOLEAN:  "BOOLEAN",
	XARRAY:    "ARRAY",
	XSTRUCT:   "STRUCT",

	DATASOURCE:        "DATASOURCE",
	KEY:               "KEY",
	FORMAT:            "FORMAT",
	CONF_KEY:          "CONF_KEY",
	TYPE:              "TYPE",
	STRICT_VALIDATION: "STRICT_VALIDATION",
	TIMESTAMP:         "TIMESTAMP",
	TIMESTAMP_FORMAT:  "TIMESTAMP_FORMAT",

	DD: "DD",
	HH: "HH",
	MI: "MI",
	SS: "SS",
	MS: "MS",
}

// String returns the printable form of tok as recorded in the tokens table.
// It returns the empty string for values outside the table and for tokens
// that have no entry.
func (tok Token) String() string {
	if tok < 0 || int(tok) >= len(tokens) {
		return ""
	}
	return tokens[tok]
}
// Scanner is a lexical scanner that reads runes from a buffered reader and
// groups them into tokens.
type Scanner struct {
	r *bufio.Reader // buffered input; supplies the one-rune pushback used by unread
}

// NewScanner returns a Scanner that reads its input from r through a
// bufio.Reader.
func NewScanner(r io.Reader) *Scanner {
	s := new(Scanner)
	s.r = bufio.NewReader(r)
	return s
}
- func (s *Scanner) Scan() (tok Token, lit string) {
- ch := s.read()
- if isWhiteSpace(ch) {
- //s.unread()
- return s.ScanWhiteSpace()
- } else if isLetter(ch) {
- s.unread()
- return s.ScanIdent()
- } else if isQuotation(ch) {
- s.unread()
- return s.ScanString()
- } else if isDigit(ch) {
- s.unread()
- return s.ScanNumber(false, false)
- }
- switch ch {
- case eof:
- return EOF, tokens[EOF]
- case '=':
- return EQ, tokens[EQ]
- case '!':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '=' {
- return NEQ, tokens[NEQ]
- } else {
- s.unread()
- }
- return EQ, tokens[EQ]
- case '<':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '=' {
- return LTE, tokens[LTE]
- } else {
- s.unread()
- }
- return LT, tokens[LT]
- case '>':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '=' {
- return GTE, tokens[GTE]
- } else {
- s.unread()
- }
- return GT, tokens[GT]
- case '+':
- return ADD, tokens[ADD]
- case '-':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '-' {
- s.skipUntilNewline()
- return COMMENT, ""
- } else if r == '>' {
- return ARROW, tokens[ARROW]
- } else if isDigit(r) {
- s.unread()
- return s.ScanNumber(false, true)
- } else if r == '.' {
- _, _ = s.ScanWhiteSpace()
- if r1 := s.read(); isDigit(r1) {
- s.unread()
- return s.ScanNumber(true, true)
- } else {
- s.unread()
- }
- s.unread()
- } else {
- s.unread()
- }
- return SUB, tokens[SUB]
- case '/':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '*' {
- if err := s.skipUntilEndComment(); err != nil {
- return ILLEGAL, ""
- }
- return COMMENT, ""
- } else {
- s.unread()
- }
- return DIV, tokens[DIV]
- case '.':
- if r := s.read(); isDigit(r) {
- s.unread()
- return s.ScanNumber(true, false)
- }
- s.unread()
- return DOT, tokens[DOT]
- case '%':
- return MOD, tokens[MOD]
- case '&':
- return BITWISE_AND, tokens[BITWISE_AND]
- case '|':
- return BITWISE_OR, tokens[BITWISE_OR]
- case '^':
- return BITWISE_XOR, tokens[BITWISE_XOR]
- case '*':
- return ASTERISK, tokens[ASTERISK]
- case ',':
- return COMMA, tokens[COMMA]
- case '(':
- return LPAREN, tokens[LPAREN]
- case ')':
- return RPAREN, tokens[RPAREN]
- case '[':
- return LBRACKET, tokens[LBRACKET]
- case ']':
- return RBRACKET, tokens[RBRACKET]
- case ':':
- return COLON, tokens[COLON]
- case '#':
- return HASH, tokens[HASH]
- case ';':
- return SEMICOLON, tokens[SEMICOLON]
- }
- return ILLEGAL, ""
- }
- func (s *Scanner) ScanIdent() (tok Token, lit string) {
- var buf bytes.Buffer
- buf.WriteRune(s.read())
- for {
- if ch := s.read(); ch == eof {
- break
- } else if !isLetter(ch) && !isDigit(ch) && ch != '_' {
- s.unread()
- break
- } else {
- buf.WriteRune(ch)
- }
- }
- switch lit = strings.ToUpper(buf.String()); lit {
- case "SELECT":
- return SELECT, lit
- case "AS":
- return AS, lit
- case "FROM":
- return FROM, lit
- case "WHERE":
- return WHERE, lit
- case "AND":
- return AND, lit
- case "OR":
- return OR, lit
- case "GROUP":
- return GROUP, lit
- case "HAVING":
- return HAVING, lit
- case "ORDER":
- return ORDER, lit
- case "BY":
- return BY, lit
- case "DESC":
- return DESC, lit
- case "ASC":
- return ASC, lit
- case "INNER":
- return INNER, lit
- case "LEFT":
- return LEFT, lit
- case "RIGHT":
- return RIGHT, lit
- case "FULL":
- return FULL, lit
- case "CROSS":
- return CROSS, lit
- case "JOIN":
- return JOIN, lit
- case "ON":
- return ON, lit
- case "CREATE":
- return CREATE, lit
- case "DROP":
- return DROP, lit
- case "EXPLAIN":
- return EXPLAIN, lit
- case "DESCRIBE":
- return DESCRIBE, lit
- case "SHOW":
- return SHOW, lit
- case "STREAM":
- return STREAM, lit
- case "STREAMS":
- return STREAMS, lit
- case "WITH":
- return WITH, lit
- case "BIGINT":
- return XBIGINT, lit
- case "FLOAT":
- return XFLOAT, lit
- case "DATETIME":
- return XDATETIME, lit
- case "STRING":
- return XSTRING, lit
- case "BOOLEAN":
- return XBOOLEAN, lit
- case "ARRAY":
- return XARRAY, lit
- case "STRUCT":
- return XSTRUCT, lit
- case "DATASOURCE":
- return DATASOURCE, lit
- case "KEY":
- return KEY, lit
- case "FORMAT":
- return FORMAT, lit
- case "CONF_KEY":
- return CONF_KEY, lit
- case "TYPE":
- return TYPE, lit
- case "TRUE":
- return TRUE, lit
- case "FALSE":
- return FALSE, lit
- case "STRICT_VALIDATION":
- return STRICT_VALIDATION, lit
- case "TIMESTAMP":
- return TIMESTAMP, lit
- case "TIMESTAMP_FORMAT":
- return TIMESTAMP_FORMAT, lit
- case "DD":
- return DD, lit
- case "HH":
- return HH, lit
- case "MI":
- return MI, lit
- case "SS":
- return SS, lit
- case "MS":
- return MS, lit
- }
- return IDENT, buf.String()
- }
- func (s *Scanner) ScanString() (tok Token, lit string) {
- var buf bytes.Buffer
- _ = s.read()
- for {
- ch := s.read()
- if ch == '"' {
- break
- } else if ch == eof {
- return BADSTRING, buf.String()
- } else {
- buf.WriteRune(ch)
- }
- }
- return STRING, buf.String()
- }
- func (s *Scanner) ScanDigit() (tok Token, lit string) {
- var buf bytes.Buffer
- ch := s.read()
- buf.WriteRune(ch)
- for {
- if ch := s.read(); isDigit(ch) {
- buf.WriteRune(ch)
- } else {
- s.unread()
- break
- }
- }
- return INTEGER, buf.String()
- }
- func (s *Scanner) ScanNumber(startWithDot bool, isNeg bool) (tok Token, lit string) {
- var buf bytes.Buffer
- if isNeg {
- buf.WriteRune('-')
- }
- if startWithDot {
- buf.WriteRune('.')
- }
- ch := s.read()
- buf.WriteRune(ch)
- isNum := false
- for {
- if ch := s.read(); isDigit(ch) {
- buf.WriteRune(ch)
- } else if ch == '.' {
- isNum = true
- buf.WriteRune(ch)
- } else {
- s.unread()
- break
- }
- }
- if isNum || startWithDot {
- return NUMBER, buf.String()
- } else {
- return INTEGER, buf.String()
- }
- }
- func (s *Scanner) skipUntilNewline() {
- for {
- if ch := s.read(); ch == '\n' || ch == eof {
- return
- }
- }
- }
- func (s *Scanner) skipUntilEndComment() error {
- for {
- if ch1 := s.read(); ch1 == '*' {
- // We might be at the end.
- star:
- ch2 := s.read()
- if ch2 == '/' {
- return nil
- } else if ch2 == '*' {
- // We are back in the state machine since we see a star.
- goto star
- } else if ch2 == eof {
- return io.EOF
- }
- } else if ch1 == eof {
- return io.EOF
- }
- }
- }
- func (s *Scanner) ScanWhiteSpace() (tok Token, lit string) {
- var buf bytes.Buffer
- for {
- if ch := s.read(); ch == eof {
- break
- } else if !isWhiteSpace(ch) {
- s.unread()
- break
- } else {
- buf.WriteRune(ch)
- }
- }
- return WS, buf.String()
- }
- func (s *Scanner) read() rune {
- ch, _, err := s.r.ReadRune()
- if err != nil {
- return eof
- }
- return ch
- }
- func (s *Scanner) unread() {
- _ = s.r.UnreadRune()
- }
// eof is the sentinel rune returned by read when no more input is available.
// It is the NUL rune, so a literal NUL in the input is indistinguishable from
// end of input.
var eof rune = 0
// isWhiteSpace reports whether r is a space, tab, carriage return or newline.
func isWhiteSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// isLetter reports whether ch is an ASCII letter (a-z or A-Z).
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	}
	return false
}
// isDigit reports whether ch is an ASCII decimal digit (0-9).
func isDigit(ch rune) bool {
	if ch < '0' || ch > '9' {
		return false
	}
	return true
}
// isQuotation reports whether ch is the double-quote character that opens and
// closes a string literal.
func isQuotation(ch rune) bool {
	const doubleQuote = '"'
	return ch == doubleQuote
}
- func (tok Token) isOperator() bool { return (tok > operatorBeg && tok < operatorEnd) || tok == ASTERISK || tok == LBRACKET }
- func (tok Token) isTimeLiteral() bool { return tok >= DD && tok <= MS }
- func (tok Token) allowedSourceToken() bool {
- return tok == IDENT || tok == DIV || tok == HASH || tok == ADD
- }
- //Allowed special field name token
- func (tok Token) allowedSFNToken() bool { return tok == DOT }
- func (tok Token) Precedence() int {
- switch tok {
- case OR:
- return 1
- case AND:
- return 2
- case EQ, NEQ, LT, LTE, GT, GTE:
- return 3
- case ADD, SUB, BITWISE_OR, BITWISE_XOR:
- return 4
- case MUL, DIV, MOD, BITWISE_AND, SUBSET, ARROW:
- return 5
- }
- return 0
- }
// DataType enumerates the field data types.
type DataType int

// The DataType values. The simple types BIGINT through BOOLEAN are kept
// contiguous; ARRAY and STRUCT follow them.
const (
	UNKNOWN DataType = iota
	BIGINT
	FLOAT
	STRINGS
	DATETIME
	BOOLEAN
	ARRAY
	STRUCT
)

// dataTypes holds the lower-case name of each DataType, indexed by its value.
// UNKNOWN has no entry and therefore maps to the empty string.
var dataTypes = []string{
	BIGINT:   "bigint",
	FLOAT:    "float",
	STRINGS:  "string",
	DATETIME: "datetime",
	BOOLEAN:  "boolean",
	ARRAY:    "array",
	STRUCT:   "struct",
}

// isSimpleType reports whether d is one of BIGINT, FLOAT, STRINGS, DATETIME
// or BOOLEAN.
func (d DataType) isSimpleType() bool {
	switch d {
	case BIGINT, FLOAT, STRINGS, DATETIME, BOOLEAN:
		return true
	}
	return false
}

// String returns the lower-case name of d, or the empty string when d is
// UNKNOWN or outside the dataTypes table.
func (d DataType) String() string {
	if d < 0 || int(d) >= len(dataTypes) {
		return ""
	}
	return dataTypes[d]
}
- func getDataType(tok Token) DataType {
- switch tok {
- case XBIGINT:
- return BIGINT
- case XFLOAT:
- return FLOAT
- case XSTRING:
- return STRINGS
- case XDATETIME:
- return DATETIME
- case XBOOLEAN:
- return BOOLEAN
- case XARRAY:
- return ARRAY
- case XSTRUCT:
- return STRUCT
- }
- return UNKNOWN
- }
|