123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452 |
- // Copyright 2021-2022 EMQ Technologies Co., Ltd.
- //
- // Licensed under the Apache License, Version 2.0 (the "License");
- // you may not use this file except in compliance with the License.
- // You may obtain a copy of the License at
- //
- // http://www.apache.org/licenses/LICENSE-2.0
- //
- // Unless required by applicable law or agreed to in writing, software
- // distributed under the License is distributed on an "AS IS" BASIS,
- // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- // See the License for the specific language governing permissions and
- // limitations under the License.
- package xsql
- import (
- "bufio"
- "bytes"
- "github.com/lf-edge/ekuiper/pkg/ast"
- "io"
- "strconv"
- "strings"
- )
// Scanner is a lexical scanner: it pulls runes from a buffered reader and
// groups them into tokens.
type Scanner struct {
	// r is the buffered input. Its ReadRune/UnreadRune pair provides the
	// single rune of pushback the scanning methods rely on.
	r *bufio.Reader
}
- func NewScanner(r io.Reader) *Scanner {
- return &Scanner{r: bufio.NewReader(r)}
- }
- func (s *Scanner) Scan() (tok ast.Token, lit string) {
- ch := s.read()
- if isWhiteSpace(ch) {
- //s.unread()
- return s.ScanWhiteSpace()
- } else if isLetter(ch) {
- s.unread()
- return s.ScanIdent()
- } else if isQuotation(ch) {
- s.unread()
- return s.ScanString()
- } else if isDigit(ch) {
- s.unread()
- return s.ScanNumber(false, false)
- } else if isBackquote(ch) {
- return s.ScanBackquoteIdent()
- }
- switch ch {
- case eof:
- return ast.EOF, ast.Tokens[ast.EOF]
- case '=':
- return ast.EQ, ast.Tokens[ast.EQ]
- case '!':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '=' {
- return ast.NEQ, ast.Tokens[ast.NEQ]
- } else {
- s.unread()
- }
- return ast.EQ, ast.Tokens[ast.EQ]
- case '<':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '=' {
- return ast.LTE, ast.Tokens[ast.LTE]
- } else {
- s.unread()
- }
- return ast.LT, ast.Tokens[ast.LT]
- case '>':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '=' {
- return ast.GTE, ast.Tokens[ast.GTE]
- } else {
- s.unread()
- }
- return ast.GT, ast.Tokens[ast.GT]
- case '+':
- return ast.ADD, ast.Tokens[ast.ADD]
- case '-':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '-' {
- s.skipUntilNewline()
- return ast.COMMENT, ""
- } else if r == '>' {
- return ast.ARROW, ast.Tokens[ast.ARROW]
- } else if r == '.' {
- _, _ = s.ScanWhiteSpace()
- if r1 := s.read(); isDigit(r1) {
- s.unread()
- return s.ScanNumber(true, true)
- } else {
- s.unread()
- }
- s.unread()
- } else {
- s.unread()
- }
- return ast.SUB, ast.Tokens[ast.SUB]
- case '/':
- _, _ = s.ScanWhiteSpace()
- if r := s.read(); r == '*' {
- if err := s.skipUntilEndComment(); err != nil {
- return ast.ILLEGAL, ""
- }
- return ast.COMMENT, ""
- } else {
- s.unread()
- }
- return ast.DIV, ast.Tokens[ast.DIV]
- case '.':
- if r := s.read(); isDigit(r) {
- s.unread()
- return s.ScanNumber(true, false)
- }
- s.unread()
- return ast.DOT, ast.Tokens[ast.DOT]
- case '%':
- return ast.MOD, ast.Tokens[ast.MOD]
- case '&':
- return ast.BITWISE_AND, ast.Tokens[ast.BITWISE_AND]
- case '|':
- return ast.BITWISE_OR, ast.Tokens[ast.BITWISE_OR]
- case '^':
- return ast.BITWISE_XOR, ast.Tokens[ast.BITWISE_XOR]
- case '*':
- return ast.ASTERISK, ast.Tokens[ast.ASTERISK]
- case ',':
- return ast.COMMA, ast.Tokens[ast.COMMA]
- case '(':
- return ast.LPAREN, ast.Tokens[ast.LPAREN]
- case ')':
- return ast.RPAREN, ast.Tokens[ast.RPAREN]
- case '[':
- return ast.LBRACKET, ast.Tokens[ast.LBRACKET]
- case ']':
- return ast.RBRACKET, ast.Tokens[ast.RBRACKET]
- case ':':
- return ast.COLON, ast.Tokens[ast.COLON]
- case '#':
- return ast.HASH, ast.Tokens[ast.HASH]
- case ';':
- return ast.SEMICOLON, ast.Tokens[ast.SEMICOLON]
- }
- return ast.ILLEGAL, ""
- }
- func (s *Scanner) ScanIdent() (tok ast.Token, lit string) {
- var buf bytes.Buffer
- buf.WriteRune(s.read())
- for {
- if ch := s.read(); ch == eof {
- break
- } else if !isLetter(ch) && !isDigit(ch) && ch != '_' {
- s.unread()
- break
- } else {
- buf.WriteRune(ch)
- }
- }
- switch lit = strings.ToUpper(buf.String()); lit {
- case "SELECT":
- return ast.SELECT, lit
- case "AS":
- return ast.AS, lit
- case "FROM":
- return ast.FROM, lit
- case "WHERE":
- return ast.WHERE, lit
- case "AND":
- return ast.AND, lit
- case "OR":
- return ast.OR, lit
- case "GROUP":
- return ast.GROUP, lit
- case "HAVING":
- return ast.HAVING, lit
- case "ORDER":
- return ast.ORDER, lit
- case "BY":
- return ast.BY, lit
- case "DESC":
- return ast.DESC, lit
- case "ASC":
- return ast.ASC, lit
- case "FILTER":
- return ast.FILTER, lit
- case "INNER":
- return ast.INNER, lit
- case "LEFT":
- return ast.LEFT, lit
- case "RIGHT":
- return ast.RIGHT, lit
- case "FULL":
- return ast.FULL, lit
- case "CROSS":
- return ast.CROSS, lit
- case "JOIN":
- return ast.JOIN, lit
- case "ON":
- return ast.ON, lit
- case "CASE":
- return ast.CASE, lit
- case "WHEN":
- return ast.WHEN, lit
- case "THEN":
- return ast.THEN, lit
- case "ELSE":
- return ast.ELSE, lit
- case "END":
- return ast.END, lit
- case "IN":
- return ast.IN, lit
- case "NOT":
- return ast.NOT, lit
- case "CREATE":
- return ast.CREATE, lit
- case "DROP":
- return ast.DROP, lit
- case "EXPLAIN":
- return ast.EXPLAIN, lit
- case "DESCRIBE":
- return ast.DESCRIBE, lit
- case "SHOW":
- return ast.SHOW, lit
- case "STREAM":
- return ast.STREAM, lit
- case "STREAMS":
- return ast.STREAMS, lit
- case "TABLE":
- return ast.TABLE, lit
- case "TABLES":
- return ast.TABLES, lit
- case "WITH":
- return ast.WITH, lit
- case "BIGINT":
- return ast.XBIGINT, lit
- case "FLOAT":
- return ast.XFLOAT, lit
- case "DATETIME":
- return ast.XDATETIME, lit
- case "STRING":
- return ast.XSTRING, lit
- case "BYTEA":
- return ast.XBYTEA, lit
- case "BOOLEAN":
- return ast.XBOOLEAN, lit
- case "ARRAY":
- return ast.XARRAY, lit
- case "STRUCT":
- return ast.XSTRUCT, lit
- case "DATASOURCE":
- return ast.DATASOURCE, lit
- case "KEY":
- return ast.KEY, lit
- case "FORMAT":
- return ast.FORMAT, lit
- case "CONF_KEY":
- return ast.CONF_KEY, lit
- case "TYPE":
- return ast.TYPE, lit
- case "TRUE":
- return ast.TRUE, lit
- case "FALSE":
- return ast.FALSE, lit
- case "STRICT_VALIDATION":
- return ast.STRICT_VALIDATION, lit
- case "TIMESTAMP":
- return ast.TIMESTAMP, lit
- case "TIMESTAMP_FORMAT":
- return ast.TIMESTAMP_FORMAT, lit
- case "RETAIN_SIZE":
- return ast.RETAIN_SIZE, lit
- case "SHARED":
- return ast.SHARED, lit
- case "SCHEMAID":
- return ast.SCHEMAID, lit
- case "DD":
- return ast.DD, lit
- case "HH":
- return ast.HH, lit
- case "MI":
- return ast.MI, lit
- case "SS":
- return ast.SS, lit
- case "MS":
- return ast.MS, lit
- }
- return ast.IDENT, buf.String()
- }
- func (s *Scanner) ScanString() (tok ast.Token, lit string) {
- var buf bytes.Buffer
- ch := s.read()
- buf.WriteRune(ch)
- escape := false
- for {
- ch = s.read()
- if ch == '"' && !escape {
- buf.WriteRune(ch)
- break
- } else if ch == eof {
- return ast.BADSTRING, buf.String()
- } else if ch == '\\' && !escape {
- escape = true
- buf.WriteRune(ch)
- } else {
- escape = false
- buf.WriteRune(ch)
- }
- }
- r, err := strconv.Unquote(buf.String())
- if err != nil {
- return ast.ILLEGAL, "invalid string: " + buf.String()
- }
- return ast.STRING, r
- }
- func (s *Scanner) ScanDigit() (tok ast.Token, lit string) {
- var buf bytes.Buffer
- ch := s.read()
- buf.WriteRune(ch)
- for {
- if ch := s.read(); isDigit(ch) {
- buf.WriteRune(ch)
- } else {
- s.unread()
- break
- }
- }
- return ast.INTEGER, buf.String()
- }
- func (s *Scanner) ScanNumber(startWithDot bool, isNeg bool) (tok ast.Token, lit string) {
- var buf bytes.Buffer
- if isNeg {
- buf.WriteRune('-')
- }
- if startWithDot {
- buf.WriteRune('.')
- }
- ch := s.read()
- buf.WriteRune(ch)
- isNum := false
- for {
- if ch := s.read(); isDigit(ch) {
- buf.WriteRune(ch)
- } else if ch == '.' {
- isNum = true
- buf.WriteRune(ch)
- } else {
- s.unread()
- break
- }
- }
- if isNum || startWithDot {
- return ast.NUMBER, buf.String()
- } else {
- return ast.INTEGER, buf.String()
- }
- }
- func (s *Scanner) ScanBackquoteIdent() (tok ast.Token, lit string) {
- var buf bytes.Buffer
- for {
- ch := s.read()
- if isBackquote(ch) || ch == eof {
- break
- } else {
- buf.WriteRune(ch)
- }
- }
- return ast.IDENT, buf.String()
- }
- func (s *Scanner) skipUntilNewline() {
- for {
- if ch := s.read(); ch == '\n' || ch == eof {
- return
- }
- }
- }
- func (s *Scanner) skipUntilEndComment() error {
- for {
- if ch1 := s.read(); ch1 == '*' {
- // We might be at the end.
- star:
- ch2 := s.read()
- if ch2 == '/' {
- return nil
- } else if ch2 == '*' {
- // We are back in the state machine since we see a star.
- goto star
- } else if ch2 == eof {
- return io.EOF
- }
- } else if ch1 == eof {
- return io.EOF
- }
- }
- }
- func (s *Scanner) ScanWhiteSpace() (tok ast.Token, lit string) {
- var buf bytes.Buffer
- for {
- if ch := s.read(); ch == eof {
- break
- } else if !isWhiteSpace(ch) {
- s.unread()
- break
- } else {
- buf.WriteRune(ch)
- }
- }
- return ast.WS, buf.String()
- }
- func (s *Scanner) read() rune {
- ch, _, err := s.r.ReadRune()
- if err != nil {
- return eof
- }
- return ch
- }
- func (s *Scanner) unread() {
- _ = s.r.UnreadRune()
- }
// eof is the sentinel rune that read returns when no more input is available.
// It is the NUL rune, so a literal NUL in the input is indistinguishable from
// the end of input.
var eof = '\x00'
// isWhiteSpace reports whether r is a space, tab, carriage return or newline.
func isWhiteSpace(r rune) bool {
	switch r {
	case ' ', '\t', '\r', '\n':
		return true
	}
	return false
}
// isLetter reports whether ch is an ASCII letter (a-z or A-Z).
func isLetter(ch rune) bool {
	switch {
	case 'a' <= ch && ch <= 'z':
		return true
	case 'A' <= ch && ch <= 'Z':
		return true
	default:
		return false
	}
}
// isDigit reports whether ch is an ASCII decimal digit (0-9).
func isDigit(ch rune) bool {
	return !(ch < '0' || ch > '9')
}
// isQuotation reports whether ch is a double quote, the rune that opens and
// closes a string literal.
func isQuotation(ch rune) bool {
	const doubleQuote = '"'
	return ch == doubleQuote
}
// isBackquote reports whether ch is a backquote, the rune that delimits a
// quoted identifier.
func isBackquote(ch rune) bool {
	const backquote = '`'
	return ch == backquote
}
|