lexical.go 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462
  1. // Copyright 2021-2022 EMQ Technologies Co., Ltd.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package xsql
  15. import (
  16. "bufio"
  17. "bytes"
  18. "github.com/lf-edge/ekuiper/pkg/ast"
  19. "io"
  20. "strconv"
  21. "strings"
  22. )
  23. type Scanner struct {
  24. r *bufio.Reader
  25. }
  26. func NewScanner(r io.Reader) *Scanner {
  27. return &Scanner{r: bufio.NewReader(r)}
  28. }
  29. func (s *Scanner) Scan() (tok ast.Token, lit string) {
  30. ch := s.read()
  31. if isWhiteSpace(ch) {
  32. //s.unread()
  33. return s.ScanWhiteSpace()
  34. } else if isLetter(ch) {
  35. s.unread()
  36. return s.ScanIdent()
  37. } else if isQuotation(ch) {
  38. s.unread()
  39. return s.ScanString()
  40. } else if isDigit(ch) {
  41. s.unread()
  42. return s.ScanNumber(false, false)
  43. } else if isBackquote(ch) {
  44. return s.ScanBackquoteIdent()
  45. }
  46. switch ch {
  47. case eof:
  48. return ast.EOF, ast.Tokens[ast.EOF]
  49. case '=':
  50. return ast.EQ, ast.Tokens[ast.EQ]
  51. case '!':
  52. _, _ = s.ScanWhiteSpace()
  53. if r := s.read(); r == '=' {
  54. return ast.NEQ, ast.Tokens[ast.NEQ]
  55. } else {
  56. s.unread()
  57. }
  58. return ast.EQ, ast.Tokens[ast.EQ]
  59. case '<':
  60. _, _ = s.ScanWhiteSpace()
  61. if r := s.read(); r == '=' {
  62. return ast.LTE, ast.Tokens[ast.LTE]
  63. } else {
  64. s.unread()
  65. }
  66. return ast.LT, ast.Tokens[ast.LT]
  67. case '>':
  68. _, _ = s.ScanWhiteSpace()
  69. if r := s.read(); r == '=' {
  70. return ast.GTE, ast.Tokens[ast.GTE]
  71. } else {
  72. s.unread()
  73. }
  74. return ast.GT, ast.Tokens[ast.GT]
  75. case '+':
  76. return ast.ADD, ast.Tokens[ast.ADD]
  77. case '-':
  78. _, _ = s.ScanWhiteSpace()
  79. if r := s.read(); r == '-' {
  80. s.skipUntilNewline()
  81. return ast.COMMENT, ""
  82. } else if r == '>' {
  83. return ast.ARROW, ast.Tokens[ast.ARROW]
  84. } else if r == '.' {
  85. _, _ = s.ScanWhiteSpace()
  86. if r1 := s.read(); isDigit(r1) {
  87. s.unread()
  88. return s.ScanNumber(true, true)
  89. } else {
  90. s.unread()
  91. }
  92. s.unread()
  93. } else {
  94. s.unread()
  95. }
  96. return ast.SUB, ast.Tokens[ast.SUB]
  97. case '/':
  98. _, _ = s.ScanWhiteSpace()
  99. if r := s.read(); r == '*' {
  100. if err := s.skipUntilEndComment(); err != nil {
  101. return ast.ILLEGAL, ""
  102. }
  103. return ast.COMMENT, ""
  104. } else {
  105. s.unread()
  106. }
  107. return ast.DIV, ast.Tokens[ast.DIV]
  108. case '.':
  109. if r := s.read(); isDigit(r) {
  110. s.unread()
  111. return s.ScanNumber(true, false)
  112. }
  113. s.unread()
  114. return ast.DOT, ast.Tokens[ast.DOT]
  115. case '%':
  116. return ast.MOD, ast.Tokens[ast.MOD]
  117. case '&':
  118. return ast.BITWISE_AND, ast.Tokens[ast.BITWISE_AND]
  119. case '|':
  120. return ast.BITWISE_OR, ast.Tokens[ast.BITWISE_OR]
  121. case '^':
  122. return ast.BITWISE_XOR, ast.Tokens[ast.BITWISE_XOR]
  123. case '*':
  124. return ast.ASTERISK, ast.Tokens[ast.ASTERISK]
  125. case ',':
  126. return ast.COMMA, ast.Tokens[ast.COMMA]
  127. case '(':
  128. return ast.LPAREN, ast.Tokens[ast.LPAREN]
  129. case ')':
  130. return ast.RPAREN, ast.Tokens[ast.RPAREN]
  131. case '[':
  132. return ast.LBRACKET, ast.Tokens[ast.LBRACKET]
  133. case ']':
  134. return ast.RBRACKET, ast.Tokens[ast.RBRACKET]
  135. case ':':
  136. return ast.COLON, ast.Tokens[ast.COLON]
  137. case '#':
  138. return ast.HASH, ast.Tokens[ast.HASH]
  139. case ';':
  140. return ast.SEMICOLON, ast.Tokens[ast.SEMICOLON]
  141. }
  142. return ast.ILLEGAL, ""
  143. }
  144. func (s *Scanner) ScanIdent() (tok ast.Token, lit string) {
  145. var buf bytes.Buffer
  146. buf.WriteRune(s.read())
  147. for {
  148. if ch := s.read(); ch == eof {
  149. break
  150. } else if !isLetter(ch) && !isDigit(ch) && ch != '_' {
  151. s.unread()
  152. break
  153. } else {
  154. buf.WriteRune(ch)
  155. }
  156. }
  157. switch lit = strings.ToUpper(buf.String()); lit {
  158. case "SELECT":
  159. return ast.SELECT, lit
  160. case "AS":
  161. return ast.AS, lit
  162. case "FROM":
  163. return ast.FROM, lit
  164. case "WHERE":
  165. return ast.WHERE, lit
  166. case "AND":
  167. return ast.AND, lit
  168. case "OR":
  169. return ast.OR, lit
  170. case "GROUP":
  171. return ast.GROUP, lit
  172. case "HAVING":
  173. return ast.HAVING, lit
  174. case "ORDER":
  175. return ast.ORDER, lit
  176. case "BY":
  177. return ast.BY, lit
  178. case "DESC":
  179. return ast.DESC, lit
  180. case "ASC":
  181. return ast.ASC, lit
  182. case "FILTER":
  183. return ast.FILTER, lit
  184. case "INNER":
  185. return ast.INNER, lit
  186. case "LEFT":
  187. return ast.LEFT, lit
  188. case "RIGHT":
  189. return ast.RIGHT, lit
  190. case "FULL":
  191. return ast.FULL, lit
  192. case "CROSS":
  193. return ast.CROSS, lit
  194. case "JOIN":
  195. return ast.JOIN, lit
  196. case "ON":
  197. return ast.ON, lit
  198. case "CASE":
  199. return ast.CASE, lit
  200. case "WHEN":
  201. return ast.WHEN, lit
  202. case "THEN":
  203. return ast.THEN, lit
  204. case "ELSE":
  205. return ast.ELSE, lit
  206. case "END":
  207. return ast.END, lit
  208. case "IN":
  209. return ast.IN, lit
  210. case "NOT":
  211. return ast.NOT, lit
  212. case "BETWEEN":
  213. return ast.BETWEEN, lit
  214. case "LIKE":
  215. return ast.LIKE, lit
  216. case "OVER":
  217. return ast.OVER, lit
  218. case "PARTITION":
  219. return ast.PARTITION, lit
  220. case "CREATE":
  221. return ast.CREATE, lit
  222. case "DROP":
  223. return ast.DROP, lit
  224. case "EXPLAIN":
  225. return ast.EXPLAIN, lit
  226. case "DESCRIBE":
  227. return ast.DESCRIBE, lit
  228. case "SHOW":
  229. return ast.SHOW, lit
  230. case "STREAM":
  231. return ast.STREAM, lit
  232. case "STREAMS":
  233. return ast.STREAMS, lit
  234. case "TABLE":
  235. return ast.TABLE, lit
  236. case "TABLES":
  237. return ast.TABLES, lit
  238. case "WITH":
  239. return ast.WITH, lit
  240. case "BIGINT":
  241. return ast.XBIGINT, lit
  242. case "FLOAT":
  243. return ast.XFLOAT, lit
  244. case "DATETIME":
  245. return ast.XDATETIME, lit
  246. case "STRING":
  247. return ast.XSTRING, lit
  248. case "BYTEA":
  249. return ast.XBYTEA, lit
  250. case "BOOLEAN":
  251. return ast.XBOOLEAN, lit
  252. case "ARRAY":
  253. return ast.XARRAY, lit
  254. case "STRUCT":
  255. return ast.XSTRUCT, lit
  256. case "DATASOURCE":
  257. return ast.DATASOURCE, lit
  258. case "KEY":
  259. return ast.KEY, lit
  260. case "FORMAT":
  261. return ast.FORMAT, lit
  262. case "CONF_KEY":
  263. return ast.CONF_KEY, lit
  264. case "TYPE":
  265. return ast.TYPE, lit
  266. case "TRUE":
  267. return ast.TRUE, lit
  268. case "FALSE":
  269. return ast.FALSE, lit
  270. case "STRICT_VALIDATION":
  271. return ast.STRICT_VALIDATION, lit
  272. case "TIMESTAMP":
  273. return ast.TIMESTAMP, lit
  274. case "TIMESTAMP_FORMAT":
  275. return ast.TIMESTAMP_FORMAT, lit
  276. case "RETAIN_SIZE":
  277. return ast.RETAIN_SIZE, lit
  278. case "SHARED":
  279. return ast.SHARED, lit
  280. case "SCHEMAID":
  281. return ast.SCHEMAID, lit
  282. case "KIND":
  283. return ast.KIND, lit
  284. case "DD":
  285. return ast.DD, lit
  286. case "HH":
  287. return ast.HH, lit
  288. case "MI":
  289. return ast.MI, lit
  290. case "SS":
  291. return ast.SS, lit
  292. case "MS":
  293. return ast.MS, lit
  294. }
  295. return ast.IDENT, buf.String()
  296. }
  297. func (s *Scanner) ScanString() (tok ast.Token, lit string) {
  298. var buf bytes.Buffer
  299. ch := s.read()
  300. buf.WriteRune(ch)
  301. escape := false
  302. for {
  303. ch = s.read()
  304. if ch == '"' && !escape {
  305. buf.WriteRune(ch)
  306. break
  307. } else if ch == eof {
  308. return ast.BADSTRING, buf.String()
  309. } else if ch == '\\' && !escape {
  310. escape = true
  311. buf.WriteRune(ch)
  312. } else {
  313. escape = false
  314. buf.WriteRune(ch)
  315. }
  316. }
  317. r, err := strconv.Unquote(buf.String())
  318. if err != nil {
  319. return ast.ILLEGAL, "invalid string: " + buf.String()
  320. }
  321. return ast.STRING, r
  322. }
  323. func (s *Scanner) ScanDigit() (tok ast.Token, lit string) {
  324. var buf bytes.Buffer
  325. ch := s.read()
  326. buf.WriteRune(ch)
  327. for {
  328. if ch := s.read(); isDigit(ch) {
  329. buf.WriteRune(ch)
  330. } else {
  331. s.unread()
  332. break
  333. }
  334. }
  335. return ast.INTEGER, buf.String()
  336. }
  337. func (s *Scanner) ScanNumber(startWithDot bool, isNeg bool) (tok ast.Token, lit string) {
  338. var buf bytes.Buffer
  339. if isNeg {
  340. buf.WriteRune('-')
  341. }
  342. if startWithDot {
  343. buf.WriteRune('.')
  344. }
  345. ch := s.read()
  346. buf.WriteRune(ch)
  347. isNum := false
  348. for {
  349. if ch := s.read(); isDigit(ch) {
  350. buf.WriteRune(ch)
  351. } else if ch == '.' {
  352. isNum = true
  353. buf.WriteRune(ch)
  354. } else {
  355. s.unread()
  356. break
  357. }
  358. }
  359. if isNum || startWithDot {
  360. return ast.NUMBER, buf.String()
  361. } else {
  362. return ast.INTEGER, buf.String()
  363. }
  364. }
  365. func (s *Scanner) ScanBackquoteIdent() (tok ast.Token, lit string) {
  366. var buf bytes.Buffer
  367. for {
  368. ch := s.read()
  369. if isBackquote(ch) || ch == eof {
  370. break
  371. } else {
  372. buf.WriteRune(ch)
  373. }
  374. }
  375. return ast.IDENT, buf.String()
  376. }
  377. func (s *Scanner) skipUntilNewline() {
  378. for {
  379. if ch := s.read(); ch == '\n' || ch == eof {
  380. return
  381. }
  382. }
  383. }
  384. func (s *Scanner) skipUntilEndComment() error {
  385. for {
  386. if ch1 := s.read(); ch1 == '*' {
  387. // We might be at the end.
  388. star:
  389. ch2 := s.read()
  390. if ch2 == '/' {
  391. return nil
  392. } else if ch2 == '*' {
  393. // We are back in the state machine since we see a star.
  394. goto star
  395. } else if ch2 == eof {
  396. return io.EOF
  397. }
  398. } else if ch1 == eof {
  399. return io.EOF
  400. }
  401. }
  402. }
  403. func (s *Scanner) ScanWhiteSpace() (tok ast.Token, lit string) {
  404. var buf bytes.Buffer
  405. for {
  406. if ch := s.read(); ch == eof {
  407. break
  408. } else if !isWhiteSpace(ch) {
  409. s.unread()
  410. break
  411. } else {
  412. buf.WriteRune(ch)
  413. }
  414. }
  415. return ast.WS, buf.String()
  416. }
  417. func (s *Scanner) read() rune {
  418. ch, _, err := s.r.ReadRune()
  419. if err != nil {
  420. return eof
  421. }
  422. return ch
  423. }
  424. func (s *Scanner) unread() {
  425. _ = s.r.UnreadRune()
  426. }
  427. var eof = rune(0)
  428. func isWhiteSpace(r rune) bool {
  429. return (r == ' ') || (r == '\t') || (r == '\r') || (r == '\n')
  430. }
  431. func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') }
  432. func isDigit(ch rune) bool { return ch >= '0' && ch <= '9' }
  433. func isQuotation(ch rune) bool { return ch == '"' }
  434. func isBackquote(ch rune) bool { return ch == '`' }