lexical.go 8.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431
  1. // Copyright 2021-2023 EMQ Technologies Co., Ltd.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package xsql
  15. import (
  16. "bufio"
  17. "bytes"
  18. "io"
  19. "strconv"
  20. "strings"
  21. "github.com/lf-edge/ekuiper/pkg/ast"
  22. )
  23. type Scanner struct {
  24. r *bufio.Reader
  25. buf *bytes.Buffer
  26. }
  27. func NewScanner(r io.Reader) *Scanner {
  28. return &Scanner{r: bufio.NewReader(r), buf: &bytes.Buffer{}}
  29. }
  30. func (s *Scanner) Scan() (tok ast.Token, lit string) {
  31. ch := s.read()
  32. if isWhiteSpace(ch) {
  33. // s.unread()
  34. return s.ScanWhiteSpace()
  35. } else if isLetter(ch) {
  36. s.unread()
  37. return s.ScanIdent()
  38. } else if isQuotation(ch) {
  39. s.unread()
  40. return s.ScanString(ch == '\'')
  41. } else if isDigit(ch) {
  42. s.unread()
  43. return s.ScanNumber(false, false)
  44. } else if isBackquote(ch) {
  45. return s.ScanBackquoteIdent()
  46. }
  47. switch ch {
  48. case eof:
  49. return ast.EOF, ast.Tokens[ast.EOF]
  50. case '=':
  51. return ast.EQ, ast.Tokens[ast.EQ]
  52. case '!':
  53. _, _ = s.ScanWhiteSpace()
  54. if r := s.read(); r == '=' {
  55. return ast.NEQ, ast.Tokens[ast.NEQ]
  56. } else {
  57. s.unread()
  58. }
  59. return ast.EQ, ast.Tokens[ast.EQ]
  60. case '<':
  61. _, _ = s.ScanWhiteSpace()
  62. if r := s.read(); r == '=' {
  63. return ast.LTE, ast.Tokens[ast.LTE]
  64. } else {
  65. s.unread()
  66. }
  67. return ast.LT, ast.Tokens[ast.LT]
  68. case '>':
  69. _, _ = s.ScanWhiteSpace()
  70. if r := s.read(); r == '=' {
  71. return ast.GTE, ast.Tokens[ast.GTE]
  72. } else {
  73. s.unread()
  74. }
  75. return ast.GT, ast.Tokens[ast.GT]
  76. case '+':
  77. return ast.ADD, ast.Tokens[ast.ADD]
  78. case '-':
  79. _, _ = s.ScanWhiteSpace()
  80. if r := s.read(); r == '-' {
  81. s.skipUntilNewline()
  82. return ast.COMMENT, ""
  83. } else if r == '>' {
  84. return ast.ARROW, ast.Tokens[ast.ARROW]
  85. } else if r == '.' {
  86. _, _ = s.ScanWhiteSpace()
  87. if r1 := s.read(); isDigit(r1) {
  88. s.unread()
  89. return s.ScanNumber(true, true)
  90. } else {
  91. s.unread()
  92. }
  93. s.unread()
  94. } else {
  95. s.unread()
  96. }
  97. return ast.SUB, ast.Tokens[ast.SUB]
  98. case '/':
  99. _, _ = s.ScanWhiteSpace()
  100. if r := s.read(); r == '*' {
  101. if err := s.skipUntilEndComment(); err != nil {
  102. return ast.ILLEGAL, ""
  103. }
  104. return ast.COMMENT, ""
  105. } else {
  106. s.unread()
  107. }
  108. return ast.DIV, ast.Tokens[ast.DIV]
  109. case '.':
  110. if r := s.read(); isDigit(r) {
  111. s.unread()
  112. return s.ScanNumber(true, false)
  113. }
  114. s.unread()
  115. return ast.DOT, ast.Tokens[ast.DOT]
  116. case '%':
  117. return ast.MOD, ast.Tokens[ast.MOD]
  118. case '&':
  119. return ast.BITWISE_AND, ast.Tokens[ast.BITWISE_AND]
  120. case '|':
  121. return ast.BITWISE_OR, ast.Tokens[ast.BITWISE_OR]
  122. case '^':
  123. return ast.BITWISE_XOR, ast.Tokens[ast.BITWISE_XOR]
  124. case '*':
  125. return ast.ASTERISK, ast.Tokens[ast.ASTERISK]
  126. case ',':
  127. return ast.COMMA, ast.Tokens[ast.COMMA]
  128. case '(':
  129. return ast.LPAREN, ast.Tokens[ast.LPAREN]
  130. case ')':
  131. return ast.RPAREN, ast.Tokens[ast.RPAREN]
  132. case '[':
  133. return ast.LBRACKET, ast.Tokens[ast.LBRACKET]
  134. case ']':
  135. return ast.RBRACKET, ast.Tokens[ast.RBRACKET]
  136. case ':':
  137. return ast.COLON, ast.Tokens[ast.COLON]
  138. case '#':
  139. return ast.HASH, ast.Tokens[ast.HASH]
  140. case ';':
  141. return ast.SEMICOLON, ast.Tokens[ast.SEMICOLON]
  142. }
  143. return ast.ILLEGAL, ""
  144. }
  145. func (s *Scanner) ScanIdent() (tok ast.Token, lit string) {
  146. s.buf.Reset()
  147. s.buf.WriteRune(s.read())
  148. for {
  149. if ch := s.read(); ch == eof {
  150. break
  151. } else if !isLetter(ch) && !isDigit(ch) && ch != '_' {
  152. s.unread()
  153. break
  154. } else {
  155. s.buf.WriteRune(ch)
  156. }
  157. }
  158. word := s.buf.String()
  159. switch lit = strings.ToUpper(word); lit {
  160. case "SELECT":
  161. return ast.SELECT, lit
  162. case "AS":
  163. return ast.AS, lit
  164. case "FROM":
  165. return ast.FROM, lit
  166. case "WHERE":
  167. return ast.WHERE, lit
  168. case "AND":
  169. return ast.AND, lit
  170. case "OR":
  171. return ast.OR, lit
  172. case "GROUP":
  173. return ast.GROUP, lit
  174. case "HAVING":
  175. return ast.HAVING, lit
  176. case "ORDER":
  177. return ast.ORDER, lit
  178. case "BY":
  179. return ast.BY, lit
  180. case "DESC":
  181. return ast.DESC, lit
  182. case "ASC":
  183. return ast.ASC, lit
  184. case "FILTER":
  185. return ast.FILTER, lit
  186. case "INNER":
  187. return ast.INNER, lit
  188. case "LEFT":
  189. return ast.LEFT, lit
  190. case "RIGHT":
  191. return ast.RIGHT, lit
  192. case "FULL":
  193. return ast.FULL, lit
  194. case "CROSS":
  195. return ast.CROSS, lit
  196. case "JOIN":
  197. return ast.JOIN, lit
  198. case "ON":
  199. return ast.ON, lit
  200. case "CASE":
  201. return ast.CASE, lit
  202. case "WHEN":
  203. return ast.WHEN, lit
  204. case "THEN":
  205. return ast.THEN, lit
  206. case "ELSE":
  207. return ast.ELSE, lit
  208. case "END":
  209. return ast.END, lit
  210. case "IN":
  211. return ast.IN, lit
  212. case "NOT":
  213. return ast.NOT, lit
  214. case "BETWEEN":
  215. return ast.BETWEEN, lit
  216. case "LIKE":
  217. return ast.LIKE, lit
  218. case "OVER":
  219. return ast.OVER, lit
  220. case "PARTITION":
  221. return ast.PARTITION, lit
  222. case "REPLACE":
  223. return ast.REPLACE, lit
  224. case "EXCEPT":
  225. return ast.EXCEPT, lit
  226. case "TRUE":
  227. return ast.TRUE, lit
  228. case "FALSE":
  229. return ast.FALSE, lit
  230. case "DD":
  231. return ast.DD, lit
  232. case "HH":
  233. return ast.HH, lit
  234. case "MI":
  235. return ast.MI, lit
  236. case "SS":
  237. return ast.SS, lit
  238. case "MS":
  239. return ast.MS, lit
  240. case "LIMIT":
  241. return ast.LIMIT, lit
  242. }
  243. return ast.IDENT, word
  244. }
  245. func (s *Scanner) ScanString(isSingle bool) (tok ast.Token, lit string) {
  246. s.buf.Reset()
  247. ch := s.read()
  248. if ch == '\'' && isSingle {
  249. s.buf.WriteRune('"')
  250. } else {
  251. s.buf.WriteRune(ch)
  252. }
  253. escape := false
  254. for {
  255. ch = s.read()
  256. if ch == '"' && !escape {
  257. if !isSingle {
  258. s.buf.WriteRune(ch)
  259. break
  260. } else {
  261. escape = false
  262. s.buf.WriteRune('\\')
  263. s.buf.WriteRune(ch)
  264. }
  265. } else if ch == '\'' && !escape && isSingle {
  266. s.buf.WriteRune('"')
  267. break
  268. } else if ch == eof {
  269. return ast.BADSTRING, s.buf.String()
  270. } else if ch == '\\' && !escape {
  271. escape = true
  272. nextCh := s.read()
  273. if nextCh == '\'' && isSingle {
  274. s.buf.WriteRune(nextCh)
  275. } else {
  276. s.buf.WriteRune(ch)
  277. s.unread()
  278. }
  279. } else {
  280. escape = false
  281. s.buf.WriteRune(ch)
  282. }
  283. }
  284. word := s.buf.String()
  285. r, err := strconv.Unquote(word)
  286. if err != nil {
  287. return ast.ILLEGAL, "invalid string: " + word
  288. }
  289. if isSingle {
  290. return ast.SINGLEQUOTE, r
  291. }
  292. return ast.STRING, r
  293. }
  294. func (s *Scanner) ScanDigit() (tok ast.Token, lit string) {
  295. s.buf.Reset()
  296. ch := s.read()
  297. s.buf.WriteRune(ch)
  298. for {
  299. if ch := s.read(); isDigit(ch) {
  300. s.buf.WriteRune(ch)
  301. } else {
  302. s.unread()
  303. break
  304. }
  305. }
  306. return ast.INTEGER, s.buf.String()
  307. }
  308. func (s *Scanner) ScanNumber(startWithDot bool, isNeg bool) (tok ast.Token, lit string) {
  309. s.buf.Reset()
  310. if isNeg {
  311. s.buf.WriteRune('-')
  312. }
  313. if startWithDot {
  314. s.buf.WriteRune('.')
  315. }
  316. ch := s.read()
  317. s.buf.WriteRune(ch)
  318. isNum := false
  319. for {
  320. if ch := s.read(); isDigit(ch) {
  321. s.buf.WriteRune(ch)
  322. } else if ch == '.' {
  323. isNum = true
  324. s.buf.WriteRune(ch)
  325. } else {
  326. s.unread()
  327. break
  328. }
  329. }
  330. if isNum || startWithDot {
  331. return ast.NUMBER, s.buf.String()
  332. } else {
  333. return ast.INTEGER, s.buf.String()
  334. }
  335. }
  336. func (s *Scanner) ScanBackquoteIdent() (tok ast.Token, lit string) {
  337. s.buf.Reset()
  338. for {
  339. ch := s.read()
  340. if isBackquote(ch) || ch == eof {
  341. break
  342. }
  343. s.buf.WriteRune(ch)
  344. }
  345. return ast.IDENT, s.buf.String()
  346. }
  347. func (s *Scanner) skipUntilNewline() {
  348. for {
  349. if ch := s.read(); ch == '\n' || ch == eof {
  350. return
  351. }
  352. }
  353. }
  354. func (s *Scanner) skipUntilEndComment() error {
  355. for {
  356. if ch1 := s.read(); ch1 == '*' {
  357. // We might be at the end.
  358. star:
  359. ch2 := s.read()
  360. if ch2 == '/' {
  361. return nil
  362. } else if ch2 == '*' {
  363. // We are back in the state machine since we see a star.
  364. goto star
  365. } else if ch2 == eof {
  366. return io.EOF
  367. }
  368. } else if ch1 == eof {
  369. return io.EOF
  370. }
  371. }
  372. }
  373. func (s *Scanner) ScanWhiteSpace() (tok ast.Token, lit string) {
  374. s.buf.Reset()
  375. for {
  376. if ch := s.read(); ch == eof {
  377. break
  378. } else if !isWhiteSpace(ch) {
  379. s.unread()
  380. break
  381. } else {
  382. s.buf.WriteRune(ch)
  383. }
  384. }
  385. return ast.WS, s.buf.String()
  386. }
  387. func (s *Scanner) read() rune {
  388. ch, _, err := s.r.ReadRune()
  389. if err != nil {
  390. return eof
  391. }
  392. return ch
  393. }
// unread pushes the most recently read rune back onto the input.
//
// The error from UnreadRune is deliberately discarded. Per the bufio
// documentation, UnreadRune fails when the previous operation on the reader
// was not a ReadRune, so a second consecutive unread has no effect.
func (s *Scanner) unread() {
	_ = s.r.UnreadRune()
}
  397. var eof = rune(0)
  398. func isWhiteSpace(r rune) bool {
  399. return (r == ' ') || (r == '\t') || (r == '\r') || (r == '\n')
  400. }
  401. func isLetter(ch rune) bool { return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') }
  402. func isDigit(ch rune) bool { return ch >= '0' && ch <= '9' }
  403. func isQuotation(ch rune) bool { return ch == '"' || ch == '\'' }
  404. func isBackquote(ch rune) bool { return ch == '`' }