analyzer.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668
  1. // Copyright 2022-2023 EMQ Technologies Co., Ltd.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package planner
  15. import (
  16. "fmt"
  17. "sort"
  18. "strings"
  19. "github.com/lf-edge/ekuiper/internal/binder/function"
  20. "github.com/lf-edge/ekuiper/internal/schema"
  21. "github.com/lf-edge/ekuiper/internal/xsql"
  22. "github.com/lf-edge/ekuiper/pkg/ast"
  23. "github.com/lf-edge/ekuiper/pkg/kv"
  24. )
  25. type streamInfo struct {
  26. stmt *ast.StreamStmt
  27. schema ast.StreamFields
  28. }
  29. // Analyze the select statement by decorating the info from stream statement.
  30. // Typically, set the correct stream name for fieldRefs
  31. func decorateStmt(s *ast.SelectStatement, store kv.KeyValue) ([]*streamInfo, []*ast.Call, error) {
  32. streamsFromStmt := xsql.GetStreams(s)
  33. streamStmts := make([]*streamInfo, len(streamsFromStmt))
  34. isSchemaless := false
  35. for i, s := range streamsFromStmt {
  36. streamStmt, err := xsql.GetDataSource(store, s)
  37. if err != nil {
  38. return nil, nil, fmt.Errorf("fail to get stream %s, please check if stream is created", s)
  39. }
  40. si, err := convertStreamInfo(streamStmt)
  41. if err != nil {
  42. return nil, nil, err
  43. }
  44. streamStmts[i] = si
  45. if si.schema == nil {
  46. isSchemaless = true
  47. }
  48. }
  49. if checkAliasReferenceCycle(s) {
  50. return nil, nil, fmt.Errorf("select fields have cycled alias")
  51. }
  52. if !isSchemaless {
  53. aliasFieldTopoSort(s, streamStmts)
  54. }
  55. dsn := ast.DefaultStream
  56. if len(streamsFromStmt) == 1 {
  57. dsn = streamStmts[0].stmt.Name
  58. }
  59. // [fieldName][streamsName][*aliasRef] if alias, with special key alias/default. Each key has exactly one value
  60. fieldsMap := newFieldsMap(isSchemaless, dsn)
  61. if !isSchemaless {
  62. for _, streamStmt := range streamStmts {
  63. for _, field := range streamStmt.schema {
  64. fieldsMap.reserve(field.Name, streamStmt.stmt.Name)
  65. }
  66. }
  67. }
  68. var (
  69. walkErr error
  70. aliasFields []*ast.Field
  71. analyticFuncs []*ast.Call
  72. )
  73. // Scan columns fields: bind all field refs, collect alias
  74. for i, f := range s.Fields {
  75. ast.WalkFunc(f.Expr, func(n ast.Node) bool {
  76. switch f := n.(type) {
  77. case *ast.FieldRef:
  78. walkErr = fieldsMap.bind(f)
  79. }
  80. return true
  81. })
  82. if walkErr != nil {
  83. return nil, nil, walkErr
  84. }
  85. if f.AName != "" {
  86. aliasFields = append(aliasFields, &s.Fields[i])
  87. fieldsMap.bindAlias(f.AName)
  88. }
  89. }
  90. // bind alias field expressions
  91. for _, f := range aliasFields {
  92. ar, err := ast.NewAliasRef(f.Expr)
  93. if err != nil {
  94. walkErr = err
  95. } else {
  96. f.Expr = &ast.FieldRef{
  97. StreamName: ast.AliasStream,
  98. Name: f.AName,
  99. AliasRef: ar,
  100. }
  101. walkErr = fieldsMap.save(f.AName, ast.AliasStream, ar)
  102. for _, subF := range s.Fields {
  103. ast.WalkFunc(&subF, func(node ast.Node) bool {
  104. switch fr := node.(type) {
  105. case *ast.FieldRef:
  106. if fr.Name == f.AName {
  107. fr.StreamName = ast.AliasStream
  108. fr.AliasRef = ar
  109. }
  110. return false
  111. }
  112. return true
  113. })
  114. }
  115. }
  116. }
  117. // Bind field ref for alias AND set StreamName for all field ref
  118. ast.WalkFunc(s, func(n ast.Node) bool {
  119. switch f := n.(type) {
  120. case ast.Fields: // do not bind selection fields, should have done above
  121. return false
  122. case *ast.FieldRef:
  123. if f.StreamName != "" && f.StreamName != ast.DefaultStream {
  124. // check if stream exists
  125. found := false
  126. for _, sn := range streamsFromStmt {
  127. if sn == string(f.StreamName) {
  128. found = true
  129. break
  130. }
  131. }
  132. if !found {
  133. walkErr = fmt.Errorf("stream %s not found", f.StreamName)
  134. return true
  135. }
  136. }
  137. walkErr = fieldsMap.bind(f)
  138. }
  139. return true
  140. })
  141. if walkErr != nil {
  142. return nil, nil, walkErr
  143. }
  144. walkErr = validate(s)
  145. // Collect all analytic function calls so that we can let them run firstly
  146. ast.WalkFunc(s, func(n ast.Node) bool {
  147. switch f := n.(type) {
  148. case ast.Fields:
  149. return false
  150. case *ast.Call:
  151. if function.IsAnalyticFunc(f.Name) {
  152. f.CachedField = fmt.Sprintf("%s_%s_%d", function.AnalyticPrefix, f.Name, f.FuncId)
  153. f.Cached = true
  154. analyticFuncs = append(analyticFuncs, &ast.Call{
  155. Name: f.Name,
  156. FuncId: f.FuncId,
  157. FuncType: f.FuncType,
  158. Args: f.Args,
  159. CachedField: f.CachedField,
  160. Partition: f.Partition,
  161. WhenExpr: f.WhenExpr,
  162. })
  163. }
  164. }
  165. return true
  166. })
  167. if walkErr != nil {
  168. return nil, nil, walkErr
  169. }
  170. // walk sources at last to let them run firstly
  171. // because another clause may depend on the alias defined here
  172. ast.WalkFunc(s.Fields, func(n ast.Node) bool {
  173. switch f := n.(type) {
  174. case *ast.Call:
  175. if function.IsAnalyticFunc(f.Name) {
  176. f.CachedField = fmt.Sprintf("%s_%s_%d", function.AnalyticPrefix, f.Name, f.FuncId)
  177. f.Cached = true
  178. analyticFuncs = append(analyticFuncs, &ast.Call{
  179. Name: f.Name,
  180. FuncId: f.FuncId,
  181. FuncType: f.FuncType,
  182. Args: f.Args,
  183. CachedField: f.CachedField,
  184. Partition: f.Partition,
  185. WhenExpr: f.WhenExpr,
  186. })
  187. }
  188. }
  189. return true
  190. })
  191. if walkErr != nil {
  192. return nil, nil, walkErr
  193. }
  194. return streamStmts, analyticFuncs, walkErr
  195. }
  196. type aliasTopoDegree struct {
  197. alias string
  198. degree int
  199. field ast.Field
  200. }
  201. type aliasTopoDegrees []*aliasTopoDegree
  202. func (a aliasTopoDegrees) Len() int {
  203. return len(a)
  204. }
  205. func (a aliasTopoDegrees) Less(i, j int) bool {
  206. if a[i].degree == a[j].degree {
  207. return a[i].alias < a[j].alias
  208. }
  209. return a[i].degree < a[j].degree
  210. }
  211. func (a aliasTopoDegrees) Swap(i, j int) {
  212. a[i], a[j] = a[j], a[i]
  213. }
  214. // checkAliasReferenceCycle checks whether exists select a + 1 as b, b + 1 as a from demo;
  215. func checkAliasReferenceCycle(s *ast.SelectStatement) bool {
  216. aliasRef := make(map[string]map[string]struct{})
  217. for _, field := range s.Fields {
  218. if len(field.AName) > 0 {
  219. aliasRef[field.AName] = make(map[string]struct{})
  220. }
  221. }
  222. if len(aliasRef) < 1 {
  223. return false
  224. }
  225. hasCycleAlias := false
  226. for _, field := range s.Fields {
  227. if len(field.AName) > 0 {
  228. ast.WalkFunc(&field, func(node ast.Node) bool {
  229. switch f := node.(type) {
  230. case *ast.FieldRef:
  231. if len(f.Name) > 0 {
  232. if f.Name == field.AName {
  233. return true
  234. }
  235. _, ok := aliasRef[f.Name]
  236. if ok {
  237. aliasRef[field.AName][f.Name] = struct{}{}
  238. v, ok1 := aliasRef[f.Name]
  239. if ok1 {
  240. _, ok2 := v[field.AName]
  241. if ok2 {
  242. hasCycleAlias = true
  243. return false
  244. }
  245. }
  246. }
  247. }
  248. }
  249. return true
  250. })
  251. if hasCycleAlias {
  252. return true
  253. }
  254. }
  255. }
  256. return false
  257. }
  258. func aliasFieldTopoSort(s *ast.SelectStatement, streamStmts []*streamInfo) {
  259. nonAliasFields := make([]ast.Field, 0)
  260. aliasDegreeMap := make(map[string]*aliasTopoDegree)
  261. for _, field := range s.Fields {
  262. if field.AName != "" {
  263. aliasDegreeMap[field.AName] = &aliasTopoDegree{
  264. alias: field.AName,
  265. degree: -1,
  266. field: field,
  267. }
  268. } else {
  269. nonAliasFields = append(nonAliasFields, field)
  270. }
  271. }
  272. for !isAliasFieldTopoSortFinish(aliasDegreeMap) {
  273. for _, field := range s.Fields {
  274. if field.AName != "" && aliasDegreeMap[field.AName].degree < 0 {
  275. skip := false
  276. degree := 0
  277. ast.WalkFunc(field.Expr, func(node ast.Node) bool {
  278. switch f := node.(type) {
  279. case *ast.FieldRef:
  280. if fDegree, ok := aliasDegreeMap[f.Name]; ok && fDegree.degree >= 0 {
  281. if degree < fDegree.degree+1 {
  282. degree = fDegree.degree + 1
  283. }
  284. return true
  285. }
  286. if !isFieldRefNameExists(f.Name, streamStmts) {
  287. skip = true
  288. return false
  289. }
  290. }
  291. return true
  292. })
  293. if !skip {
  294. aliasDegreeMap[field.AName].degree = degree
  295. }
  296. }
  297. }
  298. }
  299. as := make(aliasTopoDegrees, 0)
  300. for _, degree := range aliasDegreeMap {
  301. as = append(as, degree)
  302. }
  303. sort.Sort(as)
  304. s.Fields = make([]ast.Field, 0)
  305. for _, d := range as {
  306. s.Fields = append(s.Fields, d.field)
  307. }
  308. s.Fields = append(s.Fields, nonAliasFields...)
  309. }
  310. func isFieldRefNameExists(name string, streamStmts []*streamInfo) bool {
  311. for _, streamStmt := range streamStmts {
  312. for _, col := range streamStmt.schema {
  313. if col.Name == name {
  314. return true
  315. }
  316. }
  317. }
  318. return false
  319. }
  320. func isAliasFieldTopoSortFinish(aliasDegrees map[string]*aliasTopoDegree) bool {
  321. for _, aliasDegree := range aliasDegrees {
  322. if aliasDegree.degree < 0 {
  323. return false
  324. }
  325. }
  326. return true
  327. }
  328. func validate(s *ast.SelectStatement) (err error) {
  329. isAggStmt := false
  330. if xsql.IsAggregate(s.Condition) {
  331. return fmt.Errorf("Not allowed to call aggregate functions in WHERE clause.")
  332. }
  333. if !allAggregate(s.Having) {
  334. return fmt.Errorf("Not allowed to call non-aggregate functions in HAVING clause.")
  335. }
  336. for _, d := range s.Dimensions {
  337. isAggStmt = true
  338. if xsql.IsAggregate(d.Expr) {
  339. return fmt.Errorf("Not allowed to call aggregate functions in GROUP BY clause.")
  340. }
  341. }
  342. if s.Joins != nil {
  343. isAggStmt = true
  344. }
  345. ast.WalkFunc(s, func(n ast.Node) bool {
  346. switch f := n.(type) {
  347. case *ast.Call:
  348. // aggregate call should not have any aggregate arg
  349. if function.IsAggFunc(f.Name) {
  350. for _, arg := range f.Args {
  351. tr := xsql.IsAggregate(arg)
  352. if tr {
  353. err = fmt.Errorf("invalid argument for func %s: aggregate argument is not allowed", f.Name)
  354. return false
  355. }
  356. }
  357. }
  358. if isAggStmt && function.NoAggFunc(f.Name) {
  359. err = fmt.Errorf("function %s is not allowed in an aggregate query", f.Name)
  360. return false
  361. }
  362. case *ast.Window:
  363. // agg func check is done in dimensions.
  364. // in window trigger condition, NoAggFunc is allowed unlike normal condition so return false to skip that check
  365. return false
  366. }
  367. return true
  368. })
  369. return
  370. }
  371. // file-private functions below
  372. // allAggregate checks if all expressions of binary expression are aggregate
  373. func allAggregate(expr ast.Expr) (r bool) {
  374. r = true
  375. ast.WalkFunc(expr, func(n ast.Node) bool {
  376. switch f := expr.(type) {
  377. case *ast.BinaryExpr:
  378. switch f.OP {
  379. case ast.SUBSET, ast.ARROW:
  380. // do nothing
  381. default:
  382. r = allAggregate(f.LHS) && allAggregate(f.RHS)
  383. return false
  384. }
  385. case *ast.Call, *ast.FieldRef:
  386. if !xsql.IsAggregate(f) {
  387. r = false
  388. return false
  389. }
  390. }
  391. return true
  392. })
  393. return
  394. }
  395. func convertStreamInfo(streamStmt *ast.StreamStmt) (*streamInfo, error) {
  396. ss := streamStmt.StreamFields
  397. var err error
  398. if streamStmt.Options.SCHEMAID != "" {
  399. ss, err = schema.InferFromSchemaFile(streamStmt.Options.FORMAT, streamStmt.Options.SCHEMAID)
  400. if err != nil {
  401. return nil, err
  402. }
  403. }
  404. return &streamInfo{
  405. stmt: streamStmt,
  406. schema: ss,
  407. }, nil
  408. }
  409. type fieldsMap struct {
  410. content map[string]streamFieldStore
  411. aliasNames map[string]struct{}
  412. isSchemaless bool
  413. defaultStream ast.StreamName
  414. }
  415. func newFieldsMap(isSchemaless bool, defaultStream ast.StreamName) *fieldsMap {
  416. return &fieldsMap{content: make(map[string]streamFieldStore), aliasNames: map[string]struct{}{}, isSchemaless: isSchemaless, defaultStream: defaultStream}
  417. }
  418. func (f *fieldsMap) reserve(fieldName string, streamName ast.StreamName) {
  419. lname := strings.ToLower(fieldName)
  420. if fm, ok := f.content[lname]; ok {
  421. fm.add(streamName)
  422. } else {
  423. fm := newStreamFieldStore(f.isSchemaless, f.defaultStream)
  424. fm.add(streamName)
  425. f.content[lname] = fm
  426. }
  427. }
  428. func (f *fieldsMap) save(fieldName string, streamName ast.StreamName, field *ast.AliasRef) error {
  429. lname := strings.ToLower(fieldName)
  430. fm, ok := f.content[lname]
  431. if !ok {
  432. if streamName == ast.AliasStream || f.isSchemaless {
  433. fm = newStreamFieldStore(f.isSchemaless, f.defaultStream)
  434. f.content[lname] = fm
  435. } else {
  436. return fmt.Errorf("unknown field %s", fieldName)
  437. }
  438. }
  439. err := fm.ref(streamName, field)
  440. if err != nil {
  441. return fmt.Errorf("%s%s", err, fieldName)
  442. }
  443. return nil
  444. }
  445. func (f *fieldsMap) bindAlias(aliasName string) {
  446. f.aliasNames[aliasName] = struct{}{}
  447. }
  448. func (f *fieldsMap) bind(fr *ast.FieldRef) error {
  449. lname := strings.ToLower(fr.Name)
  450. fm, ok1 := f.content[lname]
  451. _, ok2 := f.aliasNames[lname]
  452. if !ok1 && !ok2 {
  453. if f.isSchemaless && fr.Name != "" {
  454. fm = newStreamFieldStore(f.isSchemaless, f.defaultStream)
  455. f.content[lname] = fm
  456. } else {
  457. return fmt.Errorf("unknown field %s", fr.Name)
  458. }
  459. }
  460. if fm != nil {
  461. err := fm.bindRef(fr)
  462. if err != nil {
  463. return fmt.Errorf("%s%s", err, fr.Name)
  464. }
  465. }
  466. return nil
  467. }
  468. type streamFieldStore interface {
  469. add(k ast.StreamName)
  470. ref(k ast.StreamName, v *ast.AliasRef) error
  471. bindRef(f *ast.FieldRef) error
  472. }
  473. func newStreamFieldStore(isSchemaless bool, defaultStream ast.StreamName) streamFieldStore {
  474. if !isSchemaless {
  475. return &streamFieldMap{content: make(map[ast.StreamName]*ast.AliasRef)}
  476. } else {
  477. return &streamFieldMapSchemaless{content: make(map[ast.StreamName]*ast.AliasRef), defaultStream: defaultStream}
  478. }
  479. }
  480. type streamFieldMap struct {
  481. content map[ast.StreamName]*ast.AliasRef
  482. }
  483. // add the stream name must not be default.
  484. // This is used when traversing stream schema
  485. func (s *streamFieldMap) add(k ast.StreamName) {
  486. s.content[k] = nil
  487. }
  488. // bind for schema field, all keys must be created before running bind
  489. // can bind alias & col. For alias, the stream name must be empty; For col, the field must be a col
  490. func (s *streamFieldMap) ref(k ast.StreamName, v *ast.AliasRef) error {
  491. if k == ast.AliasStream { // must not exist, save alias ref for alias
  492. _, ok := s.content[k]
  493. if ok {
  494. return fmt.Errorf("duplicate alias ")
  495. }
  496. s.content[k] = v
  497. } else { // the key must exist after the schema travers, do validation
  498. if k == ast.DefaultStream { // In schema mode, default stream won't be a key
  499. l := len(s.content)
  500. if l == 0 {
  501. return fmt.Errorf("unknow field ")
  502. } else if l == 1 {
  503. // valid, do nothing
  504. } else {
  505. return fmt.Errorf("ambiguous field ")
  506. }
  507. } else {
  508. _, ok := s.content[k]
  509. if !ok {
  510. return fmt.Errorf("unknow field %s.", k)
  511. }
  512. }
  513. }
  514. return nil
  515. }
  516. func (s *streamFieldMap) bindRef(fr *ast.FieldRef) error {
  517. l := len(s.content)
  518. if fr.StreamName == "" {
  519. fr.StreamName = ast.DefaultStream
  520. }
  521. k := fr.StreamName
  522. if k == ast.DefaultStream {
  523. switch l {
  524. case 0:
  525. return fmt.Errorf("unknown field ")
  526. case 1: // if alias, return this
  527. for sk, sv := range s.content {
  528. fr.RefSelection(sv)
  529. fr.StreamName = sk
  530. }
  531. return nil
  532. default:
  533. r, ok := s.content[ast.AliasStream] // if alias exists
  534. if ok {
  535. fr.RefSelection(r)
  536. fr.StreamName = ast.AliasStream
  537. return nil
  538. } else {
  539. return fmt.Errorf("ambiguous field ")
  540. }
  541. }
  542. } else {
  543. r, ok := s.content[k]
  544. if ok {
  545. fr.RefSelection(r)
  546. return nil
  547. } else {
  548. return fmt.Errorf("unknown field %s.", k)
  549. }
  550. }
  551. }
  552. type streamFieldMapSchemaless struct {
  553. content map[ast.StreamName]*ast.AliasRef
  554. defaultStream ast.StreamName
  555. }
  556. // add this should not be called for schemaless
  557. func (s *streamFieldMapSchemaless) add(k ast.StreamName) {
  558. s.content[k] = nil
  559. }
  560. // bind for schemaless field, create column if not exist
  561. // can bind alias & col. For alias, the stream name must be empty; For col, the field must be a col
  562. func (s *streamFieldMapSchemaless) ref(k ast.StreamName, v *ast.AliasRef) error {
  563. if k == ast.AliasStream { // must not exist
  564. _, ok := s.content[k]
  565. if ok {
  566. return fmt.Errorf("duplicate alias ")
  567. }
  568. s.content[k] = v
  569. } else { // the key may or may not exist. But always have only one default stream field.
  570. // Replace with stream name if another stream found. The key can be duplicate
  571. l := len(s.content)
  572. if k == ast.DefaultStream { // In schemaless mode, default stream can only exist when length is 1
  573. if l < 1 {
  574. // valid, do nothing
  575. } else {
  576. return fmt.Errorf("ambiguous field ")
  577. }
  578. } else {
  579. if l == 1 {
  580. for sk := range s.content {
  581. if sk == ast.DefaultStream {
  582. delete(s.content, k)
  583. }
  584. }
  585. }
  586. }
  587. }
  588. return nil
  589. }
  590. func (s *streamFieldMapSchemaless) bindRef(fr *ast.FieldRef) error {
  591. l := len(s.content)
  592. if fr.StreamName == "" || fr.StreamName == ast.DefaultStream {
  593. if l == 1 {
  594. for sk := range s.content {
  595. fr.StreamName = sk
  596. }
  597. }
  598. }
  599. k := fr.StreamName
  600. if k == ast.DefaultStream {
  601. switch l {
  602. case 0: // must be a column because alias are fields and have been traversed
  603. // reserve a hole and do nothing
  604. fr.StreamName = s.defaultStream
  605. s.content[s.defaultStream] = nil
  606. return nil
  607. case 1: // if alias or single col, return this
  608. for sk, sv := range s.content {
  609. fr.RefSelection(sv)
  610. fr.StreamName = sk
  611. }
  612. return nil
  613. default:
  614. r, ok := s.content[ast.AliasStream] // if alias exists
  615. if ok {
  616. fr.RefSelection(r)
  617. fr.StreamName = ast.AliasStream
  618. return nil
  619. } else {
  620. fr.StreamName = s.defaultStream
  621. }
  622. }
  623. }
  624. if fr.StreamName != ast.DefaultStream {
  625. r, ok := s.content[k]
  626. if !ok { // reserver a hole
  627. s.content[k] = nil
  628. } else {
  629. fr.RefSelection(r)
  630. }
  631. return nil
  632. }
  633. return fmt.Errorf("ambiguous field ")
  634. }