Browse Source

Between and like support (#1327)

* feat(parser): support between expression

Signed-off-by: Jiyong Huang <huangjy@emqx.io>

* feat(parser): support like expression

Signed-off-by: Jiyong Huang <huangjy@emqx.io>
ngjaying 2 years ago
parent
commit
274b1c716f

+ 1 - 1
docs/en_US/sqls/lexical_elements.md

@@ -64,7 +64,7 @@ CREATE STREAM `stream` (
 Following operators are provided.
 
 ```
-+, -, *, /, %, &, |, ^, =, !=, <, <=, >, >=, [], ->, (), IN, NOT IN
++, -, *, /, %, &, |, ^, =, !=, <, <=, >, >=, [], ->, (), IN, NOT IN, BETWEEN, NOT BETWEEN
 ```
 
 ## Literals

+ 25 - 0
docs/en_US/sqls/query_language_elements.md

@@ -208,6 +208,31 @@ Is the operator used to test the condition of one expression being less than the
 
 Is the operator used to test the condition of one expression being less than or equal to the other expression.
 
+**[NOT] BETWEEN**
+
+Is the operator used to test whether the value of an expression is (not) within the specified range. Both bounds are inclusive.
+
+```sql
+expression [NOT] BETWEEN expression1 AND expression2
+```
+
+**[NOT] LIKE**
+
+Is the operator used to check if the STRING in the first operand matches a pattern specified by the second operand. Patterns can contain these characters:
+
+- "%" matches any number of characters.
+- "_" matches a single character.
+
+```sql
+expression [NOT] LIKE expression1
+```
+
+Example:
+
+```sql
+a LIKE "string%"
+```
+
 **[NOT] IN**
 
 Is the operator used to test the condition of one expression (not) being part of to the other expression. support these two formats

+ 1 - 1
docs/zh_CN/sqls/lexical_elements.md

@@ -64,7 +64,7 @@ CREATE STREAM `stream` (
 提供了以下运算符。
 
 ```
-+, -, *, /, %, &, |, ^, =, !=, <, <=, >, >=, [], ->, (), IN, NOT IN
++, -, *, /, %, &, |, ^, =, !=, <, <=, >, >=, [], ->, (), IN, NOT IN, BETWEEN, NOT BETWEEN
 ```
 
 ## 字面量(Literals)

+ 26 - 0
docs/zh_CN/sqls/query_language_elements.md

@@ -209,6 +209,32 @@ WHERE <search_condition>
 
 用于测试一个表达式小于或等于另一个表达式的条件的运算符。
 
+
+**[NOT] BETWEEN**
+
+用于测试一个表达式的值是否在指定的区间。
+
+```sql
+expression [NOT] BETWEEN expression1 AND expression2
+```
+
+**[NOT] LIKE**
+
+用于测试字符串是否满足模式。模式可使用以下通配符:
+
+- "%" 匹配0个或多个字符。
+- "_" 匹配单个字符
+
+```sql
+expression [NOT] LIKE expression1
+```
+
+示例:
+
+```sql
+a LIKE "string%"
+```
+
 **[NOT] IN**
 
 用于测试一个表达式是否属于另一个表达式的条件的运算符。

+ 4 - 0
internal/xsql/lexical.go

@@ -217,6 +217,10 @@ func (s *Scanner) ScanIdent() (tok ast.Token, lit string) {
 		return ast.IN, lit
 	case "NOT":
 		return ast.NOT, lit
+	case "BETWEEN":
+		return ast.BETWEEN, lit
+	case "LIKE":
+		return ast.LIKE, lit
 	case "CREATE":
 		return ast.CREATE, lit
 	case "DROP":

+ 73 - 2
internal/xsql/parser.go

@@ -504,7 +504,6 @@ func (p *Parser) ParseExpr() (ast.Expr, error) {
 			op = ast.SUBSET
 			p.unscan()
 		} else if op == ast.IN { //IN is a special token, need to unscan
-			op = ast.IN
 			p.unscan()
 		} else if op == ast.NOT {
 			afterNot, tk1 := p.scanIgnoreWhitespace()
@@ -513,16 +512,65 @@ func (p *Parser) ParseExpr() (ast.Expr, error) {
 				op = ast.NOTIN
 				p.unscan()
 				break
+			case ast.BETWEEN:
+				op = ast.NOTBETWEEN
+				node := root
+				var lhs ast.Expr
+				for {
+					r, ok := node.RHS.(*ast.BinaryExpr)
+					if !ok || r.OP.Precedence() >= op.Precedence() {
+						lhs = node.RHS
+						break
+					}
+					node = r
+				}
+				expr, err := p.parseBetween(lhs, ast.NOTBETWEEN)
+				if err != nil {
+					return nil, err
+				}
+
+				node.RHS = expr
+				continue
+			case ast.LIKE:
+				op = ast.NOTLIKE
 			default:
 				return nil, fmt.Errorf("found %q, expected expression", tk1)
 			}
+		} else if op == ast.BETWEEN {
+			node := root
+			var lhs ast.Expr
+			for {
+				r, ok := node.RHS.(*ast.BinaryExpr)
+				if !ok || r.OP.Precedence() >= op.Precedence() {
+					lhs = node.RHS
+					break
+				}
+				node = r
+			}
+			expr, err := p.parseBetween(lhs, op)
+			if err != nil {
+				return nil, err
+			}
+			node.RHS = expr
+			continue
 		}
 
 		var rhs ast.Expr
 		if rhs, err = p.parseUnaryExpr(op == ast.ARROW); err != nil {
 			return nil, err
 		}
-
+		if op == ast.LIKE || op == ast.NOTLIKE {
+			lp := &ast.LikePattern{
+				Expr: rhs,
+			}
+			if l, ok := lp.Expr.(*ast.StringLiteral); ok {
+				lp.Pattern, err = lp.Compile(l.Val)
+				if err != nil {
+					return nil, fmt.Errorf("invalid LIKE pattern: %s", err)
+				}
+			}
+			rhs = lp
+		}
 		for node := root; ; {
 			r, ok := node.RHS.(*ast.BinaryExpr)
 			if !ok || r.OP.Precedence() >= op.Precedence() {
@@ -534,6 +582,29 @@ func (p *Parser) ParseExpr() (ast.Expr, error) {
 	}
 }
 
+// parseBetween parses the operand list that follows a BETWEEN or NOT BETWEEN
+// keyword: a unary expression, the AND token, and a second unary expression.
+//
+// lhs is the already parsed operand being range-tested and op is the operator
+// token to store on the result (the call sites in ParseExpr pass ast.BETWEEN
+// or ast.NOTBETWEEN). The AND token is consumed here as part of the BETWEEN
+// syntax, so it never reaches ParseExpr as a logical conjunction.
+//
+// On success it returns a BinaryExpr whose LHS is lhs, whose OP is op and whose
+// RHS is a BetweenExpr holding both bounds. It returns an error when either
+// bound fails to parse or when the token after the first bound is not AND.
+func (p *Parser) parseBetween(lhs ast.Expr, op ast.Token) (ast.Expr, error) {
+	lower, err := p.parseUnaryExpr(false)
+	if err != nil {
+		return nil, err
+	}
+	// Only the token kind matters here; the scanned literal is discarded.
+	if sep, _ := p.scanIgnoreWhitespace(); sep != ast.AND {
+		return nil, fmt.Errorf("expect AND expression after between but found %s", sep)
+	}
+	higher, err := p.parseUnaryExpr(false)
+	if err != nil {
+		return nil, err
+	}
+	bounds := &ast.BetweenExpr{Lower: lower, Higher: higher}
+	return &ast.BinaryExpr{LHS: lhs, OP: op, RHS: bounds}, nil
+}
+
 func (p *Parser) parseUnaryExpr(isSubField bool) (ast.Expr, error) {
 	if tok1, _ := p.scanIgnoreWhitespace(); tok1 == ast.LPAREN {
 		expr, err := p.ParseExpr()

+ 159 - 0
internal/xsql/parser_test.go

@@ -20,12 +20,16 @@ import (
 	"github.com/lf-edge/ekuiper/pkg/ast"
 	"math"
 	"reflect"
+	"regexp"
 	"strings"
 	"testing"
 )
 
 // Ensure the parser can parse strings into Statement ASTs.
 func TestParser_ParseStatement(t *testing.T) {
+	re1, _ := regexp.Compile("^foo$")
+	re2, _ := regexp.Compile("^fo.o.*$")
+	re3, _ := regexp.Compile("^foo\\\\%$")
 	var tests = []struct {
 		s    string
 		stmt *ast.SelectStatement
@@ -867,6 +871,161 @@ func TestParser_ParseStatement(t *testing.T) {
 		},
 
 		{
+			s: `SELECT power(.2, 4) AS f1 FROM tbl WHERE f1 BETWEEN 1 AND 2`,
+			stmt: &ast.SelectStatement{
+				Fields: []ast.Field{
+					{
+						AName: "f1",
+						Name:  "power",
+						Expr: &ast.Call{
+							Name: "power",
+							Args: []ast.Expr{&ast.NumberLiteral{Val: 0.2}, &ast.IntegerLiteral{Val: 4}},
+						},
+					},
+				},
+				Sources: []ast.Source{&ast.Table{Name: "tbl"}},
+				Condition: &ast.BinaryExpr{
+					LHS: &ast.FieldRef{Name: "f1", StreamName: ast.DefaultStream},
+					OP:  ast.BETWEEN,
+					RHS: &ast.BetweenExpr{
+						Lower:  &ast.IntegerLiteral{Val: 1},
+						Higher: &ast.IntegerLiteral{Val: 2},
+					},
+				},
+			},
+		},
+		{
+			s: `SELECT a FROM tbl WHERE f1 > 4 AND f2 BETWEEN 1 AND 2`,
+			stmt: &ast.SelectStatement{
+				Fields: []ast.Field{
+					{
+						AName: "",
+						Name:  "a",
+						Expr:  &ast.FieldRef{Name: "a", StreamName: ast.DefaultStream},
+					},
+				},
+				Sources: []ast.Source{&ast.Table{Name: "tbl"}},
+				Condition: &ast.BinaryExpr{
+					OP: ast.AND,
+					LHS: &ast.BinaryExpr{
+						LHS: &ast.FieldRef{Name: "f1", StreamName: ast.DefaultStream},
+						OP:  ast.GT,
+						RHS: &ast.IntegerLiteral{Val: 4},
+					},
+					RHS: &ast.BinaryExpr{
+						LHS: &ast.FieldRef{Name: "f2", StreamName: ast.DefaultStream},
+						OP:  ast.BETWEEN,
+						RHS: &ast.BetweenExpr{
+							Lower:  &ast.IntegerLiteral{Val: 1},
+							Higher: &ast.IntegerLiteral{Val: 2},
+						},
+					},
+				},
+			},
+		},
+		{
+			s: `SELECT a FROM tbl WHERE f1 NOT BETWEEN b AND c AND f2 BETWEEN 1 AND 2 AND f3 > 4`,
+			stmt: &ast.SelectStatement{
+				Fields: []ast.Field{
+					{
+						AName: "",
+						Name:  "a",
+						Expr:  &ast.FieldRef{Name: "a", StreamName: ast.DefaultStream},
+					},
+				},
+				Sources: []ast.Source{&ast.Table{Name: "tbl"}},
+				Condition: &ast.BinaryExpr{
+					OP: ast.AND,
+					LHS: &ast.BinaryExpr{
+						OP: ast.AND,
+						LHS: &ast.BinaryExpr{
+							LHS: &ast.FieldRef{Name: "f1", StreamName: ast.DefaultStream},
+							OP:  ast.NOTBETWEEN,
+							RHS: &ast.BetweenExpr{
+								Lower:  &ast.FieldRef{Name: "b", StreamName: ast.DefaultStream},
+								Higher: &ast.FieldRef{Name: "c", StreamName: ast.DefaultStream},
+							},
+						},
+						RHS: &ast.BinaryExpr{
+							LHS: &ast.FieldRef{Name: "f2", StreamName: ast.DefaultStream},
+							OP:  ast.BETWEEN,
+							RHS: &ast.BetweenExpr{
+								Lower:  &ast.IntegerLiteral{Val: 1},
+								Higher: &ast.IntegerLiteral{Val: 2},
+							},
+						},
+					},
+					RHS: &ast.BinaryExpr{
+						OP:  ast.GT,
+						LHS: &ast.FieldRef{Name: "f3", StreamName: ast.DefaultStream},
+						RHS: &ast.IntegerLiteral{Val: 4},
+					},
+				},
+			},
+		},
+		{
+			s:   `SELECT a FROM tbl WHERE f1 NOT BETWEEN b`,
+			err: "expect AND expression after between but found EOF",
+		},
+		{
+			s:   `SELECT a FROM tbl WHERE f1 NOT BETWEEN 1 OR 2`,
+			err: "expect AND expression after between but found OR",
+		},
+		{
+			s: `SELECT a FROM tbl WHERE a LIKE "foo"`,
+			stmt: &ast.SelectStatement{
+				Fields: []ast.Field{
+					{
+						AName: "",
+						Name:  "a",
+						Expr:  &ast.FieldRef{Name: "a", StreamName: ast.DefaultStream},
+					},
+				},
+				Sources: []ast.Source{&ast.Table{Name: "tbl"}},
+				Condition: &ast.BinaryExpr{
+					LHS: &ast.FieldRef{Name: "a", StreamName: ast.DefaultStream},
+					OP:  ast.LIKE,
+					RHS: &ast.LikePattern{Expr: &ast.StringLiteral{Val: "foo"}, Pattern: re1},
+				},
+			},
+		},
+		{
+			s: `SELECT a FROM tbl WHERE a NOT LIKE "fo_o%"`,
+			stmt: &ast.SelectStatement{
+				Fields: []ast.Field{
+					{
+						AName: "",
+						Name:  "a",
+						Expr:  &ast.FieldRef{Name: "a", StreamName: ast.DefaultStream},
+					},
+				},
+				Sources: []ast.Source{&ast.Table{Name: "tbl"}},
+				Condition: &ast.BinaryExpr{
+					LHS: &ast.FieldRef{Name: "a", StreamName: ast.DefaultStream},
+					OP:  ast.NOTLIKE,
+					RHS: &ast.LikePattern{Expr: &ast.StringLiteral{Val: "fo_o%"}, Pattern: re2},
+				},
+			},
+		},
+		{
+			s: `SELECT a FROM tbl WHERE a LIKE "foo\\%"`,
+			stmt: &ast.SelectStatement{
+				Fields: []ast.Field{
+					{
+						AName: "",
+						Name:  "a",
+						Expr:  &ast.FieldRef{Name: "a", StreamName: ast.DefaultStream},
+					},
+				},
+				Sources: []ast.Source{&ast.Table{Name: "tbl"}},
+				Condition: &ast.BinaryExpr{
+					LHS: &ast.FieldRef{Name: "a", StreamName: ast.DefaultStream},
+					OP:  ast.LIKE,
+					RHS: &ast.LikePattern{Expr: &ast.StringLiteral{Val: "foo\\%"}, Pattern: re3},
+				},
+			},
+		},
+		{
 			s: `SELECT deviceId, name FROM topic/sensor1 WHERE deviceId=1 AND name = "dname"`,
 			stmt: &ast.SelectStatement{
 				Fields: []ast.Field{

+ 65 - 1
internal/xsql/valuer.go

@@ -21,6 +21,7 @@ import (
 	"github.com/lf-edge/ekuiper/pkg/cast"
 	"math"
 	"reflect"
+	"regexp"
 	"time"
 )
 
@@ -396,6 +397,24 @@ func (v *ValuerEval) Eval(expr ast.Expr) interface{} {
 		return v.evalCase(expr)
 	case *ast.ValueSetExpr:
 		return v.evalValueSet(expr)
+	case *ast.BetweenExpr:
+		return []interface{}{
+			v.Eval(expr.Lower), v.Eval(expr.Higher),
+		}
+	case *ast.LikePattern:
+		if expr.Pattern != nil {
+			return expr.Pattern
+		}
+		v := v.Eval(expr.Expr)
+		str, ok := v.(string)
+		if !ok {
+			return fmt.Errorf("invalid LIKE pattern, must be a string but got %v", v)
+		}
+		re, err := expr.Compile(str)
+		if err != nil {
+			return err
+		}
+		return re
 	default:
 		return nil
 	}
@@ -432,7 +451,52 @@ func (v *ValuerEval) evalBinaryExpr(expr *ast.BinaryExpr) interface{} {
 	if isSetOperator(expr.OP) {
 		return v.evalSetsExpr(lhs, expr.OP, rhs)
 	}
-	return v.simpleDataEval(lhs, rhs, expr.OP)
+	switch expr.OP {
+	case ast.BETWEEN, ast.NOTBETWEEN:
+		arr, ok := rhs.([]interface{})
+		if !ok {
+			return fmt.Errorf("between operator expects two arguments, but found %v", rhs)
+		}
+		andLeft := v.simpleDataEval(lhs, arr[0], ast.GTE)
+		switch andLeft.(type) {
+		case error:
+			return fmt.Errorf("between operator cannot compare %[1]T(%[1]v) and %[2]T(%[2]v)", lhs, arr[0])
+		}
+		andRight := v.simpleDataEval(lhs, arr[1], ast.LTE)
+		switch andRight.(type) {
+		case error:
+			return fmt.Errorf("between operator cannot compare %[1]T(%[1]v) and %[2]T(%[2]v)", lhs, arr[1])
+		}
+		r := v.simpleDataEval(andLeft, andRight, ast.AND)
+		br, ok := r.(bool)
+		if expr.OP == ast.NOTBETWEEN && ok {
+			return !br
+		} else {
+			return r
+		}
+	case ast.LIKE, ast.NOTLIKE:
+		ls, ok := lhs.(string)
+		if !ok {
+			return fmt.Errorf("LIKE operator left operand expects string, but found %v", lhs)
+		}
+		var result bool
+		switch rr := rhs.(type) {
+		case string:
+		case *regexp.Regexp: // literal
+			result = rr.MatchString(ls)
+		}
+		rs, ok := rhs.(*regexp.Regexp)
+		if !ok {
+			return fmt.Errorf("LIKE operator right operand expects string, but found %v", rhs)
+		}
+		result = rs.MatchString(ls)
+		if expr.OP == ast.NOTLIKE {
+			result = !result
+		}
+		return result
+	default:
+		return v.simpleDataEval(lhs, rhs, expr.OP)
+	}
 }
 
 func (v *ValuerEval) evalCase(expr *ast.CaseExpr) interface{} {

File diff suppressed because it is too large
+ 96 - 25
internal/xsql/valuer_test.go


+ 29 - 0
pkg/ast/expr.go

@@ -16,6 +16,8 @@ package ast
 
 import (
 	"fmt"
+	"regexp"
+	"strings"
 )
 
 type Node interface {
@@ -167,6 +169,33 @@ type ValueSetExpr struct {
 func (c *ValueSetExpr) expr() {}
 func (c *ValueSetExpr) node() {}
 
+// BetweenExpr holds the two bound expressions of a [NOT] BETWEEN operator.
+// The parser stores it as the RHS of a BinaryExpr whose OP is BETWEEN or
+// NOTBETWEEN.
+type BetweenExpr struct {
+	Lower  Expr // bound written before AND
+	Higher Expr // bound written after AND
+}
+
+// expr and node are empty marker methods.
+// NOTE(review): presumably they are what makes *BetweenExpr satisfy the
+// package's Expr and Node interfaces - confirm against their declarations.
+func (b *BetweenExpr) expr() {}
+func (b *BetweenExpr) node() {}
+
+// LikePattern is the right-hand operand of a [NOT] LIKE operator.
+type LikePattern struct {
+	// Expr is the expression that yields the LIKE pattern string.
+	Expr    Expr
+	// Pattern is the compiled form of the pattern. The parser fills it in
+	// when Expr is a string literal; otherwise it stays nil and the evaluator
+	// compiles the pattern from the value of Expr on each evaluation.
+	Pattern *regexp.Regexp
+}
+
+// expr and node are empty marker methods.
+// NOTE(review): presumably they are what makes *LikePattern satisfy the
+// package's Expr and Node interfaces - confirm against their declarations.
+func (l *LikePattern) expr() {}
+func (l *LikePattern) node() {}
+
+func (l *LikePattern) Compile(likestr string) (*regexp.Regexp, error) {
+	likestr = strings.ReplaceAll(strings.ReplaceAll(likestr, `\%`, `!@#`), `\_`, `!@$`)
+	regstr := strings.ReplaceAll(strings.ReplaceAll(likestr, "%", ".*"), "_", ".")
+	regstr = strings.ReplaceAll(strings.ReplaceAll(strings.ReplaceAll(regstr, `!@$`, `\_`), `!@#`, `\%`), `\`, `\\`)
+	re, err := regexp.Compile("^" + regstr + "$")
+	if err != nil {
+		return nil, err
+	}
+	return re, nil
+}
+
 type StreamName string
 
 func (sn *StreamName) node() {}

+ 1 - 1
pkg/ast/funcArgsValidator.go

@@ -43,7 +43,7 @@ func IsBooleanArg(arg Expr) bool {
 		return true
 	case *BinaryExpr:
 		switch t.OP {
-		case AND, OR, EQ, NEQ, LT, LTE, GT, GTE:
+		case AND, OR, EQ, NEQ, LT, LTE, GT, GTE, BETWEEN, NOTBETWEEN, IN, NOTIN, LIKE, NOTLIKE:
 			return true
 		default:
 			return false

+ 14 - 6
pkg/ast/token.go

@@ -58,6 +58,10 @@ const (
 	IN     // IN
 	NOT    // NOT
 	NOTIN  // NOT
+	BETWEEN
+	NOTBETWEEN
+	LIKE
+	NOTLIKE
 
 	operatorEnd
 
@@ -234,11 +238,15 @@ var Tokens = []string{
 	RETAIN_SIZE:       "RETAIN_SIZE",
 	SHARED:            "SHARED",
 
-	AND:   "AND",
-	OR:    "OR",
-	TRUE:  "TRUE",
-	FALSE: "FALSE",
-	NOTIN: "NOTIN",
+	AND:        "AND",
+	OR:         "OR",
+	TRUE:       "TRUE",
+	FALSE:      "FALSE",
+	NOTIN:      "NOT IN",
+	BETWEEN:    "BETWEEN",
+	NOTBETWEEN: "NOT BETWEEN",
+	LIKE:       "LIKE",
+	NOTLIKE:    "NOT LIKE",
 
 	DD: "DD",
 	HH: "HH",
@@ -275,7 +283,7 @@ func (tok Token) Precedence() int {
 		return 1
 	case AND:
 		return 2
-	case EQ, NEQ, LT, LTE, GT, GTE, IN, NOTIN:
+	case EQ, NEQ, LT, LTE, GT, GTE, IN, NOTIN, BETWEEN, NOTBETWEEN, LIKE, NOTLIKE:
 		return 3
 	case ADD, SUB, BITWISE_OR, BITWISE_XOR:
 		return 4