Browse source

feat(file): refactor file source

1. Add many new properties
2. Add new file types csv and lines
3. Allow use in streams
4. Monitor both folders and files

Signed-off-by: Jiyong Huang <huangjy@emqx.io>
Jiyong Huang 2 years ago
parent
commit
93caa1572a

+ 18 - 0
etc/sources/file.yaml

@@ -1,10 +1,28 @@
 default:
+  # The type of the file; can be json, csv or lines
   fileType: json
   # The directory of the file relative to kuiper root or an absolute path.
   # Do not include the file name here. The file name should be defined in the stream data source
   path: data
   # The interval between reading the files, time unit is ms. If only read once, set it to 0
   interval: 0
+  # When used as a stream, the sending interval between events, in milliseconds
+  sendInterval: 10
+  # The action to take on the file after it has been read
+  # 0: keep the file
+  # 1: delete the file
+  # 2: move the file to moveTo
+  actionAfterRead: 0
+  # The path to move the file to after read, only valid when actionAfterRead is 2
+  moveTo: /tmp/kuiper/moved
+  # Whether the first line is a header
+  hasHeader: false
+  # Define the columns. If a header is defined, this will be overridden
+  # columns: [id, name]
+  # How many lines to ignore at the beginning. Note that empty lines are ignored and not counted.
+  ignoreStartLines: 0
+  # How many lines to ignore at the end. Note that empty lines are ignored and not counted.
+  ignoreEndLines: 0
 
 test:
   path: test
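
The properties above reach the source as a plain property map and are deserialized by field tag. The following standalone Go sketch mirrors that mapping with a trimmed copy of FileSourceConfig, using plain encoding/json in place of the real cast.MapToStruct helper (an assumption made only to keep the example runnable); both honor the same json tags.

package main

import (
	"encoding/json"
	"fmt"
)

// A trimmed copy of FileSourceConfig, showing how the yaml keys above
// land on the struct through its json tags.
type fileSourceConfig struct {
	FileType         string   `json:"fileType"`
	Path             string   `json:"path"`
	Interval         int      `json:"interval"`
	SendInterval     int      `json:"sendInterval"`
	ActionAfterRead  int      `json:"actionAfterRead"`
	MoveTo           string   `json:"moveTo"`
	HasHeader        bool     `json:"hasHeader"`
	Columns          []string `json:"columns"`
	IgnoreStartLines int      `json:"ignoreStartLines"`
	IgnoreEndLines   int      `json:"ignoreEndLines"`
}

func main() {
	// Properties as they would arrive from etc/sources/file.yaml.
	props := map[string]interface{}{
		"fileType":        "csv",
		"path":            "data",
		"actionAfterRead": 2,
		"moveTo":          "/tmp/kuiper/moved",
		"hasHeader":       true,
	}
	b, _ := json.Marshal(props)
	var cfg fileSourceConfig
	_ = json.Unmarshal(b, &cfg)
	fmt.Printf("%+v\n", cfg)
	// {FileType:csv Path:data ... ActionAfterRead:2 MoveTo:/tmp/kuiper/moved HasHeader:true ...}
}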

+ 3 - 3
internal/topo/node/source_node.go

@@ -90,10 +90,10 @@ func (m *SourceNode) Open(ctx api.StreamContext, errCh chan<- error) {
 				}
 			}
 			m.bufferLength = bl
-			// Set retain size for table type
-			if m.options.RETAIN_SIZE > 0 && m.streamType == ast.TypeTable {
-				props["$retainSize"] = m.options.RETAIN_SIZE
+			if m.streamType == ast.TypeTable {
+				props["isTable"] = true
 			}
+			props["delimiter"] = m.options.DELIMITER
 			converter, err := converter.GetOrCreateConverter(m.options)
 			if err != nil {
 				msg := fmt.Sprintf("cannot get converter from format %s, schemaId %s: %v", m.options.FORMAT, m.options.SCHEMAID, err)

+ 0 - 1
internal/topo/operator/preprocessor_test.go

@@ -155,7 +155,6 @@ func TestPreprocessor_Apply(t *testing.T) {
 				Name: ast.StreamName("demo"),
 				StreamFields: []ast.StreamField{
 					{Name: "abc", FieldType: &ast.BasicType{Type: ast.FLOAT}},
-					{Name: "def", FieldType: &ast.BasicType{Type: ast.BOOLEAN}},
 				},
 			},
 			data:   []byte(`{"a": {"b" : "hello"}}`),

+ 266 - 36
internal/topo/source/file_source.go

@@ -15,38 +15,56 @@
 package source
 
 import (
+	"bufio"
+	"encoding/csv"
+	"encoding/json"
 	"errors"
 	"fmt"
 	"github.com/lf-edge/ekuiper/internal/conf"
-	"github.com/lf-edge/ekuiper/internal/pkg/filex"
+	"github.com/lf-edge/ekuiper/internal/xsql"
 	"github.com/lf-edge/ekuiper/pkg/api"
 	"github.com/lf-edge/ekuiper/pkg/cast"
+	"io"
 	"os"
-	"path"
 	"path/filepath"
+	"strconv"
+	"strings"
 	"time"
 )
 
 type FileType string
 
 const (
-	JSON_TYPE FileType = "json"
+	JSON_TYPE  FileType = "json"
+	CSV_TYPE   FileType = "csv"
+	LINES_TYPE FileType = "lines"
 )
 
-var fileTypes = map[FileType]bool{
-	JSON_TYPE: true,
+var fileTypes = map[FileType]struct{}{
+	JSON_TYPE:  {},
+	CSV_TYPE:   {},
+	LINES_TYPE: {},
 }
 
 type FileSourceConfig struct {
-	FileType   FileType `json:"fileType"`
-	Path       string   `json:"path"`
-	Interval   int      `json:"interval"`
-	RetainSize int      `json:"$retainSize"`
+	FileType         FileType `json:"fileType"`
+	Path             string   `json:"path"`
+	Interval         int      `json:"interval"`
+	IsTable          bool     `json:"isTable"`
+	SendInterval     int      `json:"sendInterval"`
+	ActionAfterRead  int      `json:"actionAfterRead"`
+	MoveTo           string   `json:"moveTo"`
+	HasHeader        bool     `json:"hasHeader"`
+	Columns          []string `json:"columns"`
+	IgnoreStartLines int      `json:"ignoreStartLines"`
+	IgnoreEndLines   int      `json:"ignoreEndLines"`
+	Delimiter        string   `json:"delimiter"`
 }
 
-// The BATCH to load data from file at once
+// FileSource is a batch source that loads all the data from a file at once
 type FileSource struct {
 	file   string
+	isDir  bool
 	config *FileSourceConfig
 }
 
@@ -57,7 +75,9 @@ func (fs *FileSource) Close(ctx api.StreamContext) error {
 }
 
 func (fs *FileSource) Configure(fileName string, props map[string]interface{}) error {
-	cfg := &FileSourceConfig{}
+	cfg := &FileSourceConfig{
+		FileType: JSON_TYPE,
+	}
 	err := cast.MapToStruct(props, cfg)
 	if err != nil {
 		return fmt.Errorf("read properties %v fail with error: %v", props, err)
@@ -71,9 +91,6 @@ func (fs *FileSource) Configure(fileName string, props map[string]interface{}) e
 	if cfg.Path == "" {
 		return errors.New("missing property Path")
 	}
-	if fileName == "" {
-		return errors.New("file name must be specified")
-	}
 	if !filepath.IsAbs(cfg.Path) {
 		cfg.Path, err = conf.GetLoc(cfg.Path)
 		if err != nil {
@@ -81,15 +98,49 @@ func (fs *FileSource) Configure(fileName string, props map[string]interface{}) e
 		}
 	}
 	if fileName != "/$$TEST_CONNECTION$$" {
-		fs.file = path.Join(cfg.Path, fileName)
-
-		if fi, err := os.Stat(fs.file); err != nil {
+		fs.file = filepath.Join(cfg.Path, fileName)
+		fi, err := os.Stat(fs.file)
+		if err != nil {
 			if os.IsNotExist(err) {
 				return fmt.Errorf("file %s not exist", fs.file)
-			} else if !fi.Mode().IsRegular() {
-				return fmt.Errorf("file %s is not a regular file", fs.file)
 			}
+			// Return other stat errors too so that fi is never nil below
+			return fmt.Errorf("fail to stat file %s: %v", fs.file, err)
 		}
+		if fi.IsDir() {
+			fs.isDir = true
+		}
+	}
+	if cfg.IgnoreStartLines < 0 {
+		cfg.IgnoreStartLines = 0
+	}
+	if cfg.IgnoreEndLines < 0 {
+		cfg.IgnoreEndLines = 0
+	}
+	if cfg.ActionAfterRead < 0 || cfg.ActionAfterRead > 2 {
+		return fmt.Errorf("invalid actionAfterRead: %d", cfg.ActionAfterRead)
+	}
+	if cfg.ActionAfterRead == 2 {
+		if cfg.MoveTo == "" {
+			return fmt.Errorf("missing moveTo when actionAfterRead is 2")
+		} else {
+			if !filepath.IsAbs(cfg.MoveTo) {
+				cfg.MoveTo, err = conf.GetLoc(cfg.MoveTo)
+				if err != nil {
+					return fmt.Errorf("invalid moveTo %s: %v", cfg.MoveTo, err)
+				}
+			}
+			fileInfo, err := os.Stat(cfg.MoveTo)
+			if err != nil {
+				err := os.MkdirAll(cfg.MoveTo, os.ModePerm)
+				if err != nil {
+					return fmt.Errorf("fail to create dir for moveTo %s: %v", cfg.MoveTo, err)
+				}
+			} else if !fileInfo.IsDir() {
+				return fmt.Errorf("moveTo %s is not a directory", cfg.MoveTo)
+			}
+		}
+	}
+	if cfg.Delimiter == "" {
+		cfg.Delimiter = ","
 	}
 	fs.config = cfg
 	return nil
@@ -98,8 +149,12 @@ func (fs *FileSource) Configure(fileName string, props map[string]interface{}) e
 func (fs *FileSource) Open(ctx api.StreamContext, consumer chan<- api.SourceTuple, errCh chan<- error) {
 	err := fs.Load(ctx, consumer)
 	if err != nil {
-		errCh <- err
-		return
+		select {
+		case consumer <- &xsql.ErrorSourceTuple{Error: err}:
+			ctx.GetLogger().Errorf("error loading file %s: %v", fs.file, err)
+		case <-ctx.Done():
+			return
+		}
 	}
 	if fs.config.Interval > 0 {
 		ticker := time.NewTicker(time.Millisecond * time.Duration(fs.config.Interval))
@@ -122,38 +177,213 @@ func (fs *FileSource) Open(ctx api.StreamContext, consumer chan<- api.SourceTupl
 }
 
 func (fs *FileSource) Load(ctx api.StreamContext, consumer chan<- api.SourceTuple) error {
+	if fs.isDir {
+		ctx.GetLogger().Debugf("Monitor dir %s", fs.file)
+		entries, err := os.ReadDir(fs.file)
+		if err != nil {
+			return err
+		}
+		for _, entry := range entries {
+			if entry.IsDir() {
+				continue
+			}
+			file := filepath.Join(fs.file, entry.Name())
+			err := fs.parseFile(ctx, file, consumer)
+			if err != nil {
+				ctx.GetLogger().Errorf("parse file %s fail with error: %v", file, err)
+				continue
+			}
+		}
+	} else {
+		err := fs.parseFile(ctx, fs.file, consumer)
+		if err != nil {
+			return err
+		}
+	}
+	// Send an EOF tuple when used as a table to signal the end of this batch
+	if fs.config.IsTable {
+		select {
+		case consumer <- api.NewDefaultSourceTuple(nil, nil):
+			// do nothing
+		case <-ctx.Done():
+			return nil
+		}
+	}
+	ctx.GetLogger().Debug("All tuples sent")
+	return nil
+}
+
+func (fs *FileSource) parseFile(ctx api.StreamContext, file string, consumer chan<- api.SourceTuple) (result error) {
+	r, err := fs.prepareFile(ctx, file)
+	if err != nil {
+		ctx.GetLogger().Debugf("prepare file %s error: %v", file, err)
+		return err
+	}
+	meta := map[string]interface{}{
+		"file": file,
+	}
+	defer func() {
+		ctx.GetLogger().Debugf("Finish loading from file %s", file)
+		if closer, ok := r.(io.Closer); ok {
+			ctx.GetLogger().Debugf("Close reader")
+			closer.Close()
+		}
+		if result == nil {
+			switch fs.config.ActionAfterRead {
+			case 1:
+				if err := os.Remove(file); err != nil {
+					result = err
+				}
+				ctx.GetLogger().Debugf("Remove file %s", file)
+			case 2:
+				targetFile := filepath.Join(fs.config.MoveTo, filepath.Base(file))
+				if err := os.Rename(file, targetFile); err != nil {
+					result = err
+				}
+				ctx.GetLogger().Debugf("Move file %s to %s", file, targetFile)
+			}
+		}
+	}()
+	if err := fs.publish(ctx, r, consumer, meta); err != nil {
+		return err
+	}
+	return nil
+}
+
+func (fs *FileSource) publish(ctx api.StreamContext, file io.Reader, consumer chan<- api.SourceTuple, meta map[string]interface{}) error {
+	ctx.GetLogger().Debug("Start to load")
 	switch fs.config.FileType {
 	case JSON_TYPE:
-		ctx.GetLogger().Debugf("Start to load from file %s", fs.file)
+		r := json.NewDecoder(file)
 		resultMap := make([]map[string]interface{}, 0)
-		err := filex.ReadJsonUnmarshal(fs.file, &resultMap)
+		err := r.Decode(&resultMap)
 		if err != nil {
 			return fmt.Errorf("loaded %s, check error %s", fs.file, err)
 		}
 		ctx.GetLogger().Debug("Sending tuples")
-		if fs.config.RetainSize > 0 && fs.config.RetainSize < len(resultMap) {
-			resultMap = resultMap[(len(resultMap) - fs.config.RetainSize):]
-			ctx.GetLogger().Debug("Sending tuples for retain size %d", fs.config.RetainSize)
-		}
 		for _, m := range resultMap {
 			select {
-			case consumer <- api.NewDefaultSourceTuple(m, nil):
-				// do nothing
+			case consumer <- api.NewDefaultSourceTuple(m, meta):
 			case <-ctx.Done():
 				return nil
 			}
+			if fs.config.SendInterval > 0 {
+				time.Sleep(time.Millisecond * time.Duration(fs.config.SendInterval))
+			}
+		}
+		return nil
+	case CSV_TYPE:
+		r := csv.NewReader(file)
+		r.Comma = rune(fs.config.Delimiter[0])
+		r.TrimLeadingSpace = true
+		r.FieldsPerRecord = -1
+		cols := fs.config.Columns
+		if fs.config.HasHeader {
+			var err error
+			ctx.GetLogger().Debug("Has header")
+			cols, err = r.Read()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				ctx.GetLogger().Warnf("Read file %s encounter error: %v", fs.file, err)
+				return err
+			}
+			ctx.GetLogger().Debugf("Got header %v", cols)
 		}
-		// Send EOF if retain size not set
-		if fs.config.RetainSize == 0 {
+		for {
+			record, err := r.Read()
+			if err == io.EOF {
+				break
+			}
+			if err != nil {
+				ctx.GetLogger().Warnf("Read file %s encounter error: %v", fs.file, err)
+				continue
+			}
+			ctx.GetLogger().Debugf("Read %s", strings.Join(record, ","))
+			var m map[string]interface{}
+			if cols == nil {
+				m = make(map[string]interface{}, len(record))
+				for i, v := range record {
+					m["cols"+strconv.Itoa(i)] = v
+				}
+			} else {
+				m = make(map[string]interface{}, len(cols))
+				for i, v := range cols {
+					m[v] = record[i]
+					if i < len(record) { // guard against short records since FieldsPerRecord is -1
+						m[v] = record[i]
+					}
+			}
 			select {
-			case consumer <- api.NewDefaultSourceTuple(nil, nil):
-				// do nothing
+			case consumer <- api.NewDefaultSourceTuple(m, meta):
 			case <-ctx.Done():
 				return nil
 			}
+			if fs.config.SendInterval > 0 {
+				time.Sleep(time.Millisecond * time.Duration(fs.config.SendInterval))
+			}
 		}
-		ctx.GetLogger().Debug("All tuples sent")
-		return nil
+	default:
+		return fmt.Errorf("invalid file type %s", fs.config.FileType)
+	}
+	return nil
+}
+
+// prepareFile prepares a reader over the file, dropping the ignored start and end lines
+func (fs *FileSource) prepareFile(ctx api.StreamContext, file string) (io.Reader, error) {
+	f, err := os.Open(file)
+	if err != nil {
+		ctx.GetLogger().Error(err)
+		return nil, err
+	}
+	if fs.config.IgnoreStartLines > 0 || fs.config.IgnoreEndLines > 0 {
+		r, w := io.Pipe()
+		go func() {
+			defer func() {
+				ctx.GetLogger().Debugf("Close pipe files %s", file)
+				w.Close()
+				f.Close()
+			}()
+			scanner := bufio.NewScanner(f)
+			scanner.Split(bufio.ScanLines)
+			ln := 0
+			// A ring buffer holding the most recent ignoreEndLines lines; whatever is left in it at EOF is dropped
+			tempLines := make([]string, 0, fs.config.IgnoreEndLines)
+			for scanner.Scan() {
+				if ln >= fs.config.IgnoreStartLines {
+					if fs.config.IgnoreEndLines > 0 { // the last n lines stay in tempLines
+						slot := (ln - fs.config.IgnoreStartLines) % fs.config.IgnoreEndLines
+						if len(tempLines) <= slot { // first round
+							tempLines = append(tempLines, scanner.Text())
+						} else {
+							_, err := w.Write([]byte(tempLines[slot]))
+							if err != nil {
+								ctx.GetLogger().Error(err)
+								break
+							}
+							_, err = w.Write([]byte{'\n'})
+							if err != nil {
+								ctx.GetLogger().Error(err)
+								break
+							}
+							tempLines[slot] = scanner.Text()
+						}
+					} else {
+						_, err = w.Write(scanner.Bytes())
+						if err != nil {
+							ctx.GetLogger().Error(err)
+							break
+						}
+						_, err = w.Write([]byte{'\n'})
+						if err != nil {
+							ctx.GetLogger().Error(err)
+							break
+						}
+					}
+				}
+				ln++
+			}
+		}()
+		return r, nil
 	}
-	return fmt.Errorf("invalid file type %s", fs.config.FileType)
+	return f, nil
 }
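
The ignoreEndLines branch of prepareFile is a ring buffer: a line is only written out once ignoreEndLines newer lines have been scanned, so the final ignoreEndLines lines never leave the buffer. Here is a minimal standalone sketch of the same technique; dropLastN is a hypothetical helper, not part of this commit.

package main

import (
	"bufio"
	"fmt"
	"strings"
)

// dropLastN emits every line of the input except the final n, using a
// fixed-size ring buffer as prepareFile does for ignoreEndLines.
func dropLastN(input string, n int) []string {
	scanner := bufio.NewScanner(strings.NewReader(input))
	ring := make([]string, 0, n)
	var out []string
	ln := 0
	for scanner.Scan() {
		if n > 0 {
			slot := ln % n
			if len(ring) <= slot { // the first n lines just fill the buffer
				ring = append(ring, scanner.Text())
			} else { // emit the line this slot evicts
				out = append(out, ring[slot])
				ring[slot] = scanner.Text()
			}
		} else {
			out = append(out, scanner.Text())
		}
		ln++
	}
	// the last n lines are still in the ring and are silently dropped
	return out
}

func main() {
	fmt.Println(dropLastN("a\nb\nc\nd\ne", 2)) // [a b c]
}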

+ 196 - 0
internal/topo/source/file_source_test.go

@@ -0,0 +1,196 @@
+// Copyright 2022 EMQ Technologies Co., Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package source
+
+import (
+	"fmt"
+	"github.com/lf-edge/ekuiper/internal/topo/mock"
+	"github.com/lf-edge/ekuiper/pkg/api"
+	"io"
+	"os"
+	"path/filepath"
+	"testing"
+	"time"
+)
+
+func TestJsonFile(t *testing.T) {
+	path, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	meta := map[string]interface{}{
+		"file": filepath.Join(path, "test", "test.json"),
+	}
+	exp := []api.SourceTuple{
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(1), "name": "John Doe"}, meta),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(2), "name": "Jane Doe"}, meta),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(3), "name": "John Smith"}, meta),
+	}
+	p := map[string]interface{}{
+		"path": filepath.Join(path, "test"),
+	}
+	r := &FileSource{}
+	err = r.Configure("test.json", p)
+	if err != nil {
+		t.Errorf(err.Error())
+		return
+	}
+	mock.TestSourceOpen(r, exp, t)
+}
+
+func TestJsonFolder(t *testing.T) {
+	path, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	moveToFolder := filepath.Join(path, "test", "moveTo")
+	exp := []api.SourceTuple{
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(1), "name": "John Doe", "height": 1.82}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f1.json")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(2), "name": "Jane Doe", "height": 1.65}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f1.json")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(3), "name": "Will Doe", "height": 1.76}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f2.json")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(4), "name": "Dude Doe", "height": 1.92}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(5), "name": "Jane Doe", "height": 1.72}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": float64(6), "name": "John Smith", "height": 2.22}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}),
+	}
+	p := map[string]interface{}{
+		"path":            filepath.Join(path, "test"),
+		"actionAfterRead": 2,
+		"moveTo":          moveToFolder,
+	}
+	r := &FileSource{}
+	err = r.Configure("json", p)
+	if err != nil {
+		t.Errorf(err.Error())
+		return
+	}
+	mock.TestSourceOpen(r, exp, t)
+	files, err := os.ReadDir(moveToFolder)
+	if err != nil {
+		t.Error(err)
+	}
+	if len(files) != 3 {
+		t.Errorf("expect 3 files in moveTo folder, but got %d", len(files))
+	}
+	for _, f := range files {
+		os.Rename(filepath.Join(moveToFolder, f.Name()), filepath.Join(path, "test", "json", f.Name()))
+	}
+}
+
+func TestCSVFolder(t *testing.T) {
+	// Move test files to temp folder
+	path, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	testFolder := filepath.Join(path, "test", "csvTemp")
+	err = os.MkdirAll(testFolder, 0755)
+	if err != nil {
+		t.Fatal(err)
+	}
+	files, err := os.ReadDir(filepath.Join(path, "test", "csv"))
+	if err != nil {
+		t.Fatal(err)
+	}
+	for _, f := range files {
+		err = copyFile(filepath.Join(path, "test", "csv", f.Name()), filepath.Join(testFolder, f.Name()))
+		if err != nil {
+			t.Fatal(err)
+		}
+	}
+	// Start testing
+	exp := []api.SourceTuple{
+		api.NewDefaultSourceTuple(map[string]interface{}{"@": "#", "id": "1", "ts": "1670170500", "value": "161.927872"}, map[string]interface{}{"file": filepath.Join(path, "test", "csvTemp", "a.csv")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"@": "#", "id": "2", "ts": "1670170900", "value": "176"}, map[string]interface{}{"file": filepath.Join(path, "test", "csvTemp", "a.csv")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": "33", "ts": "1670270500", "humidity": "89"}, map[string]interface{}{"file": filepath.Join(path, "test", "csvTemp", "b.csv")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"id": "44", "ts": "1670270900", "humidity": "76"}, map[string]interface{}{"file": filepath.Join(path, "test", "csvTemp", "b.csv")}),
+	}
+	p := map[string]interface{}{
+		"fileType":         "csv",
+		"path":             filepath.Join(path, "test"),
+		"actionAfterRead":  1,
+		"hasHeader":        true,
+		"delimiter":        "\t",
+		"ignoreStartLines": 3,
+		"ignoreEndLines":   1,
+	}
+	r := &FileSource{}
+	err = r.Configure("csvTemp", p)
+	if err != nil {
+		t.Errorf(err.Error())
+		return
+	}
+	mock.TestSourceOpen(r, exp, t)
+	// wait for the file deletions to take effect
+	time.Sleep(100 * time.Millisecond)
+	files, err = os.ReadDir(testFolder)
+	if err != nil {
+		t.Error(err)
+	}
+	if len(files) != 0 {
+		t.Errorf("expect 0 files in csvTemp folder, but got %d", len(files))
+	}
+}
+
+func copyFile(src, dst string) error {
+	sourceFileStat, err := os.Stat(src)
+	if err != nil {
+		return err
+	}
+
+	if !sourceFileStat.Mode().IsRegular() {
+		return fmt.Errorf("%s is not a regular file", src)
+	}
+
+	source, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer source.Close()
+
+	destination, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	defer destination.Close()
+	_, err = io.Copy(destination, source)
+	return err
+}
+
+func TestCSVFile(t *testing.T) {
+	path, err := os.Getwd()
+	if err != nil {
+		t.Fatal(err)
+	}
+	exp := []api.SourceTuple{
+		api.NewDefaultSourceTuple(map[string]interface{}{"ns": "@", "id": "id", "ts": "ts", "number": "value"}, map[string]interface{}{"file": filepath.Join(path, "test", "csv", "a.csv")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"ns": "#", "id": "1", "ts": "1670170500", "number": "161.927872"}, map[string]interface{}{"file": filepath.Join(path, "test", "csv", "a.csv")}),
+		api.NewDefaultSourceTuple(map[string]interface{}{"ns": "#", "id": "2", "ts": "1670170900", "number": "176"}, map[string]interface{}{"file": filepath.Join(path, "test", "csv", "a.csv")}),
+	}
+	p := map[string]interface{}{
+		"fileType":         "csv",
+		"path":             filepath.Join(path, "test", "csv"),
+		"delimiter":        "\t",
+		"ignoreStartLines": 3,
+		"ignoreEndLines":   1,
+		"columns":          []string{"ns", "id", "ts", "number"},
+	}
+	r := &FileSource{}
+	err = r.Configure("a.csv", p)
+	if err != nil {
+		t.Errorf(err.Error())
+		return
+	}
+	mock.TestSourceOpen(r, exp, t)
+}

+ 7 - 0
internal/topo/source/test/csv/a.csv

@@ -0,0 +1,7 @@
+//	some irrelevant content
+<!	Version=1.0	Code=UTF-8	!>
+<special content>
+@	id	ts	value
+#	1	1670170500	161.927872
+#	2	1670170900	176
+<special content>
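
With the options TestCSVFolder uses (hasHeader: true, delimiter "\t", ignoreStartLines: 3, ignoreEndLines: 1), only lines 4 through 6 of a.csv survive prepareFile. A minimal sketch of what the csv branch of publish then emits, with the trimmed content inlined by hand for illustration:

package main

import (
	"encoding/csv"
	"fmt"
	"strings"
)

func main() {
	// a.csv after prepareFile has dropped the first 3 and the last line.
	trimmed := "@\tid\tts\tvalue\n" +
		"#\t1\t1670170500\t161.927872\n" +
		"#\t2\t1670170900\t176\n"
	r := csv.NewReader(strings.NewReader(trimmed))
	r.Comma = '\t'
	r.TrimLeadingSpace = true
	r.FieldsPerRecord = -1
	header, _ := r.Read() // hasHeader: true consumes the first record
	for {
		rec, err := r.Read()
		if err != nil {
			break
		}
		m := make(map[string]interface{}, len(header))
		for i, k := range header {
			m[k] = rec[i]
		}
		fmt.Println(m)
		// map[@:# id:1 ts:1670170500 value:161.927872]
		// map[@:# id:2 ts:1670170900 value:176]
	}
}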

+ 7 - 0
internal/topo/source/test/csv/b.csv

@@ -0,0 +1,7 @@
+//	some irrelevant content
+<!	Version=1.0	Code=UTF-8	!>
+<special content>
+id	ts	humidity
+33	1670270500	89
+44	1670270900	76
+<special content>

+ 12 - 0
internal/topo/source/test/json/f1.json

@@ -0,0 +1,12 @@
+[
+  {
+    "id": 1,
+    "name": "John Doe",
+    "height": 1.82
+  },
+  {
+    "id": 2,
+    "name": "Jane Doe",
+    "height": 1.65
+  }
+]

+ 7 - 0
internal/topo/source/test/json/f2.json

@@ -0,0 +1,7 @@
+[
+  {
+    "id": 3,
+    "name": "Will Doe",
+    "height": 1.76
+  }
+]

+ 17 - 0
internal/topo/source/test/json/f3.json

@@ -0,0 +1,17 @@
+[
+  {
+    "id": 4,
+    "name": "Dude Doe",
+    "height": 1.92
+  },
+  {
+    "id": 5,
+    "name": "Jane Doe",
+    "height": 1.72
+  },
+  {
+    "id": 6,
+    "name": "John Smith",
+    "height": 2.22
+  }
+]

+ 14 - 0
internal/topo/source/test/test.json

@@ -0,0 +1,14 @@
+[
+  {
+    "id": 1,
+    "name": "John Doe"
+  },
+  {
+    "id": 2,
+    "name": "Jane Doe"
+  },
+  {
+    "id": 3,
+    "name": "John Smith"
+  }
+]
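
One detail the JSON tests above rely on: decoding into []map[string]interface{}, as the json branch of publish does, turns every JSON number into float64. That is why the expected tuples use float64(1) rather than integer ids. A minimal illustration:

package main

import (
	"encoding/json"
	"fmt"
	"strings"
)

func main() {
	// encoding/json decodes untyped JSON numbers as float64.
	data := `[{"id": 1, "name": "John Doe"}]`
	var rows []map[string]interface{}
	if err := json.NewDecoder(strings.NewReader(data)).Decode(&rows); err != nil {
		panic(err)
	}
	fmt.Printf("%T\n", rows[0]["id"]) // float64
}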

+ 2 - 0
internal/topo/topotest/mock_topo.go

@@ -180,6 +180,8 @@ func sendData(t *testing.T, dataLength int, metrics map[string]interface{}, data
 	mockClock.Add(0)
 	// TODO assume multiple data source send the data in order and has the same length
 	for i := 0; i < dataLength; i++ {
+		// wait for table to load
+		time.Sleep(100 * time.Millisecond)
 		for _, d := range datas {
 			time.Sleep(time.Duration(wait) * time.Millisecond)
 			// Make sure time is going forward only