file_source_test.go 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281
  1. // Copyright 2022-2023 EMQ Technologies Co., Ltd.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package file
  15. import (
  16. "fmt"
  17. "io"
  18. "os"
  19. "path/filepath"
  20. "testing"
  21. "time"
  22. "github.com/benbjohnson/clock"
  23. "github.com/stretchr/testify/assert"
  24. "github.com/lf-edge/ekuiper/internal/conf"
  25. "github.com/lf-edge/ekuiper/internal/io/mock"
  26. "github.com/lf-edge/ekuiper/pkg/api"
  27. )
  28. func TestJsonFile(t *testing.T) {
  29. path, err := os.Getwd()
  30. if err != nil {
  31. t.Fatal(err)
  32. }
  33. meta := map[string]interface{}{
  34. "file": filepath.Join(path, "test", "test.json"),
  35. }
  36. mc := conf.Clock.(*clock.Mock)
  37. exp := []api.SourceTuple{
  38. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(1), "name": "John Doe"}, meta, mc.Now()),
  39. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(2), "name": "Jane Doe"}, meta, mc.Now()),
  40. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(3), "name": "John Smith"}, meta, mc.Now()),
  41. }
  42. p := map[string]interface{}{
  43. "path": filepath.Join(path, "test"),
  44. }
  45. r := &FileSource{}
  46. err = r.Configure("test.json", p)
  47. if err != nil {
  48. t.Errorf(err.Error())
  49. return
  50. }
  51. mock.TestSourceOpen(r, exp, t)
  52. }
  53. func TestJsonFolder(t *testing.T) {
  54. path, err := os.Getwd()
  55. if err != nil {
  56. t.Fatal(err)
  57. }
  58. mc := conf.Clock.(*clock.Mock)
  59. moveToFolder := filepath.Join(path, "test", "moveTo")
  60. exp := []api.SourceTuple{
  61. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(1), "name": "John Doe", "height": 1.82}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f1.json")}, mc.Now()),
  62. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(2), "name": "Jane Doe", "height": 1.65}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f1.json")}, mc.Now()),
  63. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(3), "name": "Will Doe", "height": 1.76}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f2.json")}, mc.Now()),
  64. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(4), "name": "Dude Doe", "height": 1.92}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}, mc.Now()),
  65. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(5), "name": "Jane Doe", "height": 1.72}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}, mc.Now()),
  66. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(6), "name": "John Smith", "height": 2.22}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}, mc.Now()),
  67. }
  68. p := map[string]interface{}{
  69. "path": filepath.Join(path, "test"),
  70. "actionAfterRead": 2,
  71. "moveTo": moveToFolder,
  72. }
  73. r := &FileSource{}
  74. err = r.Configure("json", p)
  75. if err != nil {
  76. t.Errorf(err.Error())
  77. return
  78. }
  79. mock.TestSourceOpen(r, exp, t)
  80. // wait for the move to finish
  81. time.Sleep(100 * time.Millisecond)
  82. files, err := os.ReadDir(moveToFolder)
  83. if err != nil {
  84. t.Error(err)
  85. }
  86. if len(files) != 3 {
  87. t.Errorf("expect 3 files in moveTo folder, but got %d", len(files))
  88. }
  89. for _, f := range files {
  90. os.Rename(filepath.Join(moveToFolder, f.Name()), filepath.Join(path, "test", "json", f.Name()))
  91. }
  92. }
  93. func TestJsonFolderParallel(t *testing.T) {
  94. path, err := os.Getwd()
  95. if err != nil {
  96. t.Fatal(err)
  97. }
  98. mc := conf.Clock.(*clock.Mock)
  99. exp := []api.SourceTuple{
  100. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(1), "name": "John Doe", "height": 1.82}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f1.json")}, mc.Now()),
  101. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(2), "name": "Jane Doe", "height": 1.65}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f1.json")}, mc.Now()),
  102. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(3), "name": "Will Doe", "height": 1.76}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f2.json")}, mc.Now()),
  103. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(4), "name": "Dude Doe", "height": 1.92}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}, mc.Now()),
  104. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(5), "name": "Jane Doe", "height": 1.72}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}, mc.Now()),
  105. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(6), "name": "John Smith", "height": 2.22}, map[string]interface{}{"file": filepath.Join(path, "test", "json", "f3.json")}, mc.Now()),
  106. }
  107. p := map[string]interface{}{
  108. "path": filepath.Join(path, "test"),
  109. "parallel": true,
  110. }
  111. r := &FileSource{}
  112. err = r.Configure("json", p)
  113. if err != nil {
  114. t.Errorf(err.Error())
  115. return
  116. }
  117. result, err := mock.RunMockSource(r, len(exp))
  118. if err != nil {
  119. t.Errorf(err.Error())
  120. return
  121. }
  122. checkIds := make([]bool, len(exp))
  123. // The result is not ordered, so we need to check the ids
  124. for i, m := range result {
  125. id, ok := m.Message()["id"]
  126. if !ok {
  127. t.Errorf("missing id in message %d: %v", i, r)
  128. }
  129. idInt := int(id.(float64)) - 1
  130. if checkIds[idInt] == true {
  131. t.Errorf("id %d already exists", idInt)
  132. }
  133. checkIds[idInt] = true
  134. assert.Equal(t, exp[idInt], m)
  135. }
  136. }
  137. func TestCSVFolder(t *testing.T) {
  138. // Move test files to temp folder
  139. path, err := os.Getwd()
  140. if err != nil {
  141. t.Fatal(err)
  142. }
  143. testFolder := filepath.Join(path, "test", "csvTemp")
  144. err = os.MkdirAll(testFolder, 0o755)
  145. if err != nil {
  146. t.Fatal(err)
  147. }
  148. files, err := os.ReadDir(filepath.Join(path, "test", "csv"))
  149. if err != nil {
  150. t.Fatal(err)
  151. }
  152. for _, f := range files {
  153. err = copy(filepath.Join(path, "test", "csv", f.Name()), filepath.Join(testFolder, f.Name()))
  154. if err != nil {
  155. t.Fatal(err)
  156. }
  157. }
  158. mc := conf.Clock.(*clock.Mock)
  159. // Start testing
  160. exp := []api.SourceTuple{
  161. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"@": "#", "id": "1", "ts": "1670170500", "value": "161.927872"}, map[string]interface{}{"file": filepath.Join(path, "test", "csvTemp", "a.csv")}, mc.Now()),
  162. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"@": "#", "id": "2", "ts": "1670170900", "value": "176"}, map[string]interface{}{"file": filepath.Join(path, "test", "csvTemp", "a.csv")}, mc.Now()),
  163. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": "33", "ts": "1670270500", "humidity": "89"}, map[string]interface{}{"file": filepath.Join(path, "test", "csvTemp", "b.csv")}, mc.Now()),
  164. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": "44", "ts": "1670270900", "humidity": "76"}, map[string]interface{}{"file": filepath.Join(path, "test", "csvTemp", "b.csv")}, mc.Now()),
  165. }
  166. p := map[string]interface{}{
  167. "fileType": "csv",
  168. "path": filepath.Join(path, "test"),
  169. "actionAfterRead": 1,
  170. "hasHeader": true,
  171. "delimiter": "\t",
  172. "ignoreStartLines": 3,
  173. "ignoreEndLines": 1,
  174. }
  175. r := &FileSource{}
  176. err = r.Configure("csvTemp", p)
  177. if err != nil {
  178. t.Errorf(err.Error())
  179. return
  180. }
  181. mock.TestSourceOpen(r, exp, t)
  182. // wait for file deleted takes effect
  183. time.Sleep(100 * time.Millisecond)
  184. files, err = os.ReadDir(testFolder)
  185. if err != nil {
  186. t.Error(err)
  187. }
  188. if len(files) != 0 {
  189. t.Errorf("expect 0 files in csvTemp folder, but got %d", len(files))
  190. }
  191. }
  192. func copy(src, dst string) error {
  193. sourceFileStat, err := os.Stat(src)
  194. if err != nil {
  195. return err
  196. }
  197. if !sourceFileStat.Mode().IsRegular() {
  198. return fmt.Errorf("%s is not a regular file", src)
  199. }
  200. source, err := os.Open(src)
  201. if err != nil {
  202. return err
  203. }
  204. defer source.Close()
  205. destination, err := os.Create(dst)
  206. if err != nil {
  207. return err
  208. }
  209. defer destination.Close()
  210. _, err = io.Copy(destination, source)
  211. return err
  212. }
  213. func TestCSVFile(t *testing.T) {
  214. path, err := os.Getwd()
  215. if err != nil {
  216. t.Fatal(err)
  217. }
  218. mc := conf.Clock.(*clock.Mock)
  219. exp := []api.SourceTuple{
  220. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"ns": "@", "id": "id", "ts": "ts", "number": "value"}, map[string]interface{}{"file": filepath.Join(path, "test", "csv", "a.csv")}, mc.Now()),
  221. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"ns": "#", "id": "1", "ts": "1670170500", "number": "161.927872"}, map[string]interface{}{"file": filepath.Join(path, "test", "csv", "a.csv")}, mc.Now()),
  222. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"ns": "#", "id": "2", "ts": "1670170900", "number": "176"}, map[string]interface{}{"file": filepath.Join(path, "test", "csv", "a.csv")}, mc.Now()),
  223. }
  224. p := map[string]interface{}{
  225. "fileType": "csv",
  226. "path": filepath.Join(path, "test", "csv"),
  227. "delimiter": "\t",
  228. "ignoreStartLines": 3,
  229. "ignoreEndLines": 1,
  230. "columns": []string{"ns", "id", "ts", "number"},
  231. }
  232. r := &FileSource{}
  233. err = r.Configure("a.csv", p)
  234. if err != nil {
  235. t.Errorf(err.Error())
  236. return
  237. }
  238. mock.TestSourceOpen(r, exp, t)
  239. }
  240. func TestJsonLines(t *testing.T) {
  241. path, err := os.Getwd()
  242. if err != nil {
  243. t.Fatal(err)
  244. }
  245. meta := map[string]interface{}{
  246. "file": filepath.Join(path, "test", "test.lines"),
  247. }
  248. mc := conf.Clock.(*clock.Mock)
  249. exp := []api.SourceTuple{
  250. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(1), "name": "John Doe"}, meta, mc.Now()),
  251. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(2), "name": "Jane Doe"}, meta, mc.Now()),
  252. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(3), "name": "John Smith"}, meta, mc.Now()),
  253. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(4), "name": "John Smith"}, meta, mc.Now()),
  254. api.NewDefaultSourceTupleWithTime(map[string]interface{}{"id": float64(5), "name": "John Smith"}, meta, mc.Now()),
  255. }
  256. p := map[string]interface{}{
  257. "path": filepath.Join(path, "test"),
  258. "fileType": "lines",
  259. }
  260. r := &FileSource{}
  261. err = r.Configure("test.lines", p)
  262. if err != nil {
  263. t.Errorf(err.Error())
  264. return
  265. }
  266. mock.TestSourceOpen(r, exp, t)
  267. }