// sink_node.go
  1. // Copyright 2022-2023 EMQ Technologies Co., Ltd.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package node
  15. import (
  16. "fmt"
  17. "strings"
  18. "sync"
  19. "github.com/lf-edge/ekuiper/internal/binder/io"
  20. "github.com/lf-edge/ekuiper/internal/conf"
  21. sinkUtil "github.com/lf-edge/ekuiper/internal/io/sink"
  22. "github.com/lf-edge/ekuiper/internal/topo/context"
  23. "github.com/lf-edge/ekuiper/internal/topo/node/cache"
  24. nodeConf "github.com/lf-edge/ekuiper/internal/topo/node/conf"
  25. "github.com/lf-edge/ekuiper/internal/topo/node/metric"
  26. "github.com/lf-edge/ekuiper/internal/topo/transform"
  27. "github.com/lf-edge/ekuiper/internal/xsql"
  28. "github.com/lf-edge/ekuiper/pkg/api"
  29. "github.com/lf-edge/ekuiper/pkg/cast"
  30. "github.com/lf-edge/ekuiper/pkg/errorx"
  31. "github.com/lf-edge/ekuiper/pkg/infra"
  32. "github.com/lf-edge/ekuiper/pkg/message"
  33. )
// SinkConf is the set of common properties every sink accepts, decoded from
// the rule's sink options in parseConf. It embeds conf.SinkConf, the global
// sink defaults (cache/resend settings validated by SinkConf.Validate).
type SinkConf struct {
	Concurrency    int      `json:"concurrency"`    // number of parallel sink worker instances, defaults to 1
	Omitempty      bool     `json:"omitIfEmpty"`    // drop empty results instead of collecting them
	SendSingle     bool     `json:"sendSingle"`     // collect each map individually instead of the whole slice
	DataTemplate   string   `json:"dataTemplate"`   // optional template passed to transform.GenTransform
	Format         string   `json:"format"`         // encode format; defaults to "json" (see parseConf)
	SchemaId       string   `json:"schemaId"`       // schema id for formats that need one (e.g. protobuf)
	Delimiter      string   `json:"delimiter"`      // delimiter for the delimited format
	BufferLength   int      `json:"bufferLength"`   // capacity of the data/cache channels, defaults to 1024
	Fields         []string `json:"fields"`         // field selection passed to the transform
	DataField      string   `json:"dataField"`      // field to extract as payload; falls back to "tableDataField" option
	BatchSize      int      `json:"batchSize"`      // >0 enables batch sending via SendManager
	LingerInterval int      `json:"lingerInterval"` // >0 enables time-based batch flushing via SendManager
	conf.SinkConf
}
  49. func (sc *SinkConf) isBatchSinkEnabled() bool {
  50. if sc.BatchSize > 0 || sc.LingerInterval > 0 {
  51. return true
  52. }
  53. return false
  54. }
// SinkNode is the terminal node of a rule topology: it reads results from its
// input channel and writes them to one or more sink instances.
type SinkNode struct {
	*defaultSinkNode
	// static
	sinkType string
	mutex    sync.RWMutex // guards sinks and statManagers while workers start up
	// configs (also static for sinks)
	options map[string]interface{}
	isMock  bool // true when sinks were injected via NewSinkNodeWithSink (tests only)
	// states varies after restart
	sinks []api.Sink
}
  66. func NewSinkNode(name string, sinkType string, props map[string]interface{}) *SinkNode {
  67. bufferLength := 1024
  68. if c, ok := props["bufferLength"]; ok {
  69. if t, err := cast.ToInt(c, cast.STRICT); err != nil || t <= 0 {
  70. // invalid property bufferLength
  71. } else {
  72. bufferLength = t
  73. }
  74. }
  75. return &SinkNode{
  76. defaultSinkNode: &defaultSinkNode{
  77. input: make(chan interface{}, bufferLength),
  78. defaultNode: &defaultNode{
  79. name: name,
  80. concurrency: 1,
  81. ctx: nil,
  82. },
  83. },
  84. sinkType: sinkType,
  85. options: props,
  86. }
  87. }
  88. // NewSinkNodeWithSink Only for mock source, do not use it in production
  89. func NewSinkNodeWithSink(name string, sink api.Sink, props map[string]interface{}) *SinkNode {
  90. return &SinkNode{
  91. defaultSinkNode: &defaultSinkNode{
  92. input: make(chan interface{}, 1024),
  93. defaultNode: &defaultNode{
  94. name: name,
  95. concurrency: 1,
  96. ctx: nil,
  97. },
  98. },
  99. sinks: []api.Sink{sink},
  100. options: props,
  101. isMock: true,
  102. }
  103. }
  104. func (m *SinkNode) Open(ctx api.StreamContext, result chan<- error) {
  105. m.ctx = ctx
  106. logger := ctx.GetLogger()
  107. logger.Debugf("open sink node %s", m.name)
  108. go func() {
  109. err := infra.SafeRun(func() error {
  110. sconf, err := m.parseConf(logger)
  111. if err != nil {
  112. return err
  113. }
  114. tf, err := transform.GenTransform(sconf.DataTemplate, sconf.Format, sconf.SchemaId, sconf.Delimiter, sconf.DataField, sconf.Fields)
  115. if err != nil {
  116. msg := fmt.Sprintf("property dataTemplate %v is invalid: %v", sconf.DataTemplate, err)
  117. logger.Warnf(msg)
  118. return fmt.Errorf(msg)
  119. }
  120. ctx = context.WithValue(ctx.(*context.DefaultContext), context.TransKey, tf)
  121. m.reset()
  122. logger.Infof("open sink node %d instances", m.concurrency)
  123. for i := 0; i < m.concurrency; i++ { // workers
  124. go func(instance int) {
  125. panicOrError := infra.SafeRun(func() error {
  126. var (
  127. sink api.Sink
  128. err error
  129. )
  130. if !m.isMock {
  131. logger.Debugf("Trying to get sink for rule %s with options %v\n", ctx.GetRuleId(), m.options)
  132. sink, err = getSink(m.sinkType, m.options)
  133. if err != nil {
  134. return err
  135. }
  136. logger.Debugf("Successfully get the sink %s", m.sinkType)
  137. m.mutex.Lock()
  138. m.sinks = append(m.sinks, sink)
  139. m.mutex.Unlock()
  140. logger.Debugf("Now is to open sink for rule %s.\n", ctx.GetRuleId())
  141. if err := sink.Open(ctx); err != nil {
  142. return err
  143. }
  144. logger.Debugf("Successfully open sink for rule %s.\n", ctx.GetRuleId())
  145. } else {
  146. sink = m.sinks[instance]
  147. }
  148. stats, err := metric.NewStatManager(ctx, "sink")
  149. if err != nil {
  150. return err
  151. }
  152. m.mutex.Lock()
  153. m.statManagers = append(m.statManagers, stats)
  154. m.mutex.Unlock()
  155. var sendManager *sinkUtil.SendManager
  156. if sconf.isBatchSinkEnabled() {
  157. sendManager, err = sinkUtil.NewSendManager(sconf.BatchSize, sconf.LingerInterval)
  158. if err != nil {
  159. return err
  160. }
  161. go sendManager.Run(ctx)
  162. go doCollectDataInBatch(ctx, sink, sendManager, stats)
  163. }
  164. if !sconf.EnableCache {
  165. for {
  166. select {
  167. case data := <-m.input:
  168. processed := false
  169. if data, processed = m.preprocess(data); processed {
  170. break
  171. }
  172. stats.SetBufferLength(int64(len(m.input)))
  173. stats.IncTotalRecordsIn()
  174. err := doCollect(ctx, sink, data, sendManager, stats, sconf)
  175. if err != nil {
  176. logger.Warnf("sink collect error: %v", err)
  177. }
  178. case <-ctx.Done():
  179. logger.Infof("sink node %s instance %d done", m.name, instance)
  180. if err := sink.Close(ctx); err != nil {
  181. logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
  182. }
  183. return nil
  184. }
  185. }
  186. } else {
  187. logger.Infof("Creating sink cache")
  188. dataCh := make(chan []map[string]interface{}, sconf.BufferLength)
  189. // cache for normal sink data
  190. c := cache.NewSyncCache(ctx, dataCh, result, &sconf.SinkConf, sconf.BufferLength)
  191. // One cache queue to mix live data and resend data. Send by time order
  192. if !sconf.ResendAlterQueue {
  193. // sync mode, the ack is already in order
  194. for {
  195. select {
  196. case data := <-m.input:
  197. processed := false
  198. if data, processed = m.preprocess(data); processed {
  199. break
  200. }
  201. stats.IncTotalRecordsIn()
  202. stats.SetBufferLength(int64(len(dataCh) + c.CacheLength))
  203. outs := itemToMap(data)
  204. if sconf.Omitempty && (data == nil || len(outs) == 0) {
  205. ctx.GetLogger().Debugf("receive empty in sink")
  206. break
  207. }
  208. select {
  209. case dataCh <- outs:
  210. case <-ctx.Done():
  211. }
  212. case data := <-c.Out:
  213. stats.ProcessTimeStart()
  214. stats.SetBufferLength(int64(len(dataCh) + c.CacheLength))
  215. err := doCollectMaps(ctx, sink, sconf, data, sendManager, stats, false)
  216. ack := checkAck(ctx, data, err)
  217. select {
  218. case c.Ack <- ack:
  219. case <-ctx.Done():
  220. }
  221. stats.ProcessTimeEnd()
  222. case <-ctx.Done():
  223. logger.Infof("sink node %s instance %d done", m.name, instance)
  224. if err := sink.Close(ctx); err != nil {
  225. logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
  226. }
  227. return nil
  228. }
  229. }
  230. } else {
  231. resendCh := make(chan []map[string]interface{}, sconf.BufferLength)
  232. rq := cache.NewSyncCache(ctx, resendCh, result, &sconf.SinkConf, sconf.BufferLength)
  233. receiveQ := func(data interface{}) {
  234. processed := false
  235. if data, processed = m.preprocess(data); processed {
  236. return
  237. }
  238. stats.IncTotalRecordsIn()
  239. stats.SetBufferLength(int64(len(dataCh) + c.CacheLength + rq.CacheLength))
  240. outs := itemToMap(data)
  241. if sconf.Omitempty && (data == nil || len(outs) == 0) {
  242. ctx.GetLogger().Debugf("receive empty in sink")
  243. return
  244. }
  245. select {
  246. case dataCh <- outs:
  247. case <-ctx.Done():
  248. }
  249. select {
  250. case resendCh <- nil:
  251. case <-ctx.Done():
  252. }
  253. }
  254. normalQ := func(data []map[string]interface{}) {
  255. stats.ProcessTimeStart()
  256. stats.SetBufferLength(int64(len(dataCh) + c.CacheLength + rq.CacheLength))
  257. ctx.GetLogger().Debugf("sending data: %v", data)
  258. err := doCollectMaps(ctx, sink, sconf, data, sendManager, stats, false)
  259. ack := checkAck(ctx, data, err)
  260. // If ack is false, add it to the resend queue
  261. if !ack {
  262. select {
  263. case resendCh <- data:
  264. case <-ctx.Done():
  265. }
  266. }
  267. // Always ack for the normal queue as fail items are handled by the resend queue
  268. select {
  269. case c.Ack <- true:
  270. case <-ctx.Done():
  271. }
  272. stats.ProcessTimeEnd()
  273. }
  274. resendQ := func(data []map[string]interface{}) {
  275. ctx.GetLogger().Debugf("resend data: %v", data)
  276. stats.SetBufferLength(int64(len(dataCh) + c.CacheLength + rq.CacheLength))
  277. if sconf.ResendIndicatorField != "" {
  278. for _, item := range data {
  279. item[sconf.ResendIndicatorField] = true
  280. }
  281. }
  282. err := doCollectMaps(ctx, sink, sconf, data, sendManager, stats, true)
  283. ack := checkAck(ctx, data, err)
  284. select {
  285. case rq.Ack <- ack:
  286. case <-ctx.Done():
  287. }
  288. }
  289. doneQ := func() {
  290. logger.Infof("sink node %s instance %d done", m.name, instance)
  291. if err := sink.Close(ctx); err != nil {
  292. logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
  293. }
  294. }
  295. if sconf.ResendPriority == 0 {
  296. for {
  297. select {
  298. case data := <-m.input:
  299. receiveQ(data)
  300. case data := <-c.Out:
  301. normalQ(data)
  302. case data := <-rq.Out:
  303. resendQ(data)
  304. case <-ctx.Done():
  305. doneQ()
  306. return nil
  307. }
  308. }
  309. } else if sconf.ResendPriority < 0 { // normal queue has higher priority
  310. for {
  311. select {
  312. case data := <-m.input:
  313. receiveQ(data)
  314. case data := <-c.Out:
  315. normalQ(data)
  316. case <-ctx.Done():
  317. doneQ()
  318. return nil
  319. default:
  320. select {
  321. case data := <-c.Out:
  322. normalQ(data)
  323. case data := <-rq.Out:
  324. resendQ(data)
  325. }
  326. }
  327. }
  328. } else {
  329. for {
  330. select {
  331. case data := <-m.input:
  332. receiveQ(data)
  333. case data := <-rq.Out:
  334. resendQ(data)
  335. case <-ctx.Done():
  336. doneQ()
  337. return nil
  338. default:
  339. select {
  340. case data := <-c.Out:
  341. normalQ(data)
  342. case data := <-rq.Out:
  343. resendQ(data)
  344. }
  345. }
  346. }
  347. }
  348. }
  349. }
  350. })
  351. if panicOrError != nil {
  352. infra.DrainError(ctx, panicOrError, result)
  353. }
  354. }(i)
  355. }
  356. return nil
  357. })
  358. if err != nil {
  359. infra.DrainError(ctx, err, result)
  360. }
  361. }()
  362. }
  363. func checkAck(ctx api.StreamContext, data interface{}, err error) bool {
  364. if err != nil {
  365. if strings.HasPrefix(err.Error(), errorx.IOErr) { // do not log to prevent a lot of logs!
  366. return false
  367. } else {
  368. ctx.GetLogger().Warnf("sink node %s instance %d publish %s error: %v", ctx.GetOpId(), ctx.GetInstanceId(), data, err)
  369. }
  370. } else {
  371. ctx.GetLogger().Debugf("sent data: %v", data)
  372. }
  373. return true
  374. }
  375. func (m *SinkNode) parseConf(logger api.Logger) (*SinkConf, error) {
  376. sconf := &SinkConf{
  377. Concurrency: 1,
  378. Omitempty: false,
  379. SendSingle: false,
  380. DataTemplate: "",
  381. SinkConf: *conf.Config.Sink,
  382. BufferLength: 1024,
  383. }
  384. err := cast.MapToStruct(m.options, sconf)
  385. if err != nil {
  386. return nil, fmt.Errorf("read properties %v fail with error: %v", m.options, err)
  387. }
  388. if sconf.Concurrency <= 0 {
  389. logger.Warnf("invalid type for concurrency property, should be positive integer but found %t", sconf.Concurrency)
  390. sconf.Concurrency = 1
  391. }
  392. m.concurrency = sconf.Concurrency
  393. if sconf.Format == "" {
  394. sconf.Format = "json"
  395. } else if sconf.Format != message.FormatJson && sconf.Format != message.FormatProtobuf && sconf.Format != message.FormatBinary && sconf.Format != message.FormatCustom && sconf.Format != message.FormatDelimited {
  396. logger.Warnf("invalid type for format property, should be json protobuf or binary but found %s", sconf.Format)
  397. sconf.Format = "json"
  398. }
  399. err = cast.MapToStruct(m.options, &sconf.SinkConf)
  400. if err != nil {
  401. return nil, fmt.Errorf("read properties %v to cache conf fail with error: %v", m.options, err)
  402. }
  403. if sconf.DataField == "" {
  404. if v, ok := m.options["tableDataField"]; ok {
  405. sconf.DataField = v.(string)
  406. }
  407. }
  408. err = sconf.SinkConf.Validate()
  409. if err != nil {
  410. return nil, fmt.Errorf("invalid cache properties: %v", err)
  411. }
  412. return sconf, err
  413. }
  414. func (m *SinkNode) reset() {
  415. if !m.isMock {
  416. m.sinks = nil
  417. }
  418. m.statManagers = nil
  419. }
  420. func doCollect(ctx api.StreamContext, sink api.Sink, item interface{}, sendManager *sinkUtil.SendManager, stats metric.StatManager, sconf *SinkConf) error {
  421. stats.ProcessTimeStart()
  422. defer stats.ProcessTimeEnd()
  423. outs := itemToMap(item)
  424. if sconf.Omitempty && (item == nil || len(outs) == 0) {
  425. ctx.GetLogger().Debugf("receive empty in sink")
  426. return nil
  427. }
  428. return doCollectMaps(ctx, sink, sconf, outs, sendManager, stats, false)
  429. }
  430. func doCollectMaps(ctx api.StreamContext, sink api.Sink, sconf *SinkConf, outs []map[string]interface{}, sendManager *sinkUtil.SendManager, stats metric.StatManager, isResend bool) error {
  431. if !sconf.SendSingle {
  432. return doCollectData(ctx, sink, outs, sendManager, stats, isResend)
  433. } else {
  434. var err error
  435. for _, d := range outs {
  436. if sconf.Omitempty && (d == nil || len(d) == 0) {
  437. ctx.GetLogger().Debugf("receive empty in sink")
  438. continue
  439. }
  440. newErr := doCollectData(ctx, sink, d, sendManager, stats, isResend)
  441. if newErr != nil {
  442. err = newErr
  443. }
  444. }
  445. return err
  446. }
  447. }
  448. func itemToMap(item interface{}) []map[string]interface{} {
  449. var outs []map[string]interface{}
  450. switch val := item.(type) {
  451. case error:
  452. outs = []map[string]interface{}{
  453. {"error": val.Error()},
  454. }
  455. break
  456. case xsql.Collection: // The order is important here, because some element is both a collection and a row, such as WindowTuples, JoinTuples, etc.
  457. outs = val.ToMaps()
  458. break
  459. case xsql.Row:
  460. outs = []map[string]interface{}{
  461. val.ToMap(),
  462. }
  463. break
  464. case []map[string]interface{}: // for test only
  465. outs = val
  466. break
  467. case *xsql.WatermarkTuple:
  468. // just ignore
  469. default:
  470. outs = []map[string]interface{}{
  471. {"error": fmt.Sprintf("result is not a map slice but found %#v", val)},
  472. }
  473. }
  474. return outs
  475. }
  476. // doCollectData outData must be map or []map
  477. func doCollectData(ctx api.StreamContext, sink api.Sink, outData interface{}, sendManager *sinkUtil.SendManager, stats metric.StatManager, isResend bool) error {
  478. if sendManager != nil {
  479. switch v := outData.(type) {
  480. case map[string]interface{}:
  481. sendManager.RecvData(v)
  482. case []map[string]interface{}:
  483. for _, d := range v {
  484. sendManager.RecvData(d)
  485. }
  486. }
  487. return nil
  488. }
  489. select {
  490. case <-ctx.Done():
  491. ctx.GetLogger().Infof("sink node %s instance %d stops data resending", ctx.GetOpId(), ctx.GetInstanceId())
  492. return nil
  493. default:
  494. if isResend {
  495. return resendDataToSink(ctx, sink, outData, stats)
  496. } else {
  497. return sendDataToSink(ctx, sink, outData, stats)
  498. }
  499. }
  500. }
  501. func doCollectDataInBatch(ctx api.StreamContext, sink api.Sink, sendManager *sinkUtil.SendManager, stats metric.StatManager) {
  502. for {
  503. select {
  504. case <-ctx.Done():
  505. ctx.GetLogger().Infof("sink node %s instance %d stops data batch collecting", ctx.GetOpId(), ctx.GetInstanceId())
  506. return
  507. case outData := <-sendManager.GetOutputChan():
  508. if err := sendDataToSink(ctx, sink, outData, stats); err != nil {
  509. ctx.GetLogger().Warnf("sink node %s instance %d publish %s error: %v", ctx.GetOpId(), ctx.GetInstanceId(), outData, err)
  510. }
  511. }
  512. }
  513. }
  514. func sendDataToSink(ctx api.StreamContext, sink api.Sink, outData interface{}, stats metric.StatManager) error {
  515. if err := sink.Collect(ctx, outData); err != nil {
  516. stats.IncTotalExceptions(err.Error())
  517. return err
  518. } else {
  519. ctx.GetLogger().Debugf("success")
  520. stats.IncTotalRecordsOut()
  521. return nil
  522. }
  523. }
  524. func resendDataToSink(ctx api.StreamContext, sink api.Sink, outData interface{}, stats metric.StatManager) error {
  525. var err error
  526. switch st := sink.(type) {
  527. case api.ResendSink:
  528. err = st.CollectResend(ctx, outData)
  529. default:
  530. err = st.Collect(ctx, outData)
  531. }
  532. if err != nil {
  533. stats.IncTotalExceptions(err.Error())
  534. return err
  535. } else {
  536. ctx.GetLogger().Debugf("success resend")
  537. return nil
  538. }
  539. }
  540. func getSink(name string, action map[string]interface{}) (api.Sink, error) {
  541. var (
  542. s api.Sink
  543. err error
  544. )
  545. s, err = io.Sink(name)
  546. if s != nil {
  547. newAction := nodeConf.GetSinkConf(name, action)
  548. err = s.Configure(newAction)
  549. if err != nil {
  550. return nil, err
  551. }
  552. return s, nil
  553. } else {
  554. if err != nil {
  555. return nil, err
  556. } else {
  557. return nil, fmt.Errorf("sink %s not found", name)
  558. }
  559. }
  560. }
// AddOutput Override defaultNode. A sink is a terminal node, so attaching an
// output channel is always an error.
func (m *SinkNode) AddOutput(_ chan<- interface{}, name string) error {
	return fmt.Errorf("fail to add output %s, sink %s cannot add output", name, m.name)
}
// Broadcast Override defaultNode. A sink has no downstream nodes, so
// broadcasting is always an error.
func (m *SinkNode) Broadcast(_ interface{}) error {
	return fmt.Errorf("sink %s cannot add broadcast", m.name)
}