sink_node.go

// Copyright 2022-2023 EMQ Technologies Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package node

import (
	"fmt"
	"strings"
	"sync"

	"github.com/lf-edge/ekuiper/internal/binder/io"
	"github.com/lf-edge/ekuiper/internal/conf"
	sinkUtil "github.com/lf-edge/ekuiper/internal/io/sink"
	"github.com/lf-edge/ekuiper/internal/topo/context"
	"github.com/lf-edge/ekuiper/internal/topo/node/cache"
	nodeConf "github.com/lf-edge/ekuiper/internal/topo/node/conf"
	"github.com/lf-edge/ekuiper/internal/topo/node/metric"
	"github.com/lf-edge/ekuiper/internal/topo/transform"
	"github.com/lf-edge/ekuiper/internal/xsql"
	"github.com/lf-edge/ekuiper/pkg/api"
	"github.com/lf-edge/ekuiper/pkg/cast"
	"github.com/lf-edge/ekuiper/pkg/errorx"
	"github.com/lf-edge/ekuiper/pkg/infra"
	"github.com/lf-edge/ekuiper/pkg/message"
)

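// SinkConf holds the common sink properties shared by all sink types. They are read
// from the rule action's options (see parseConf) and embed the global defaults from
// conf.SinkConf.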
type SinkConf struct {
	Concurrency    int      `json:"concurrency"`
	RunAsync       bool     `json:"runAsync"` // deprecated; will be removed in the next release
	Omitempty      bool     `json:"omitIfEmpty"`
	SendSingle     bool     `json:"sendSingle"`
	DataTemplate   string   `json:"dataTemplate"`
	Format         string   `json:"format"`
	SchemaId       string   `json:"schemaId"`
	Delimiter      string   `json:"delimiter"`
	BufferLength   int      `json:"bufferLength"`
	Fields         []string `json:"fields"`
	DataField      string   `json:"dataField"`
	BatchSize      int      `json:"batchSize"`
	LingerInterval int      `json:"lingerInterval"`
	conf.SinkConf
}

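// isBatchSinkEnabled reports whether batch sending is configured, i.e. batchSize or
// lingerInterval is set to a positive value.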
func (sc *SinkConf) isBatchSinkEnabled() bool {
	if sc.BatchSize > 0 || sc.LingerInterval > 0 {
		return true
	}
	return false
}

type SinkNode struct {
	*defaultSinkNode
	// static
	sinkType string
	mutex    sync.RWMutex
	// configs (also static for sinks)
	options map[string]interface{}
	isMock  bool
	// states vary after restart
	sinks []api.Sink
}

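// NewSinkNode creates a sink node of the given sink type with the raw action properties.
// The input channel capacity defaults to 1024 and can be overridden by a positive
// bufferLength property.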
func NewSinkNode(name string, sinkType string, props map[string]interface{}) *SinkNode {
	bufferLength := 1024
	if c, ok := props["bufferLength"]; ok {
		if t, err := cast.ToInt(c, cast.STRICT); err != nil || t <= 0 {
			// invalid bufferLength property, keep the default
		} else {
			bufferLength = t
		}
	}
	return &SinkNode{
		defaultSinkNode: &defaultSinkNode{
			input: make(chan interface{}, bufferLength),
			defaultNode: &defaultNode{
				name:        name,
				concurrency: 1,
				ctx:         nil,
			},
		},
		sinkType: sinkType,
		options:  props,
	}
}

// NewSinkNodeWithSink creates a sink node with a preset sink. Only for mock sinks in tests, do not use it in production.
func NewSinkNodeWithSink(name string, sink api.Sink, props map[string]interface{}) *SinkNode {
	return &SinkNode{
		defaultSinkNode: &defaultSinkNode{
			input: make(chan interface{}, 1024),
			defaultNode: &defaultNode{
				name:        name,
				concurrency: 1,
				ctx:         nil,
			},
		},
		sinks:   []api.Sink{sink},
		options: props,
		isMock:  true,
	}
}

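// Open starts the sink node asynchronously: it parses the sink configuration, builds
// the data transform, and spawns one worker goroutine per configured concurrency.
// Any startup or runtime error is drained into the result channel.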
func (m *SinkNode) Open(ctx api.StreamContext, result chan<- error) {
	m.ctx = ctx
	logger := ctx.GetLogger()
	logger.Debugf("open sink node %s", m.name)
	go func() {
		err := infra.SafeRun(func() error {
			sconf, err := m.parseConf(logger)
			if err != nil {
				return err
			}
			tf, err := transform.GenTransform(sconf.DataTemplate, sconf.Format, sconf.SchemaId, sconf.Delimiter, sconf.DataField, sconf.Fields)
			if err != nil {
				msg := fmt.Sprintf("property dataTemplate %v is invalid: %v", sconf.DataTemplate, err)
				logger.Warnf("%s", msg)
				return fmt.Errorf("%s", msg)
			}
			ctx = context.WithValue(ctx.(*context.DefaultContext), context.TransKey, tf)
			m.reset()
			logger.Infof("open sink node %d instances", m.concurrency)
			for i := 0; i < m.concurrency; i++ { // workers
				go func(instance int) {
					panicOrError := infra.SafeRun(func() error {
						var (
							sink api.Sink
							err  error
						)
						if !m.isMock {
							logger.Debugf("Trying to get sink for rule %s with options %v\n", ctx.GetRuleId(), m.options)
							sink, err = getSink(m.sinkType, m.options)
							if err != nil {
								return err
							}
							logger.Debugf("Successfully got the sink %s", m.sinkType)
							m.mutex.Lock()
							m.sinks = append(m.sinks, sink)
							m.mutex.Unlock()
							logger.Debugf("Now opening sink for rule %s.\n", ctx.GetRuleId())
							if err := sink.Open(ctx); err != nil {
								return err
							}
							logger.Debugf("Successfully opened sink for rule %s.\n", ctx.GetRuleId())
						} else {
							sink = m.sinks[instance]
						}
						stats, err := metric.NewStatManager(ctx, "sink")
						if err != nil {
							return err
						}
						m.mutex.Lock()
						m.statManagers = append(m.statManagers, stats)
						m.mutex.Unlock()
						var sendManager *sinkUtil.SendManager
						if sconf.isBatchSinkEnabled() {
							sendManager, err = sinkUtil.NewSendManager(sconf.BatchSize, sconf.LingerInterval)
							if err != nil {
								return err
							}
							go sendManager.Run(ctx)
							go doCollectDataInBatch(ctx, sink, sendManager, stats)
						}

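						// When the cache is disabled, data from the input channel goes to the sink directly;
						// otherwise it is routed through the sync cache below, which acks each send so that
						// recoverable IO failures can be resent.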
						if !sconf.EnableCache {
							for {
								select {
								case data := <-m.input:
									if temp, processed := m.preprocess(data); !processed {
										data = temp
									} else {
										break
									}
									stats.SetBufferLength(int64(len(m.input)))
									stats.IncTotalRecordsIn()
									if sconf.RunAsync {
										conf.Log.Warnf("RunAsync is deprecated and ignored.")
									}
									err := doCollect(ctx, sink, data, sendManager, stats, sconf)
									if err != nil {
										logger.Warnf("sink collect error: %v", err)
									}
								case <-ctx.Done():
									logger.Infof("sink node %s instance %d done", m.name, instance)
									if err := sink.Close(ctx); err != nil {
										logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
									}
									return nil
								}
							}
						} else {
							logger.Infof("Creating sink cache")
							if sconf.RunAsync { // async mode, the ack must have an id
								// not supported; validated in parseConf, so it should not reach here
								return fmt.Errorf("async mode is not supported for cache sink")
							} else { // sync mode, the ack is already in order
								dataCh := make(chan []map[string]interface{}, sconf.BufferLength)
								c := cache.NewSyncCache(ctx, dataCh, result, stats, &sconf.SinkConf, sconf.BufferLength)
								for {
									select {
									case data := <-m.input:
										if temp, processed := m.preprocess(data); !processed {
											data = temp
										} else {
											break
										}
										stats.IncTotalRecordsIn()
										outs := itemToMap(data)
										if sconf.Omitempty && (data == nil || len(outs) == 0) {
											ctx.GetLogger().Debugf("receive empty in sink")
											return nil
										}
										select {
										case dataCh <- outs:
										case <-ctx.Done():
										}
									case data := <-c.Out:
										stats.ProcessTimeStart()
										ack := true
										err := doCollectMaps(ctx, sink, sconf, data, sendManager, stats)
										// Only recoverable errors should be cached
										if err != nil {
											if strings.HasPrefix(err.Error(), errorx.IOErr) { // do not log to prevent a flood of logs
												ack = false
											} else {
												ctx.GetLogger().Warnf("sink node %s instance %d publish %s error: %v", ctx.GetOpId(), ctx.GetInstanceId(), data, err)
											}
										} else {
											ctx.GetLogger().Debugf("sent data to sink: %v", data)
										}
										select {
										case c.Ack <- ack:
										case <-ctx.Done():
										}
										stats.ProcessTimeEnd()
									case <-ctx.Done():
										logger.Infof("sink node %s instance %d done", m.name, instance)
										if err := sink.Close(ctx); err != nil {
											logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
										}
										return nil
									}
								}
							}
						}
					})
					if panicOrError != nil {
						infra.DrainError(ctx, panicOrError, result)
					}
				}(i)
			}
			return nil
		})
		if err != nil {
			infra.DrainError(ctx, err, result)
		}
	}()
}

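// parseConf merges the action properties with the defaults and validates them,
// falling back to safe values (concurrency 1, json format) when a property is invalid.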
func (m *SinkNode) parseConf(logger api.Logger) (*SinkConf, error) {
	sconf := &SinkConf{
		Concurrency:  1,
		RunAsync:     false,
		Omitempty:    false,
		SendSingle:   false,
		DataTemplate: "",
		SinkConf:     *conf.Config.Sink,
		BufferLength: 1024,
	}
	err := cast.MapToStruct(m.options, sconf)
	if err != nil {
		return nil, fmt.Errorf("read properties %v fail with error: %v", m.options, err)
	}
	if sconf.Concurrency <= 0 {
		logger.Warnf("invalid concurrency property, should be a positive integer but found %d", sconf.Concurrency)
		sconf.Concurrency = 1
	}
	m.concurrency = sconf.Concurrency
	if sconf.Format == "" {
		sconf.Format = "json"
	} else if sconf.Format != message.FormatJson && sconf.Format != message.FormatProtobuf && sconf.Format != message.FormatBinary && sconf.Format != message.FormatCustom && sconf.Format != message.FormatDelimited {
		logger.Warnf("invalid format property, should be json, protobuf, binary, delimited or custom but found %s", sconf.Format)
		sconf.Format = "json"
	}
	err = cast.MapToStruct(m.options, &sconf.SinkConf)
	if err != nil {
		return nil, fmt.Errorf("read properties %v to cache conf fail with error: %v", m.options, err)
	}
	if sconf.SinkConf.EnableCache && sconf.RunAsync {
		conf.Log.Warnf("RunAsync is deprecated and ignored.")
		return nil, fmt.Errorf("cache is not supported for async sink, do not use enableCache and runAsync properties together")
	}
	if sconf.DataField == "" {
		if v, ok := m.options["tableDataField"]; ok {
			sconf.DataField = v.(string)
		}
	}
	err = sconf.SinkConf.Validate()
	if err != nil {
		return nil, fmt.Errorf("invalid cache properties: %v", err)
	}
	return sconf, err
}

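// reset clears the per-run state (sinks and stat managers) so the node can be reopened;
// mocked sinks are kept.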
func (m *SinkNode) reset() {
	if !m.isMock {
		m.sinks = nil
	}
	m.statManagers = nil
}

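// doCollect converts one item from the input channel to maps and forwards it to the sink,
// honoring the omitIfEmpty setting.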
func doCollect(ctx api.StreamContext, sink api.Sink, item interface{}, sendManager *sinkUtil.SendManager, stats metric.StatManager, sconf *SinkConf) error {
	stats.ProcessTimeStart()
	defer stats.ProcessTimeEnd()
	outs := itemToMap(item)
	if sconf.Omitempty && (item == nil || len(outs) == 0) {
		ctx.GetLogger().Debugf("receive empty in sink")
		return nil
	}
	return doCollectMaps(ctx, sink, sconf, outs, sendManager, stats)
}

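// doCollectMaps sends the converted maps either as one batch or one by one when
// sendSingle is enabled, returning the last send error, if any.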
func doCollectMaps(ctx api.StreamContext, sink api.Sink, sconf *SinkConf, outs []map[string]interface{}, sendManager *sinkUtil.SendManager, stats metric.StatManager) error {
	if !sconf.SendSingle {
		return doCollectData(ctx, sink, outs, sendManager, stats)
	} else {
		var err error
		for _, d := range outs {
			if sconf.Omitempty && (d == nil || len(d) == 0) {
				ctx.GetLogger().Debugf("receive empty in sink")
				continue
			}
			newErr := doCollectData(ctx, sink, d, sendManager, stats)
			if newErr != nil {
				err = newErr
			}
		}
		return err
	}
}

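// itemToMap normalizes a result item (error, collection, row or raw map slice) into a
// slice of maps for the sink.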
func itemToMap(item interface{}) []map[string]interface{} {
	var outs []map[string]interface{}
	switch val := item.(type) {
	case error:
		outs = []map[string]interface{}{
			{"error": val.Error()},
		}
	case xsql.Collection: // The order is important here, because some elements are both a collection and a row, such as WindowTuples, JoinTuples, etc.
		outs = val.ToMaps()
	case xsql.Row:
		outs = []map[string]interface{}{
			val.ToMap(),
		}
	case []map[string]interface{}: // for test only
		outs = val
	default:
		outs = []map[string]interface{}{
			{"error": fmt.Sprintf("result is not a map slice but found %#v", val)},
		}
	}
	return outs
}

// doCollectData sends the outData, which must be a map or []map, to the sink, or buffers
// it in the sendManager when batch sending is enabled.
func doCollectData(ctx api.StreamContext, sink api.Sink, outData interface{}, sendManager *sinkUtil.SendManager, stats metric.StatManager) error {
	if sendManager != nil {
		switch v := outData.(type) {
		case map[string]interface{}:
			sendManager.RecvData(v)
		case []map[string]interface{}:
			for _, d := range v {
				sendManager.RecvData(d)
			}
		}
		return nil
	}
	select {
	case <-ctx.Done():
		ctx.GetLogger().Infof("sink node %s instance %d stops data resending", ctx.GetOpId(), ctx.GetInstanceId())
		return nil
	default:
		return sendDataToSink(ctx, sink, outData, stats)
	}
}

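// doCollectDataInBatch drains the send manager's output channel and pushes each batch to
// the sink until the context is done.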
func doCollectDataInBatch(ctx api.StreamContext, sink api.Sink, sendManager *sinkUtil.SendManager, stats metric.StatManager) {
	for {
		select {
		case <-ctx.Done():
			ctx.GetLogger().Infof("sink node %s instance %d stops data batch collecting", ctx.GetOpId(), ctx.GetInstanceId())
			return
		case outData := <-sendManager.GetOutputChan():
			if err := sendDataToSink(ctx, sink, outData, stats); err != nil {
				ctx.GetLogger().Warnf("sink node %s instance %d publish %s error: %v", ctx.GetOpId(), ctx.GetInstanceId(), outData, err)
			}
		}
	}
}

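// sendDataToSink calls the sink's Collect and updates the records-out or exception
// counters accordingly.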
func sendDataToSink(ctx api.StreamContext, sink api.Sink, outData interface{}, stats metric.StatManager) error {
	if err := sink.Collect(ctx, outData); err != nil {
		stats.IncTotalExceptions(err.Error())
		return err
	} else {
		ctx.GetLogger().Debugf("success")
		stats.IncTotalRecordsOut()
		return nil
	}
}

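// getSink looks up the sink by name, merges the action properties via nodeConf.GetSinkConf
// and configures the sink instance.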
func getSink(name string, action map[string]interface{}) (api.Sink, error) {
	var (
		s   api.Sink
		err error
	)
	s, err = io.Sink(name)
	if s != nil {
		newAction := nodeConf.GetSinkConf(name, action)
		err = s.Configure(newAction)
		if err != nil {
			return nil, err
		}
		return s, nil
	} else {
		if err != nil {
			return nil, err
		} else {
			return nil, fmt.Errorf("sink %s not found", name)
		}
	}
}

// AddOutput overrides defaultNode
func (m *SinkNode) AddOutput(_ chan<- interface{}, name string) error {
	return fmt.Errorf("fail to add output %s, sink %s cannot add output", name, m.name)
}

// Broadcast overrides defaultNode
func (m *SinkNode) Broadcast(_ interface{}) error {
	return fmt.Errorf("sink %s cannot broadcast", m.name)
}