sink_node.go

// Copyright 2022 EMQ Technologies Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package node

import (
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/lf-edge/ekuiper/internal/binder/io"
	"github.com/lf-edge/ekuiper/internal/conf"
	"github.com/lf-edge/ekuiper/internal/topo/context"
	"github.com/lf-edge/ekuiper/internal/topo/transform"
	"github.com/lf-edge/ekuiper/pkg/api"
	"github.com/lf-edge/ekuiper/pkg/cast"
	"github.com/lf-edge/ekuiper/pkg/errorx"
	"github.com/lf-edge/ekuiper/pkg/infra"
	"github.com/lf-edge/ekuiper/pkg/message"
)
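
// SinkConf holds the common sink properties that are decoded from the action
// options via cast.MapToStruct in Open. As an illustrative sketch only (not an
// exhaustive list of what a concrete sink may accept), an action carrying just
// these common properties could look like the JSON below; the values simply
// restate the defaults applied in Open:
//
//	{
//	    "concurrency": 1,
//	    "runAsync": false,
//	    "retryInterval": 1000,
//	    "retryCount": 0,
//	    "cacheLength": 1024,
//	    "cacheSaveInterval": 1000,
//	    "omitIfEmpty": false,
//	    "sendSingle": false,
//	    "format": "json"
//	}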
type SinkConf struct {
	Concurrency       int    `json:"concurrency"`
	RunAsync          bool   `json:"runAsync"`
	RetryInterval     int    `json:"retryInterval"`
	RetryCount        int    `json:"retryCount"`
	CacheLength       int    `json:"cacheLength"`
	CacheSaveInterval int    `json:"cacheSaveInterval"`
	Omitempty         bool   `json:"omitIfEmpty"`
	SendSingle        bool   `json:"sendSingle"`
	DataTemplate      string `json:"dataTemplate"`
	Format            string `json:"format"`
	SchemaId          string `json:"schemaId"`
}

type SinkNode struct {
	*defaultSinkNode
	// static
	sinkType string
	mutex    sync.RWMutex
	// configs (also static for sinks)
	options map[string]interface{}
	isMock  bool
	// state, varies after restart
	sinks []api.Sink
	tch   chan struct{} // channel to trigger cache save; triggered by checkpoint only
}
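
// NewSinkNode creates a sink node from the sink type name and its properties.
// Illustrative usage only; the sink name and property keys below are assumptions
// for the example and are not validated here (only bufferLength is read by this
// constructor):
//
//	node := NewSinkNode("mqtt_sink", "mqtt", map[string]interface{}{
//	    "server":       "tcp://127.0.0.1:1883",
//	    "topic":        "result",
//	    "bufferLength": 2048,
//	})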
func NewSinkNode(name string, sinkType string, props map[string]interface{}) *SinkNode {
	bufferLength := 1024
	if c, ok := props["bufferLength"]; ok {
		if t, err := cast.ToInt(c, cast.STRICT); err != nil || t <= 0 {
			// invalid bufferLength property, keep the default
		} else {
			bufferLength = t
		}
	}
	return &SinkNode{
		defaultSinkNode: &defaultSinkNode{
			input: make(chan interface{}, bufferLength),
			defaultNode: &defaultNode{
				name:        name,
				concurrency: 1,
				ctx:         nil,
			},
		},
		sinkType: sinkType,
		options:  props,
	}
}

// NewSinkNodeWithSink creates a sink node with a ready-made sink. Only for mock sinks in tests; do not use it in production.
func NewSinkNodeWithSink(name string, sink api.Sink, props map[string]interface{}) *SinkNode {
	return &SinkNode{
		defaultSinkNode: &defaultSinkNode{
			input: make(chan interface{}, 1024),
			defaultNode: &defaultNode{
				name:        name,
				concurrency: 1,
				ctx:         nil,
			},
		},
		sinks:   []api.Sink{sink},
		options: props,
		isMock:  true,
	}
}

func (m *SinkNode) Open(ctx api.StreamContext, result chan<- error) {
	m.ctx = ctx
	logger := ctx.GetLogger()
	logger.Debugf("open sink node %s", m.name)
	if m.qos >= api.AtLeastOnce {
		m.tch = make(chan struct{})
	}
	go func() {
		err := infra.SafeRun(func() error {
			sconf := &SinkConf{
				Concurrency:       1,
				RunAsync:          false,
				RetryInterval:     1000,
				RetryCount:        0,
				CacheLength:       1024,
				CacheSaveInterval: 1000,
				Omitempty:         false,
				SendSingle:        false,
				DataTemplate:      "",
			}
			err := cast.MapToStruct(m.options, sconf)
			if err != nil {
				return fmt.Errorf("read properties %v fail with error: %v", m.options, err)
			}
			if sconf.Concurrency <= 0 {
				logger.Warnf("invalid value %d for concurrency property, should be a positive integer", sconf.Concurrency)
				sconf.Concurrency = 1
			}
			m.concurrency = sconf.Concurrency
			if sconf.RetryInterval <= 0 {
				logger.Warnf("invalid value %d for retryInterval property, should be a positive integer", sconf.RetryInterval)
				sconf.RetryInterval = 1000
			}
			if sconf.RetryCount < 0 {
				logger.Warnf("invalid value %d for retryCount property, should be a non-negative integer", sconf.RetryCount)
				sconf.RetryCount = 3
			}
			if sconf.CacheLength < 0 {
				logger.Warnf("invalid value %d for cacheLength property, should be a non-negative integer", sconf.CacheLength)
				sconf.CacheLength = 1024
			}
			if sconf.CacheSaveInterval < 0 {
				logger.Warnf("invalid value %d for cacheSaveInterval property, should be a non-negative integer", sconf.CacheSaveInterval)
				sconf.CacheSaveInterval = 1000
			}
			if sconf.Format == "" {
				sconf.Format = "json"
			} else if sconf.Format != message.FormatJson && sconf.Format != message.FormatProtobuf {
				logger.Warnf("invalid format %s, only json and protobuf are supported, fallback to json", sconf.Format)
				sconf.Format = "json"
			}
			tf, err := transform.GenTransform(sconf.DataTemplate, sconf.Format, sconf.SchemaId)
			if err != nil {
				msg := fmt.Sprintf("property dataTemplate %v is invalid: %v", sconf.DataTemplate, err)
				logger.Warnf("%s", msg)
				return fmt.Errorf("%s", msg)
			}
			ctx = context.WithValue(ctx.(*context.DefaultContext), context.TransKey, tf)
			m.reset()
			logger.Infof("open sink node %d instances", m.concurrency)
			for i := 0; i < m.concurrency; i++ { // workers
				go func(instance int) {
					panicOrError := infra.SafeRun(func() error {
						var (
							sink api.Sink
							err  error
						)
						if !m.isMock {
							logger.Debugf("Trying to get sink for rule %s with options %v\n", ctx.GetRuleId(), m.options)
							sink, err = getSink(m.sinkType, m.options)
							if err != nil {
								return err
							}
							logger.Debugf("Successfully got the sink %s", m.sinkType)
							m.mutex.Lock()
							m.sinks = append(m.sinks, sink)
							m.mutex.Unlock()
							logger.Debugf("Opening sink for rule %s.\n", ctx.GetRuleId())
							if err := sink.Open(ctx); err != nil {
								return err
							}
							logger.Debugf("Successfully opened sink for rule %s.\n", ctx.GetRuleId())
						} else {
							sink = m.sinks[instance]
						}
						stats, err := NewStatManager(ctx, "sink")
						if err != nil {
							return err
						}
						m.mutex.Lock()
						m.statManagers = append(m.statManagers, stats)
						m.mutex.Unlock()
						if conf.Config.Sink.DisableCache {
							// cache disabled: consume directly from the input channel
							for {
								select {
								case data := <-m.input:
									if temp, processed := m.preprocess(data); processed {
										// already handled by preprocess; break leaves the select and drops the item
										break
									} else {
										data = temp
									}
									stats.SetBufferLength(int64(len(m.input)))
									if sconf.RunAsync {
										go func() {
											p := infra.SafeRun(func() error {
												doCollect(ctx, sink, data, stats, sconf)
												return nil
											})
											if p != nil {
												infra.DrainError(ctx, p, result)
											}
										}()
									} else {
										doCollect(ctx, sink, data, stats, sconf)
									}
								case <-ctx.Done():
									logger.Infof("sink node %s instance %d done", m.name, instance)
									if err := sink.Close(ctx); err != nil {
										logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
									}
									return nil
								case <-m.tch:
									logger.Debugf("rule %s sink received checkpoint, do nothing", ctx.GetRuleId())
								}
							}
						} else {
							logger.Infof("Creating sink cache")
							var cache *Cache
							if m.qos >= api.AtLeastOnce {
								cache = NewCheckpointbasedCache(m.input, sconf.CacheLength, m.tch, result, ctx.WithInstance(instance))
							} else {
								cache = NewTimebasedCache(m.input, sconf.CacheLength, sconf.CacheSaveInterval, result, ctx.WithInstance(instance))
							}
							for {
								select {
								case data := <-cache.Out:
									if temp, processed := m.preprocess(data.data); processed {
										// already handled by preprocess; break leaves the select and drops the item
										break
									} else {
										data.data = temp
									}
									stats.SetBufferLength(int64(len(m.input)))
									if sconf.RunAsync {
										go func() {
											p := infra.SafeRun(func() error {
												doCollect(ctx, sink, data.data, stats, sconf)
												return nil
											})
											if p != nil {
												infra.DrainError(ctx, p, result)
											}
										}()
									} else {
										doCollect(ctx, sink, data.data, stats, sconf)
										if cache.Complete != nil {
											select {
											case cache.Complete <- data.index:
											default:
												ctx.GetLogger().Warnf("sink cache missing response for %d", data.index)
											}
										}
									}
								case <-ctx.Done():
									logger.Infof("sink node %s instance %d done", m.name, instance)
									if err := sink.Close(ctx); err != nil {
										logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
									}
									return nil
								}
							}
						}
					})
					if panicOrError != nil {
						infra.DrainError(ctx, panicOrError, result)
					}
				}(i)
			}
			return nil
		})
		if err != nil {
			infra.DrainError(ctx, err, result)
		}
	}()
}

func (m *SinkNode) reset() {
	if !m.isMock {
		m.sinks = nil
	}
	m.statManagers = nil
}

func doCollect(ctx api.StreamContext, sink api.Sink, item interface{}, stats StatManager, sconf *SinkConf) {
	stats.IncTotalRecordsIn()
	stats.ProcessTimeStart()
	defer stats.ProcessTimeEnd()
	var outs []map[string]interface{}
	switch val := item.(type) {
	case error:
		outs = []map[string]interface{}{
			{"error": val.Error()},
		}
	case []map[string]interface{}:
		outs = val
	default:
		outs = []map[string]interface{}{
			{"error": fmt.Sprintf("result is not a map slice but found %#v", val)},
		}
	}
	if sconf.Omitempty && (item == nil || len(outs) == 0) {
		ctx.GetLogger().Debugf("receive empty in sink")
		return
	}
	if !sconf.SendSingle {
		doCollectData(ctx, sink, outs, stats, sconf)
	} else {
		for _, d := range outs {
			if sconf.Omitempty && (d == nil || len(d) == 0) {
				ctx.GetLogger().Debugf("receive empty in sink")
				continue
			}
			doCollectData(ctx, sink, d, stats, sconf)
		}
	}
}

// doCollectData outData must be map or []map.
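// On an IO error (errorx.IOErr prefix) it retries up to sconf.RetryCount times,
// sleeping sconf.RetryInterval milliseconds between attempts. For example, with
// retryCount=3 and retryInterval=1000, a persistently failing Collect is attempted
// 4 times in total, spending roughly 3 seconds sleeping before the data is dropped.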
func doCollectData(ctx api.StreamContext, sink api.Sink, outData interface{}, stats StatManager, sconf *SinkConf) {
	retries := sconf.RetryCount
	for {
		select {
		case <-ctx.Done():
			ctx.GetLogger().Infof("sink node %s instance %d stops data resending", ctx.GetOpId(), ctx.GetInstanceId())
			return
		default:
			if err := sink.Collect(ctx, outData); err != nil {
				stats.IncTotalExceptions()
				ctx.GetLogger().Warnf("sink node %s instance %d publish %s error: %v", ctx.GetOpId(), ctx.GetInstanceId(), outData, err)
				if sconf.RetryInterval > 0 && retries > 0 && strings.HasPrefix(err.Error(), errorx.IOErr) {
					retries--
					time.Sleep(time.Duration(sconf.RetryInterval) * time.Millisecond)
					ctx.GetLogger().Debugf("try again")
				} else {
					return
				}
			} else {
				ctx.GetLogger().Debugf("success")
				stats.IncTotalRecordsOut()
				return
			}
		}
	}
}

func getSink(name string, action map[string]interface{}) (api.Sink, error) {
	var (
		s   api.Sink
		err error
	)
	s, err = io.Sink(name)
	if s != nil {
		err = s.Configure(action)
		if err != nil {
			return nil, err
		}
		return s, nil
	} else {
		if err != nil {
			return nil, err
		} else {
			return nil, fmt.Errorf("sink %s not found", name)
		}
	}
}

// AddOutput overrides defaultNode; a sink node cannot have outputs.
func (m *SinkNode) AddOutput(_ chan<- interface{}, name string) error {
	return fmt.Errorf("fail to add output %s, sink %s cannot add output", name, m.name)
}

// Broadcast overrides defaultNode; a sink node cannot broadcast.
func (m *SinkNode) Broadcast(_ interface{}) error {
	return fmt.Errorf("sink %s cannot broadcast", m.name)
}

// SaveCache is only called when checkpointing is enabled.
func (m *SinkNode) SaveCache() {
	select {
	case m.tch <- struct{}{}:
	case <-m.ctx.Done():
	}
}