sink_node.go

// Copyright 2021 EMQ Technologies Co., Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package node

import (
	"fmt"
	"strings"
	"sync"
	"time"

	"github.com/lf-edge/ekuiper/internal/binder/io"
	"github.com/lf-edge/ekuiper/internal/conf"
	"github.com/lf-edge/ekuiper/internal/topo/context"
	"github.com/lf-edge/ekuiper/internal/topo/transform"
	"github.com/lf-edge/ekuiper/pkg/api"
	"github.com/lf-edge/ekuiper/pkg/cast"
)
// SinkConf holds the common properties that every sink action accepts,
// decoded from the rule's action options.
type SinkConf struct {
	Concurrency       int    `json:"concurrency"`
	RunAsync          bool   `json:"runAsync"`
	RetryInterval     int    `json:"retryInterval"`
	RetryCount        int    `json:"retryCount"`
	CacheLength       int    `json:"cacheLength"`
	CacheSaveInterval int    `json:"cacheSaveInterval"`
	Omitempty         bool   `json:"omitIfEmpty"`
	SendSingle        bool   `json:"sendSingle"`
	DataTemplate      string `json:"dataTemplate"`
}
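
// The block below is an added illustrative sketch, not part of the original
// file: a sample options map exercising the SinkConf fields above. The keys
// follow the struct tags; the concrete values and the template text are
// assumptions for illustration only.
var exampleSinkProps = map[string]interface{}{
	"concurrency":   2,
	"runAsync":      true,
	"retryInterval": 500,  // milliseconds to wait between retries
	"retryCount":    3,    // only "io error" failures are retried
	"cacheLength":   1024, // max tuples buffered by the sink cache
	"omitIfEmpty":   true, // drop empty results instead of sending them
	"sendSingle":    true, // emit each map separately instead of as a slice
	"dataTemplate":  `{"device": "{{.device}}", "value": {{.value}}}`,
}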
type SinkNode struct {
	*defaultSinkNode
	// static
	sinkType string
	mutex    sync.RWMutex
	// configs (also static for sinks)
	options map[string]interface{}
	isMock  bool
	// states that vary after restart
	sinks []api.Sink
	tch   chan struct{} // channel to trigger a cache save; triggered by checkpoints only
}
// NewSinkNode creates a sink node of the given type. The optional
// bufferLength property controls the capacity of the input channel.
func NewSinkNode(name string, sinkType string, props map[string]interface{}) *SinkNode {
	bufferLength := 1024
	if c, ok := props["bufferLength"]; ok {
		// silently ignore an invalid bufferLength property and keep the default
		if t, err := cast.ToInt(c, cast.STRICT); err == nil && t > 0 {
			bufferLength = t
		}
	}
	return &SinkNode{
		defaultSinkNode: &defaultSinkNode{
			input: make(chan interface{}, bufferLength),
			defaultNode: &defaultNode{
				name:        name,
				concurrency: 1,
				ctx:         nil,
			},
		},
		sinkType: sinkType,
		options:  props,
	}
}
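
// The function below is an added usage sketch, not part of the original file:
// it shows how a caller might wire a sink node into a rule. The "mqtt" sink
// type, the property values and the caller-supplied context are assumptions.
func exampleOpenSink(ctx api.StreamContext) {
	node := NewSinkNode("demo_out", "mqtt", map[string]interface{}{
		"bufferLength": 2048,
		"sendSingle":   true,
	})
	errCh := make(chan error, 1)
	node.Open(ctx, errCh)
	// Open is asynchronous; fatal setup errors arrive on errCh.
	go func() {
		if err := <-errCh; err != nil {
			ctx.GetLogger().Errorf("sink terminated: %v", err)
		}
	}()
}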
// NewSinkNodeWithSink creates a sink node from an existing sink instance.
// Only for tests with mock sinks; do not use it in production.
func NewSinkNodeWithSink(name string, sink api.Sink, props map[string]interface{}) *SinkNode {
	return &SinkNode{
		defaultSinkNode: &defaultSinkNode{
			input: make(chan interface{}, 1024),
			defaultNode: &defaultNode{
				name:        name,
				concurrency: 1,
				ctx:         nil,
			},
		},
		sinks:   []api.Sink{sink},
		options: props,
		isMock:  true,
	}
}
// Open starts the sink node: it decodes and validates the common sink
// properties, then spawns one goroutine per concurrency instance, each owning
// its own sink connection. Fatal errors are reported through result.
func (m *SinkNode) Open(ctx api.StreamContext, result chan<- error) {
	m.ctx = ctx
	logger := ctx.GetLogger()
	logger.Debugf("open sink node %s", m.name)
	if m.qos >= api.AtLeastOnce {
		m.tch = make(chan struct{})
	}
	go func() {
		sconf := &SinkConf{
			Concurrency:       1,
			RunAsync:          false,
			RetryInterval:     1000,
			RetryCount:        0,
			CacheLength:       1024,
			CacheSaveInterval: 1000,
			Omitempty:         false,
			SendSingle:        false,
			DataTemplate:      "",
		}
		err := cast.MapToStruct(m.options, sconf)
		if err != nil {
			result <- fmt.Errorf("read properties %v fail with error: %v", m.options, err)
			return
		}
		if sconf.Concurrency <= 0 {
			logger.Warnf("invalid concurrency property: should be a positive integer but found %d, using default 1", sconf.Concurrency)
			sconf.Concurrency = 1
		}
		m.concurrency = sconf.Concurrency
		if sconf.RetryInterval <= 0 {
			logger.Warnf("invalid retryInterval property: should be a positive integer but found %d, using default 1000", sconf.RetryInterval)
			sconf.RetryInterval = 1000
		}
		if sconf.RetryCount < 0 {
			logger.Warnf("invalid retryCount property: should be a non-negative integer but found %d, using 3", sconf.RetryCount)
			sconf.RetryCount = 3
		}
		if sconf.CacheLength < 0 {
			logger.Warnf("invalid cacheLength property: should be a non-negative integer but found %d, using default 1024", sconf.CacheLength)
			sconf.CacheLength = 1024
		}
		if sconf.CacheSaveInterval < 0 {
			logger.Warnf("invalid cacheSaveInterval property: should be a non-negative integer but found %d, using default 1000", sconf.CacheSaveInterval)
			sconf.CacheSaveInterval = 1000
		}
		tf, err := transform.GenTransform(sconf.DataTemplate)
		if err != nil {
			msg := fmt.Sprintf("property dataTemplate %v is invalid: %v", sconf.DataTemplate, err)
			logger.Warnf("%s", msg)
			result <- fmt.Errorf("%s", msg)
			return
		}
		m.reset()
		logger.Infof("open sink node %d instances", m.concurrency)
		for i := 0; i < m.concurrency; i++ { // one worker goroutine per instance
			go func(instance int) {
				var sink api.Sink
				var err error
				if !m.isMock {
					logger.Debugf("Trying to get sink for rule %s with options %v\n", ctx.GetRuleId(), m.options)
					sink, err = getSink(m.sinkType, m.options)
					if err != nil {
						m.drainError(result, err, ctx, logger)
						return
					}
					logger.Debugf("Successfully got the sink %s", m.sinkType)
					m.mutex.Lock()
					m.sinks = append(m.sinks, sink)
					m.mutex.Unlock()
					logger.Debugf("Opening sink for rule %s.\n", ctx.GetRuleId())
					if err := sink.Open(ctx); err != nil {
						m.drainError(result, err, ctx, logger)
						return
					}
					logger.Debugf("Successfully opened sink for rule %s.\n", ctx.GetRuleId())
				} else {
					sink = m.sinks[instance]
				}
				stats, err := NewStatManager("sink", ctx)
				if err != nil {
					m.drainError(result, err, ctx, logger)
					return
				}
				m.mutex.Lock()
				m.statManagers = append(m.statManagers, stats)
				m.mutex.Unlock()
				if conf.Config.Sink.DisableCache {
					for {
						select {
						case data := <-m.input:
							// a processed result means the preprocessor consumed
							// the data (e.g. a control tuple), so skip sending it
							if temp, processed := m.preprocess(data); processed {
								break
							} else {
								data = temp
							}
							stats.SetBufferLength(int64(len(m.input)))
							if sconf.RunAsync {
								go doCollect(ctx, sink, data, stats, sconf, tf, nil)
							} else {
								doCollect(ctx, sink, data, stats, sconf, tf, nil)
							}
						case <-ctx.Done():
							logger.Infof("sink node %s instance %d done", m.name, instance)
							if err := sink.Close(ctx); err != nil {
								logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
							}
							return
						case <-m.tch:
							logger.Debugf("rule %s sink receives checkpoint, do nothing", ctx.GetRuleId())
						}
					}
				} else {
					logger.Infof("Creating sink cache")
					var cache *Cache
					if m.qos >= api.AtLeastOnce {
						cache = NewCheckpointbasedCache(m.input, sconf.CacheLength, m.tch, result, ctx)
					} else {
						cache = NewTimebasedCache(m.input, sconf.CacheLength, sconf.CacheSaveInterval, result, ctx)
					}
					for {
						select {
						case data := <-cache.Out:
							if temp, processed := m.preprocess(data.data); processed {
								break
							} else {
								data.data = temp
							}
							stats.SetBufferLength(int64(len(m.input)))
							if sconf.RunAsync {
								go doCollect(ctx, sink, data, stats, sconf, tf, cache.Complete)
							} else {
								doCollect(ctx, sink, data, stats, sconf, tf, cache.Complete)
							}
						case <-ctx.Done():
							logger.Infof("sink node %s instance %d done", m.name, instance)
							if err := sink.Close(ctx); err != nil {
								logger.Warnf("close sink node %s instance %d fails: %v", m.name, instance, err)
							}
							return
						}
					}
				}
			}(i)
		}
	}()
}
func (m *SinkNode) reset() {
	if !m.isMock {
		m.sinks = nil
	}
	m.statManagers = nil
}
// doCollect normalizes the incoming item into a slice of maps and hands it to
// doCollectData, either as a whole or one map at a time when sendSingle is set.
func doCollect(ctx api.StreamContext, sink api.Sink, item interface{}, stats StatManager, sconf *SinkConf, tf transform.TransFunc, signalCh chan<- int) {
	stats.IncTotalRecordsIn()
	stats.ProcessTimeStart()
	defer stats.ProcessTimeEnd()
	// In cache mode the item arrives wrapped in a *CacheTuple: unwrap it so
	// the type switch sees the payload, and keep the tuple so the cache can
	// be acknowledged after a successful send.
	var tuple *CacheTuple
	if t, ok := item.(*CacheTuple); ok {
		tuple = t
		item = t.data
	}
	var outs []map[string]interface{}
	switch val := item.(type) {
	case error:
		outs = []map[string]interface{}{
			{"error": val.Error()},
		}
	case []map[string]interface{}:
		outs = val
	default:
		outs = []map[string]interface{}{
			{"error": fmt.Sprintf("result is not a map slice but found %#v", val)},
		}
	}
	if sconf.Omitempty && (item == nil || len(outs) == 0) {
		ctx.GetLogger().Debugf("receive empty in sink")
		return
	}
	if !sconf.SendSingle {
		doCollectData(ctx, sink, outs, tuple, stats, sconf, tf, signalCh)
	} else {
		for _, d := range outs {
			doCollectData(ctx, sink, d, tuple, stats, sconf, tf, signalCh)
		}
	}
}
// doCollectData sends outData (which must be a map or []map) to the sink,
// retrying transient "io error" failures according to the retry settings.
// When the data came from the sink cache, tuple carries its index and a
// successful send is acknowledged by writing that index to signalCh.
func doCollectData(ctx api.StreamContext, sink api.Sink, outData interface{}, tuple *CacheTuple, stats StatManager, sconf *SinkConf, tf transform.TransFunc, signalCh chan<- int) {
	vCtx := context.WithValue(ctx.(*context.DefaultContext), context.TransKey, &context.TransConfig{
		Data:  outData,
		TFunc: tf,
	})
	retries := sconf.RetryCount
	for {
		select {
		case <-ctx.Done():
			ctx.GetLogger().Infof("sink node %s instance %d stops data resending", ctx.GetOpId(), ctx.GetInstanceId())
			return
		default:
			if err := sink.Collect(vCtx, outData); err != nil {
				stats.IncTotalExceptions()
				ctx.GetLogger().Warnf("sink node %s instance %d publish %s error: %v", ctx.GetOpId(), ctx.GetInstanceId(), outData, err)
				// only transient io errors are worth retrying
				if sconf.RetryInterval > 0 && retries > 0 && strings.HasPrefix(err.Error(), "io error") {
					retries--
					time.Sleep(time.Duration(sconf.RetryInterval) * time.Millisecond)
					ctx.GetLogger().Debugf("try again")
				} else {
					return
				}
			} else {
				ctx.GetLogger().Debugf("success")
				stats.IncTotalRecordsOut()
				if signalCh != nil {
					if tuple == nil {
						ctx.GetLogger().Warnf("no cache tuple for %v, should not happen", outData)
						return
					}
					select {
					case signalCh <- tuple.index:
					default:
						ctx.GetLogger().Warnf("sink cache missing response for %d", tuple.index)
					}
				}
				return
			}
		}
	}
}
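
// The type below is an added illustrative sketch, not part of the original
// file: a sink whose Collect fails transiently with the "io error" prefix
// that the retry loop above checks for. It assumes the api.Sink interface of
// this codebase version (Configure/Open/Collect/Close).
type flakySink struct {
	failures int // number of Collect calls that should fail before succeeding
}

func (s *flakySink) Configure(_ map[string]interface{}) error { return nil }
func (s *flakySink) Open(_ api.StreamContext) error           { return nil }
func (s *flakySink) Close(_ api.StreamContext) error          { return nil }
func (s *flakySink) Collect(_ api.StreamContext, _ interface{}) error {
	if s.failures > 0 {
		s.failures--
		// the "io error" prefix marks this failure as retryable to doCollectData
		return fmt.Errorf("io error: simulated transient failure")
	}
	return nil
}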
// getSink looks up the sink plugin by name from the io registry and
// configures it with the action properties.
func getSink(name string, action map[string]interface{}) (api.Sink, error) {
	s, err := io.Sink(name)
	if s != nil {
		if err = s.Configure(action); err != nil {
			return nil, err
		}
		return s, nil
	}
	if err != nil {
		return nil, err
	}
	return nil, fmt.Errorf("sink %s not found", name)
}
// AddOutput overrides defaultNode: a sink is a terminal node and cannot have outputs.
func (m *SinkNode) AddOutput(_ chan<- interface{}, name string) error {
	return fmt.Errorf("fail to add output %s, sink %s cannot add output", name, m.name)
}

// Broadcast overrides defaultNode: a sink cannot broadcast.
func (m *SinkNode) Broadcast(_ interface{}) error {
	return fmt.Errorf("sink %s cannot broadcast", m.name)
}
func (m *SinkNode) drainError(errCh chan<- error, err error, ctx api.StreamContext, logger api.Logger) {
	go func() {
		select {
		case errCh <- err:
			ctx.GetLogger().Errorf("error in sink: %v", err)
		case <-ctx.Done():
			m.close(ctx, logger)
		}
	}()
}
func (m *SinkNode) close(ctx api.StreamContext, logger api.Logger) {
	for _, s := range m.sinks {
		if err := s.Close(ctx); err != nil {
			logger.Warnf("close sink fails: %v", err)
		}
	}
	if m.tch != nil {
		close(m.tch)
		m.tch = nil
	}
}
// SaveCache triggers a cache save through tch. Only called when checkpointing
// is enabled (qos >= AtLeastOnce), as tch is nil otherwise.
func (m *SinkNode) SaveCache() {
	m.tch <- struct{}{}
}