watermark.go 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285
  1. // Copyright 2021-2022 EMQ Technologies Co., Ltd.
  2. //
  3. // Licensed under the Apache License, Version 2.0 (the "License");
  4. // you may not use this file except in compliance with the License.
  5. // You may obtain a copy of the License at
  6. //
  7. // http://www.apache.org/licenses/LICENSE-2.0
  8. //
  9. // Unless required by applicable law or agreed to in writing, software
  10. // distributed under the License is distributed on an "AS IS" BASIS,
  11. // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. // See the License for the specific language governing permissions and
  13. // limitations under the License.
  14. package node
  15. import (
  16. "context"
  17. "fmt"
  18. "math"
  19. "sort"
  20. "github.com/lf-edge/ekuiper/internal/xsql"
  21. "github.com/lf-edge/ekuiper/pkg/api"
  22. "github.com/lf-edge/ekuiper/pkg/ast"
  23. "github.com/lf-edge/ekuiper/pkg/infra"
  24. )
  // WatermarkTuple is the marker event sent on the generator's stream to
  // announce a new watermark timestamp.
  type WatermarkTuple struct {
  	// Timestamp is the watermark value carried by this marker.
  	Timestamp int64
  }

  // GetTimestamp returns the watermark timestamp carried by the tuple.
  func (wt *WatermarkTuple) GetTimestamp() int64 { return wt.Timestamp }

  // IsWatermark always reports true: a WatermarkTuple is a watermark marker.
  func (wt *WatermarkTuple) IsWatermark() bool { return true }
  // WATERMARK_KEY is the state key under which the last emitted watermark
  // timestamp is stored via PutState and read back via GetState.
  // NOTE(review): the value is misspelled ("wartermark"); it is kept as is
  // because changing it would change the key used for stored state.
  const WATERMARK_KEY = "$$wartermark"
  35. type WatermarkGenerator struct {
  36. inputTopics []string
  37. topicToTs map[string]int64
  38. window *WindowConfig
  39. lateTolerance int64
  40. interval int
  41. // ticker *clock.Ticker
  42. stream chan<- interface{}
  43. // state
  44. lastWatermarkTs int64
  45. }
  46. func NewWatermarkGenerator(window *WindowConfig, l int64, s []string, stream chan<- interface{}) (*WatermarkGenerator, error) {
  47. w := &WatermarkGenerator{
  48. window: window,
  49. topicToTs: make(map[string]int64),
  50. lateTolerance: l,
  51. inputTopics: s,
  52. stream: stream,
  53. }
  54. switch window.Type {
  55. case ast.NOT_WINDOW:
  56. case ast.TUMBLING_WINDOW:
  57. w.interval = window.Length
  58. case ast.HOPPING_WINDOW:
  59. w.interval = window.Interval
  60. case ast.SLIDING_WINDOW:
  61. w.interval = window.Length
  62. case ast.SESSION_WINDOW:
  63. // Use timeout to update watermark
  64. w.interval = window.Interval
  65. default:
  66. return nil, fmt.Errorf("unsupported window type %d", window.Type)
  67. }
  68. return w, nil
  69. }
  70. func (w *WatermarkGenerator) track(s string, ts int64, ctx api.StreamContext) bool {
  71. log := ctx.GetLogger()
  72. log.Debugf("watermark generator track event from topic %s at %d", s, ts)
  73. currentVal, ok := w.topicToTs[s]
  74. if !ok || ts > currentVal {
  75. w.topicToTs[s] = ts
  76. }
  77. r := ts >= w.lastWatermarkTs
  78. if r {
  79. w.trigger(ctx)
  80. }
  81. return r
  82. }
  83. func (w *WatermarkGenerator) trigger(ctx api.StreamContext) {
  84. log := ctx.GetLogger()
  85. watermark := w.computeWatermarkTs(ctx)
  86. log.Debugf("compute watermark event at %d with last %d", watermark, w.lastWatermarkTs)
  87. if watermark > w.lastWatermarkTs {
  88. t := &WatermarkTuple{Timestamp: watermark}
  89. select {
  90. case w.stream <- t:
  91. default: // TODO need to set buffer
  92. }
  93. w.lastWatermarkTs = watermark
  94. ctx.PutState(WATERMARK_KEY, w.lastWatermarkTs)
  95. log.Debugf("scan watermark event at %d", watermark)
  96. }
  97. }
  98. func (w *WatermarkGenerator) computeWatermarkTs(_ context.Context) int64 {
  99. var ts int64
  100. if len(w.topicToTs) >= len(w.inputTopics) {
  101. ts = math.MaxInt64
  102. for _, key := range w.inputTopics {
  103. if ts > w.topicToTs[key] {
  104. ts = w.topicToTs[key]
  105. }
  106. }
  107. }
  108. return ts - w.lateTolerance
  109. }
  110. // If window end cannot be determined yet, return max int64 so that it can be recalculated for the next watermark
  111. func (w *WatermarkGenerator) getNextWindow(inputs []*xsql.Tuple, current int64, watermark int64) int64 {
  112. switch w.window.Type {
  113. case ast.TUMBLING_WINDOW, ast.HOPPING_WINDOW:
  114. if current > 0 {
  115. return current + int64(w.interval)
  116. } else { // first run without previous window
  117. interval := int64(w.interval)
  118. nextTs := getEarliestEventTs(inputs, current, watermark)
  119. if nextTs == math.MaxInt64 {
  120. return nextTs
  121. }
  122. return getAlignedWindowEndTime(nextTs, interval).UnixMilli()
  123. }
  124. case ast.SLIDING_WINDOW:
  125. nextTs := getEarliestEventTs(inputs, current, watermark)
  126. return nextTs
  127. default:
  128. return math.MaxInt64
  129. }
  130. }
  131. func (w *WatermarkGenerator) getNextSessionWindow(inputs []*xsql.Tuple) (int64, bool) {
  132. if len(inputs) > 0 {
  133. timeout, duration := int64(w.window.Interval), int64(w.window.Length)
  134. sort.SliceStable(inputs, func(i, j int) bool {
  135. return inputs[i].Timestamp < inputs[j].Timestamp
  136. })
  137. et := inputs[0].Timestamp
  138. tick := getAlignedWindowEndTime(et, duration).UnixMilli()
  139. var p int64
  140. ticked := false
  141. for _, tuple := range inputs {
  142. var r int64 = math.MaxInt64
  143. if p > 0 {
  144. if tuple.Timestamp-p > timeout {
  145. r = p + timeout
  146. }
  147. }
  148. if tuple.Timestamp > tick {
  149. if tick-duration > et && tick < r {
  150. r = tick
  151. ticked = true
  152. }
  153. tick += duration
  154. }
  155. if r < math.MaxInt64 {
  156. return r, ticked
  157. }
  158. p = tuple.Timestamp
  159. }
  160. }
  161. return math.MaxInt64, false
  162. }
  163. func (o *WindowOperator) execEventWindow(ctx api.StreamContext, inputs []*xsql.Tuple, errCh chan<- error) {
  164. log := ctx.GetLogger()
  165. var (
  166. nextWindowEndTs int64
  167. prevWindowEndTs int64
  168. lastTicked bool
  169. )
  170. o.watermarkGenerator.lastWatermarkTs = 0
  171. if s, err := ctx.GetState(WATERMARK_KEY); err == nil && s != nil {
  172. if si, ok := s.(int64); ok {
  173. o.watermarkGenerator.lastWatermarkTs = si
  174. } else {
  175. infra.DrainError(ctx, fmt.Errorf("restore window state `lastWatermarkTs` %v error, invalid type", s), errCh)
  176. return
  177. }
  178. }
  179. log.Infof("Start with window state lastWatermarkTs: %d", o.watermarkGenerator.lastWatermarkTs)
  180. for {
  181. select {
  182. // process incoming item
  183. case item, opened := <-o.input:
  184. processed := false
  185. if item, processed = o.preprocess(item); processed {
  186. break
  187. }
  188. o.statManager.ProcessTimeStart()
  189. if !opened {
  190. o.statManager.IncTotalExceptions("input channel closed")
  191. break
  192. }
  193. switch d := item.(type) {
  194. case error:
  195. o.statManager.IncTotalRecordsIn()
  196. o.Broadcast(d)
  197. o.statManager.IncTotalExceptions(d.Error())
  198. case xsql.Event:
  199. if d.IsWatermark() {
  200. watermarkTs := d.GetTimestamp()
  201. windowEndTs := nextWindowEndTs
  202. ticked := false
  203. // Session window needs a recalculation of window because its window end depends on the inputs
  204. if windowEndTs == math.MaxInt64 || o.window.Type == ast.SESSION_WINDOW || o.window.Type == ast.SLIDING_WINDOW {
  205. if o.window.Type == ast.SESSION_WINDOW {
  206. windowEndTs, ticked = o.watermarkGenerator.getNextSessionWindow(inputs)
  207. } else {
  208. windowEndTs = o.watermarkGenerator.getNextWindow(inputs, prevWindowEndTs, watermarkTs)
  209. }
  210. }
  211. for windowEndTs <= watermarkTs && windowEndTs >= 0 {
  212. log.Debugf("Window end ts %d Watermark ts %d", windowEndTs, watermarkTs)
  213. log.Debugf("Current input count %d", len(inputs))
  214. // scan all events and find out the event in the current window
  215. if o.window.Type == ast.SESSION_WINDOW && !lastTicked {
  216. o.triggerTime = inputs[0].Timestamp
  217. }
  218. if windowEndTs > 0 {
  219. inputs = o.scan(inputs, windowEndTs, ctx)
  220. }
  221. prevWindowEndTs = windowEndTs
  222. lastTicked = ticked
  223. if o.window.Type == ast.SESSION_WINDOW {
  224. windowEndTs, ticked = o.watermarkGenerator.getNextSessionWindow(inputs)
  225. } else {
  226. windowEndTs = o.watermarkGenerator.getNextWindow(inputs, prevWindowEndTs, watermarkTs)
  227. }
  228. }
  229. nextWindowEndTs = windowEndTs
  230. log.Debugf("next window end %d", nextWindowEndTs)
  231. } else {
  232. o.statManager.IncTotalRecordsIn()
  233. tuple, ok := d.(*xsql.Tuple)
  234. if !ok {
  235. log.Debugf("receive non tuple element %v", d)
  236. }
  237. log.Debugf("event window receive tuple %s", tuple.Message)
  238. if o.watermarkGenerator.track(tuple.Emitter, d.GetTimestamp(), ctx) {
  239. inputs = append(inputs, tuple)
  240. }
  241. }
  242. o.statManager.ProcessTimeEnd()
  243. ctx.PutState(WINDOW_INPUTS_KEY, inputs)
  244. default:
  245. o.statManager.IncTotalRecordsIn()
  246. e := fmt.Errorf("run Window error: expect xsql.Event type but got %[1]T(%[1]v)", d)
  247. o.Broadcast(e)
  248. o.statManager.IncTotalExceptions(e.Error())
  249. }
  250. // is cancelling
  251. case <-ctx.Done():
  252. log.Infoln("Cancelling window....")
  253. if o.ticker != nil {
  254. o.ticker.Stop()
  255. }
  256. return
  257. }
  258. }
  259. }
  260. func getEarliestEventTs(inputs []*xsql.Tuple, startTs int64, endTs int64) int64 {
  261. var minTs int64 = math.MaxInt64
  262. for _, t := range inputs {
  263. if t.Timestamp > startTs && t.Timestamp <= endTs && t.Timestamp < minTs {
  264. minTs = t.Timestamp
  265. }
  266. }
  267. return minTs
  268. }