package clientv3

import (
    "fmt"
    "sync"
    "time"

    v3rpc "github.com/coreos/etcd/etcdserver/api/v3rpc/rpctypes"
    pb "github.com/coreos/etcd/etcdserver/etcdserverpb"
    mvccpb "github.com/coreos/etcd/mvcc/mvccpb"
    "golang.org/x/net/context"
    "google.golang.org/grpc"
)

const (
    EventTypeDelete = mvccpb.DELETE
    EventTypePut    = mvccpb.PUT

    closeSendErrTimeout = 250 * time.Millisecond
)

type Event mvccpb.Event

type WatchChan <-chan WatchResponse

type Watcher interface {
    // Watch watches on a key or prefix. The watched events will be returned
    // through the returned channel.
    // If the watch is slow or the required rev is compacted, the watch request
    // might be canceled from the server-side and the chan will be closed.
    // 'opts' can be: 'WithRev' and/or 'WithPrefix'.
    Watch(ctx context.Context, key string, opts ...OpOption) WatchChan

    // Close closes the watcher and cancels all watch requests.
    Close() error
}

type WatchResponse struct {
    Header pb.ResponseHeader
    Events []*Event

    // CompactRevision is the minimum revision the watcher may receive.
    CompactRevision int64

    // Canceled is used to indicate watch failure.
    // If the watch failed and the stream was about to close, before the channel is closed,
    // the channel sends a final response that has Canceled set to true with a non-nil Err().
    Canceled bool

    // Created is used to indicate the creation of the watcher.
    Created bool

    closeErr error
}

// IsCreate returns true if the event tells that the key is newly created.
func (e *Event) IsCreate() bool {
    return e.Type == EventTypePut && e.Kv.CreateRevision == e.Kv.ModRevision
}

// IsModify returns true if the event tells that a new value is put on existing key.
func (e *Event) IsModify() bool {
    return e.Type == EventTypePut && e.Kv.CreateRevision != e.Kv.ModRevision
}

// Err is the error value if this WatchResponse holds an error.
func (wr *WatchResponse) Err() error {
    switch {
    case wr.closeErr != nil:
        return v3rpc.Error(wr.closeErr)
    case wr.CompactRevision != 0:
        return v3rpc.ErrCompacted
    case wr.Canceled:
        return v3rpc.ErrFutureRev
    }
    return nil
}

// IsProgressNotify returns true if the WatchResponse is progress notification.
func (wr *WatchResponse) IsProgressNotify() bool {
    return len(wr.Events) == 0 && !wr.Canceled && !wr.Created && wr.CompactRevision == 0 && wr.Header.Revision != 0
}

// watcher implements the Watcher interface
type watcher struct {
    remote pb.WatchClient

    // mu protects the grpc streams map
    mu sync.RWMutex

    // streams holds all the active grpc streams keyed by ctx value.
    streams map[string]*watchGrpcStream
}

// watchGrpcStream tracks all watch resources attached to a single grpc stream.
type watchGrpcStream struct {
    owner  *watcher
    remote pb.WatchClient

    // ctx controls internal remote.Watch requests
    ctx context.Context
    // ctxKey is the key used when looking up this stream's context
    ctxKey string
    cancel context.CancelFunc

    // substreams holds all active watchers on this grpc stream
    substreams map[int64]*watcherStream
    // resuming holds all resuming watchers on this grpc stream
    resuming []*watcherStream

    // reqc sends a watch request from Watch() to the main goroutine
    reqc chan *watchRequest
    // respc receives data from the watch client
    respc chan *pb.WatchResponse
    // donec closes to broadcast shutdown
    donec chan struct{}
    // errc transmits errors from grpc Recv to the watch stream reconn logic
    errc chan error
    // closingc gets the watcherStream of closing watchers
    closingc chan *watcherStream

    // resumec closes to signal that all substreams should begin resuming
    resumec chan struct{}
    // closeErr is the error that closed the watch stream
    closeErr error
}

// watchRequest is issued by the subscriber to start a new watcher
type watchRequest struct {
    ctx context.Context
    key string
    end string
    rev int64
    // send created notification event if this field is true
    createdNotify bool
    // progressNotify is for progress updates
    progressNotify bool
    // filters is the list of events to filter out
    filters []pb.WatchCreateRequest_FilterType
    // get the previous key-value pair before the event happens
    prevKV bool
    // retc receives a chan WatchResponse once the watcher is established
    retc chan chan WatchResponse
}

// watcherStream represents a registered watcher
type watcherStream struct {
    // initReq is the request that initiated this request
    initReq watchRequest

    // outc publishes watch responses to subscriber
    outc chan WatchResponse
    // recvc buffers watch responses before publishing
    recvc chan *WatchResponse
    // donec closes when the watcherStream goroutine stops.
    donec chan struct{}
    // closing is set to true when stream should be scheduled to shutdown.
    closing bool
    // id is the registered watch id on the grpc stream
    id int64

    // buf holds all events received from etcd but not yet consumed by the client
    buf []*WatchResponse
}

func NewWatcher(c *Client) Watcher {
    return NewWatchFromWatchClient(pb.NewWatchClient(c.conn))
}

func NewWatchFromWatchClient(wc pb.WatchClient) Watcher {
    return &watcher{
        remote:  wc,
        streams: make(map[string]*watchGrpcStream),
    }
}

// never closes
var valCtxCh = make(chan struct{})
var zeroTime = time.Unix(0, 0)

// ctx with only the values; never Done
type valCtx struct{ context.Context }

func (vc *valCtx) Deadline() (time.Time, bool) { return zeroTime, false }
func (vc *valCtx) Done() <-chan struct{}       { return valCtxCh }
func (vc *valCtx) Err() error                  { return nil }

func (w *watcher) newWatcherGrpcStream(inctx context.Context) *watchGrpcStream {
    ctx, cancel := context.WithCancel(&valCtx{inctx})
    wgs := &watchGrpcStream{
        owner:      w,
        remote:     w.remote,
        ctx:        ctx,
        ctxKey:     fmt.Sprintf("%v", inctx),
        cancel:     cancel,
        substreams: make(map[int64]*watcherStream),

        respc:    make(chan *pb.WatchResponse),
        reqc:     make(chan *watchRequest),
        donec:    make(chan struct{}),
        errc:     make(chan error, 1),
        closingc: make(chan *watcherStream),
        resumec:  make(chan struct{}),
    }
    go wgs.run()
    return wgs
}

// Watch posts a watch request to run() and waits for a new watcher channel
func (w *watcher) Watch(ctx context.Context, key string, opts ...OpOption) WatchChan {
    ow := opWatch(key, opts...)

    var filters []pb.WatchCreateRequest_FilterType
    if ow.filterPut {
        filters = append(filters, pb.WatchCreateRequest_NOPUT)
    }
    if ow.filterDelete {
        filters = append(filters, pb.WatchCreateRequest_NODELETE)
    }

    wr := &watchRequest{
        ctx:            ctx,
        createdNotify:  ow.createdNotify,
        key:            string(ow.key),
        end:            string(ow.end),
        rev:            ow.rev,
        progressNotify: ow.progressNotify,
        filters:        filters,
        prevKV:         ow.prevKV,
        retc:           make(chan chan WatchResponse, 1),
    }

    ok := false
    ctxKey := fmt.Sprintf("%v", ctx)

    // find or allocate appropriate grpc watch stream
    w.mu.Lock()
    if w.streams == nil {
        // closed
        w.mu.Unlock()
        ch := make(chan WatchResponse)
        close(ch)
        return ch
    }
    wgs := w.streams[ctxKey]
    if wgs == nil {
        wgs = w.newWatcherGrpcStream(ctx)
        w.streams[ctxKey] = wgs
    }
    donec := wgs.donec
    reqc := wgs.reqc
    w.mu.Unlock()

    // couldn't create channel; return closed channel
    closeCh := make(chan WatchResponse, 1)

    // submit request
    select {
    case reqc <- wr:
        ok = true
    case <-wr.ctx.Done():
    case <-donec:
        if wgs.closeErr != nil {
            closeCh <- WatchResponse{closeErr: wgs.closeErr}
            break
        }
        // retry; may have dropped stream from no ctxs
        return w.Watch(ctx, key, opts...)
    }

    // receive channel
    if ok {
        select {
        case ret := <-wr.retc:
            return ret
        case <-ctx.Done():
        case <-donec:
            if wgs.closeErr != nil {
                closeCh <- WatchResponse{closeErr: wgs.closeErr}
                break
            }
            // retry; may have dropped stream from no ctxs
            return w.Watch(ctx, key, opts...)
        }
    }

    close(closeCh)
    return closeCh
}

func (w *watcher) Close() (err error) {
    w.mu.Lock()
    streams := w.streams
    w.streams = nil
    w.mu.Unlock()
    for _, wgs := range streams {
        if werr := wgs.Close(); werr != nil {
            err = werr
        }
    }
    return err
}

func (w *watchGrpcStream) Close() (err error) {
    w.cancel()
    <-w.donec
    select {
    case err = <-w.errc:
    default:
    }
    return toErr(w.ctx, err)
}

func (w *watcher) closeStream(wgs *watchGrpcStream) {
    w.mu.Lock()
    close(wgs.donec)
    wgs.cancel()
    if w.streams != nil {
        delete(w.streams, wgs.ctxKey)
    }
    w.mu.Unlock()
}

func (w *watchGrpcStream) addSubstream(resp *pb.WatchResponse, ws *watcherStream) {
    if resp.WatchId == -1 {
        // failed; no channel
        close(ws.recvc)
        return
    }
    ws.id = resp.WatchId
    w.substreams[ws.id] = ws
}

func (w *watchGrpcStream) sendCloseSubstream(ws *watcherStream, resp *WatchResponse) {
    select {
    case ws.outc <- *resp:
    case <-ws.initReq.ctx.Done():
    case <-time.After(closeSendErrTimeout):
    }
    close(ws.outc)
}

func (w *watchGrpcStream) closeSubstream(ws *watcherStream) {
    // send channel response in case stream was never established
    select {
    case ws.initReq.retc <- ws.outc:
    default:
    }
    // close subscriber's channel
    if closeErr := w.closeErr; closeErr != nil && ws.initReq.ctx.Err() == nil {
        go w.sendCloseSubstream(ws, &WatchResponse{closeErr: w.closeErr})
    } else if ws.outc != nil {
        close(ws.outc)
    }
    if ws.id != -1 {
        delete(w.substreams, ws.id)
        return
    }
    for i := range w.resuming {
        if w.resuming[i] == ws {
            w.resuming[i] = nil
            return
        }
    }
}

// run is the root of the goroutines for managing a watcher client
func (w *watchGrpcStream) run() {
    var wc pb.Watch_WatchClient
    var closeErr error

    // substreams marked to close but goroutine still running; needed for
    // avoiding double-closing recvc on grpc stream teardown
    closing := make(map[*watcherStream]struct{})

    defer func() {
        w.closeErr = closeErr
        // shutdown substreams and resuming substreams
        for _, ws := range w.substreams {
            if _, ok := closing[ws]; !ok {
                close(ws.recvc)
                closing[ws] = struct{}{}
            }
        }
        for _, ws := range w.resuming {
            if _, ok := closing[ws]; ws != nil && !ok {
                close(ws.recvc)
                closing[ws] = struct{}{}
            }
        }
        w.joinSubstreams()
        for range closing {
            w.closeSubstream(<-w.closingc)
        }

        w.owner.closeStream(w)
    }()

    // start a stream with the etcd grpc server
    if wc, closeErr = w.newWatchClient(); closeErr != nil {
        return
    }

    cancelSet := make(map[int64]struct{})

    for {
        select {
        // Watch() requested
        case wreq := <-w.reqc:
            outc := make(chan WatchResponse, 1)
            ws := &watcherStream{
                initReq: *wreq,
                id:      -1,
                outc:    outc,
                // unbufffered so resumes won't cause repeat events
                recvc: make(chan *WatchResponse),
            }

            ws.donec = make(chan struct{})
            go w.serveSubstream(ws, w.resumec)

            // queue up for watcher creation/resume
            w.resuming = append(w.resuming, ws)
            if len(w.resuming) == 1 {
                // head of resume queue, can register a new watcher
                wc.Send(ws.initReq.toPB())
            }
        // New events from the watch client
        case pbresp := <-w.respc:
            switch {
            case pbresp.Created:
                // response to head of queue creation
                if ws := w.resuming[0]; ws != nil {
                    w.addSubstream(pbresp, ws)
                    w.dispatchEvent(pbresp)
                    w.resuming[0] = nil
                }
                if ws := w.nextResume(); ws != nil {
                    wc.Send(ws.initReq.toPB())
                }
            case pbresp.Canceled:
                delete(cancelSet, pbresp.WatchId)
                if ws, ok := w.substreams[pbresp.WatchId]; ok {
                    // signal to stream goroutine to update closingc
                    close(ws.recvc)
                    closing[ws] = struct{}{}
                }
            default:
                // dispatch to appropriate watch stream
                if ok := w.dispatchEvent(pbresp); ok {
                    break
                }
                // watch response on unexpected watch id; cancel id
                if _, ok := cancelSet[pbresp.WatchId]; ok {
                    break
                }
                cancelSet[pbresp.WatchId] = struct{}{}
                cr := &pb.WatchRequest_CancelRequest{
                    CancelRequest: &pb.WatchCancelRequest{
                        WatchId: pbresp.WatchId,
                    },
                }
                req := &pb.WatchRequest{RequestUnion: cr}
                wc.Send(req)
            }
        // watch client failed to recv; spawn another if possible
        case err := <-w.errc:
            if isHaltErr(w.ctx, err) || toErr(w.ctx, err) == v3rpc.ErrNoLeader {
                closeErr = err
                return
            }
            if wc, closeErr = w.newWatchClient(); closeErr != nil {
                return
            }
            if ws := w.nextResume(); ws != nil {
                wc.Send(ws.initReq.toPB())
            }
            cancelSet = make(map[int64]struct{})
        case <-w.ctx.Done():
            return
        case ws := <-w.closingc:
            w.closeSubstream(ws)
            delete(closing, ws)
            if len(w.substreams)+len(w.resuming) == 0 {
                // no more watchers on this stream, shutdown
                return
            }
        }
    }
}

// nextResume chooses the next resuming to register with the grpc stream. Abandoned
// streams are marked as nil in the queue since the head must wait for its inflight registration.
func (w *watchGrpcStream) nextResume() *watcherStream {
    for len(w.resuming) != 0 {
        if w.resuming[0] != nil {
            return w.resuming[0]
        }
        w.resuming = w.resuming[1:len(w.resuming)]
    }
    return nil
}

// dispatchEvent sends a WatchResponse to the appropriate watcher stream
func (w *watchGrpcStream) dispatchEvent(pbresp *pb.WatchResponse) bool {
    ws, ok := w.substreams[pbresp.WatchId]
    if !ok {
        return false
    }
    events := make([]*Event, len(pbresp.Events))
    for i, ev := range pbresp.Events {
        events[i] = (*Event)(ev)
    }
    wr := &WatchResponse{
        Header:          *pbresp.Header,
        Events:          events,
        CompactRevision: pbresp.CompactRevision,
        Created:         pbresp.Created,
        Canceled:        pbresp.Canceled,
    }
    select {
    case ws.recvc <- wr:
    case <-ws.donec:
        return false
    }
    return true
}

// serveWatchClient forwards messages from the grpc stream to run()
func (w *watchGrpcStream) serveWatchClient(wc pb.Watch_WatchClient) {
    for {
        resp, err := wc.Recv()
        if err != nil {
            select {
            case w.errc <- err:
            case <-w.donec:
            }
            return
        }
        select {
        case w.respc <- resp:
        case <-w.donec:
            return
        }
    }
}

// serveSubstream forwards watch responses from run() to the subscriber
func (w *watchGrpcStream) serveSubstream(ws *watcherStream, resumec chan struct{}) {
    if ws.closing {
        panic("created substream goroutine but substream is closing")
    }

    // nextRev is the minimum expected next revision
    nextRev := ws.initReq.rev
    resuming := false
    defer func() {
        if !resuming {
            ws.closing = true
        }
        close(ws.donec)
        if !resuming {
            w.closingc <- ws
        }
    }()

    emptyWr := &WatchResponse{}
    for {
        curWr := emptyWr
        outc := ws.outc

        if len(ws.buf) > 0 {
            curWr = ws.buf[0]
        } else {
            outc = nil
        }
        select {
        case outc <- *curWr:
            if ws.buf[0].Err() != nil {
                return
            }
            ws.buf[0] = nil
            ws.buf = ws.buf[1:]
        case wr, ok := <-ws.recvc:
            if !ok {
                // shutdown from closeSubstream
                return
            }

            if wr.Created {
                if ws.initReq.retc != nil {
                    ws.initReq.retc <- ws.outc
                    // to prevent next write from taking the slot in buffered channel
                    // and posting duplicate create events
                    ws.initReq.retc = nil

                    // send first creation event only if requested
                    if ws.initReq.createdNotify {
                        ws.outc <- *wr
                    }
                }
            }

            nextRev = wr.Header.Revision
            if len(wr.Events) > 0 {
                nextRev = wr.Events[len(wr.Events)-1].Kv.ModRevision + 1
            }
            ws.initReq.rev = nextRev

            // created event is already sent above,
            // watcher should not post duplicate events
            if wr.Created {
                continue
            }

            // TODO pause channel if buffer gets too large
            ws.buf = append(ws.buf, wr)
        case <-w.ctx.Done():
            return
        case <-ws.initReq.ctx.Done():
            return
        case <-resumec:
            resuming = true
            return
        }
    }
    // lazily send cancel message if events on missing id
}

func (w *watchGrpcStream) newWatchClient() (pb.Watch_WatchClient, error) {
    // mark all substreams as resuming
    close(w.resumec)
    w.resumec = make(chan struct{})
    w.joinSubstreams()
    for _, ws := range w.substreams {
        ws.id = -1
        w.resuming = append(w.resuming, ws)
    }
    // strip out nils, if any
    var resuming []*watcherStream
    for _, ws := range w.resuming {
        if ws != nil {
            resuming = append(resuming, ws)
        }
    }
    w.resuming = resuming
    w.substreams = make(map[int64]*watcherStream)

    // connect to grpc stream while accepting watcher cancelation
    stopc := make(chan struct{})
    donec := w.waitCancelSubstreams(stopc)
    wc, err := w.openWatchClient()
    close(stopc)
    <-donec

    // serve all non-closing streams, even if there's a client error
    // so that the teardown path can shutdown the streams as expected.
    for _, ws := range w.resuming {
        if ws.closing {
            continue
        }
        ws.donec = make(chan struct{})
        go w.serveSubstream(ws, w.resumec)
    }

    if err != nil {
        return nil, v3rpc.Error(err)
    }

    // receive data from new grpc stream
    go w.serveWatchClient(wc)
    return wc, nil
}

func (w *watchGrpcStream) waitCancelSubstreams(stopc <-chan struct{}) <-chan struct{} {
    var wg sync.WaitGroup
    wg.Add(len(w.resuming))
    donec := make(chan struct{})
    for i := range w.resuming {
        go func(ws *watcherStream) {
            defer wg.Done()
            if ws.closing {
                return
            }
            select {
            case <-ws.initReq.ctx.Done():
                // closed ws will be removed from resuming
                ws.closing = true
                close(ws.outc)
                ws.outc = nil
                go func() { w.closingc <- ws }()
            case <-stopc:
            }
        }(w.resuming[i])
    }
    go func() {
        defer close(donec)
        wg.Wait()
    }()
    return donec
}

// joinSubstream waits for all substream goroutines to complete
func (w *watchGrpcStream) joinSubstreams() {
    for _, ws := range w.substreams {
        <-ws.donec
    }
    for _, ws := range w.resuming {
        if ws != nil {
            <-ws.donec
        }
    }
}

// openWatchClient retries opening a watchclient until retryConnection fails
func (w *watchGrpcStream) openWatchClient() (ws pb.Watch_WatchClient, err error) {
    for {
        select {
        case <-w.ctx.Done():
            if err == nil {
                return nil, w.ctx.Err()
            }
            return nil, err
        default:
        }
        if ws, err = w.remote.Watch(w.ctx, grpc.FailFast(false)); ws != nil && err == nil {
            break
        }
        if isHaltErr(w.ctx, err) {
            return nil, v3rpc.Error(err)
        }
    }
    return ws, nil
}

// toPB converts an internal watch request structure to its protobuf messagefunc (wr *watchRequest)
func (wr *watchRequest) toPB() *pb.WatchRequest {
    req := &pb.WatchCreateRequest{
        StartRevision:  wr.rev,
        Key:            []byte(wr.key),
        RangeEnd:       []byte(wr.end),
        ProgressNotify: wr.progressNotify,
        Filters:        wr.filters,
        PrevKv:         wr.prevKV,
    }
    cr := &pb.WatchRequest_CreateRequest{CreateRequest: req}
    return &pb.WatchRequest{RequestUnion: cr}
}

随机推荐

  1. db2字段修改

    db2表字段修改 1:删除字段非空属性alter table XXX alter column XXX drop not null 2:添加字段非空属性alter table XXX alter co ...

  2. Java小技巧输出26个英文字母

    相信有的童鞋写到过与字母有关的小东西,是否有写过全部的字母呢?26个这么多字母,一个个打会疯掉.所有咱们可以用一个小技巧使用for循环帮我们把26个字母自动搞出来,大家来瞅一眼把! 使用Java遍历2 ...

  3. IOS常用第三方库《转》

    UI 动画 网络相关 Model 其他 数据库 缓存处理 PDF 图像浏览及处理 摄像照相视频音频处理 响应式框架 消息相关 版本新API的Demo 代码安全与密码 测试及调试 AppleWatch ...

  4. jdk1.7 tomcat-7安装

    由于软件下载地址经常有变动,所以不能直接wget,还是直接到网上点击下载 下载jdk http://www.oracle.com/technetwork/java/javase/downloads/j ...

  5. 排序算法入门之选择排序-Java实现

    本文参考http://blog.csdn.net/m0_37568091/article/details/78023705 选择排序是先从对象数组中选出最小的放在第一个位置,再从剩下的元素中选择次小的 ...

  6. rotate image(旋转数组)

    You are given an n x n 2D matrix representing an image. Rotate the image by 90 degrees (clockwise). ...

  7. two sum II

    Given an array of integers that is already sorted in ascending order, find two numbers such that the ...

  8. 聊聊Unity的Gamma校正以及线性工作流

    0x00 前言的前言 这篇小文其实是在清明节前后起的头,不过后来一度搁笔.一直到这周末才又想起来起的这个头还没有写完,所以还是直接用一个月前的开头,再将过程和结尾补齐. 0x01 前言 结束了在南方一 ...

  9. Mybatis 系列8

    上篇系列7 介绍了insert.update.delete的用法, 本篇将介绍select.resultMap的用法. select无疑是我们最常用,也是最复杂的,mybatis通过resultMap ...

  10. Android两级嵌套ListView滑动问题的解决

    Android下面两级嵌套ListView会出现滑动失效,解决方案,把两级Listview全换成NoScrollListView,代码如下: public class NoScrollListView ...