From 31583b5d6ca023edc95039815f05c4cfffeb02ad Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Tue, 28 Apr 2020 17:29:10 -0700 Subject: [PATCH] master and volume server: avoid race condition The volume server may disconnect and reconnect to the same master. The master's unregistration of the old connection may happen after the reconnection, so the reconnected volume server disappears from the master. --- weed/server/master_grpc_server.go | 4 +++- weed/server/volume_grpc_client_to_master.go | 3 +++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/weed/server/master_grpc_server.go b/weed/server/master_grpc_server.go index 9f5bf569d..1ee214deb 100644 --- a/weed/server/master_grpc_server.go +++ b/weed/server/master_grpc_server.go @@ -24,8 +24,10 @@ func (ms *MasterServer) SendHeartbeat(stream master_pb.Seaweed_SendHeartbeatServ defer func() { if dn != nil { - glog.V(0).Infof("unregister disconnected volume server %s:%d", dn.Ip, dn.Port) + // if the volume server disconnects and reconnects quickly + // the unregister and register can race with each other t.UnRegisterDataNode(dn) + glog.V(0).Infof("unregister disconnected volume server %s:%d", dn.Ip, dn.Port) message := &master_pb.VolumeLocation{ Url: dn.Url(), diff --git a/weed/server/volume_grpc_client_to_master.go b/weed/server/volume_grpc_client_to_master.go index 517eb4bc0..7cb836344 100644 --- a/weed/server/volume_grpc_client_to_master.go +++ b/weed/server/volume_grpc_client_to_master.go @@ -35,6 +35,9 @@ func (vs *VolumeServer) heartbeat() { for { for _, master := range vs.SeedMasterNodes { if newLeader != "" { + // the new leader may actually be the same master + // need to wait a bit before adding itself + time.Sleep(3 * time.Second) master = newLeader } masterGrpcAddress, parseErr := pb.ParseServerToGrpcAddress(master)