Skip to content

Commit

Permalink
global rebalance vs targets that are being decommissioned
Browse files Browse the repository at this point in the history
from the _rebalancing_ perspective, a target node that is in maintenance mode or
that is being decommissioned must still be considered "active"
                       _unless_
this target has already reached post-rebalancing (`SnodeMaintPostReb`) state

Signed-off-by: Alex Aizman <[email protected]>
  • Loading branch information
alex-aizman committed Aug 27, 2024
1 parent 31d1a79 commit 56f7347
Show file tree
Hide file tree
Showing 4 changed files with 26 additions and 10 deletions.
2 changes: 1 addition & 1 deletion cmd/cli/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module github.com/NVIDIA/aistore/cmd/cli
go 1.22.3

require (
github.com/NVIDIA/aistore v1.3.24-0.20240826235310-8c273cfa0d36
github.com/NVIDIA/aistore v1.3.24-0.20240827150748-31d1a799f7e5
github.com/fatih/color v1.17.0
github.com/json-iterator/go v1.1.12
github.com/onsi/ginkgo/v2 v2.20.0
Expand Down
4 changes: 2 additions & 2 deletions cmd/cli/go.sum
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
code.cloudfoundry.org/bytefmt v0.0.0-20190710193110-1eb035ffe2b6/go.mod h1:wN/zk7mhREp/oviagqUXY3EwuHhWyOvAdsn5Y4CzOrc=
github.com/BurntSushi/toml v1.3.2/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/NVIDIA/aistore v1.3.24-0.20240826235310-8c273cfa0d36 h1:6WbWE3vqkTVP4i1hnHqye3yktBQaD4KDtJ0MUdcmc64=
github.com/NVIDIA/aistore v1.3.24-0.20240826235310-8c273cfa0d36/go.mod h1:si83S9r29vwIC0f0CE2Mk+25bFiaN6mmVlmuBpP4hHM=
github.com/NVIDIA/aistore v1.3.24-0.20240827150748-31d1a799f7e5 h1:ZgEB37pn2584FDlJdKPMw3AKWVfgNJL24QhPmKxRA+0=
github.com/NVIDIA/aistore v1.3.24-0.20240827150748-31d1a799f7e5/go.mod h1:si83S9r29vwIC0f0CE2Mk+25bFiaN6mmVlmuBpP4hHM=
github.com/OneOfOne/xxhash v1.2.8 h1:31czK/TI9sNkxIKfaUfGlU47BAxQ0ztGgd9vPyqimf8=
github.com/OneOfOne/xxhash v1.2.8/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/VividCortex/ewma v1.1.1/go.mod h1:2Tkkvm3sRDVXaiyucHiACn4cqf7DpdyLvmxzcbUokwA=
Expand Down
16 changes: 16 additions & 0 deletions core/meta/smap.go
Original file line number Diff line number Diff line change
Expand Up @@ -461,6 +461,22 @@ func (m *Smap) HasActiveTs(except string) bool {
return false
}

// HasPeersToRebalance reports whether the target map (Tmap) holds at least one
// target - other than the one whose ID equals `except` - that counts as a peer
// for rebalancing purposes.
//
// A target counts as a peer when either:
//   - it is not in maintenance and is not being decommissioned, or
//   - it is in maintenance/decommissioning but its SnodeMaintPostReb flag
//     is not set yet.
//
// Returns false when no such target exists (including when Tmap is empty).
func (m *Smap) HasPeersToRebalance(except string) bool {
	for id, tsi := range m.Tmap {
		switch {
		case id == except:
			// the excluded target is never counted
		case !tsi.InMaintOrDecomm():
			// regular target, not in maintenance or decommissioning
			return true
		case !tsi.Flags.IsSet(SnodeMaintPostReb):
			// in maintenance/decommissioning, post-rebalance flag not set yet
			return true
		}
	}
	return false
}

func (m *Smap) CountActivePs() (count int) {
for _, p := range m.Pmap {
if !p.InMaintOrDecomm() {
Expand Down
14 changes: 7 additions & 7 deletions reb/globrun.go
Original file line number Diff line number Diff line change
Expand Up @@ -196,7 +196,7 @@ func (reb *Reb) RunRebalance(smap *meta.Smap, id int64, notif *xact.NotifXact, t

reb.regRecv()

haveStreams := smap.HasActiveTs(core.T.SID())
haveStreams := smap.HasPeersToRebalance(core.T.SID())
if bmd.IsEmpty() {
haveStreams = false
}
Expand Down Expand Up @@ -552,16 +552,16 @@ func (reb *Reb) runNoEC(rargs *rebArgs) error {

func (reb *Reb) rebWaitAck(rargs *rebArgs) (errCnt int) {
var (
cnt int
logHdr = reb.logHdr(rargs.id, rargs.smap)
sleep = rargs.config.Timeout.CplaneOperation.D()
maxwt = rargs.config.Rebalance.DestRetryTime.D()
xreb = reb.xctn()
smap = rargs.smap
cnt int
sleep = rargs.config.Timeout.CplaneOperation.D()
maxwt = rargs.config.Rebalance.DestRetryTime.D()
xreb = reb.xctn()
smap = rargs.smap
)
maxwt += time.Duration(int64(time.Minute) * int64(rargs.smap.CountTargets()/10))
maxwt = min(maxwt, rargs.config.Rebalance.DestRetryTime.D()*2)
reb.changeStage(rebStageWaitAck)
logHdr := reb.logHdr(rargs.id, rargs.smap)

for {
curwt := time.Duration(0)
Expand Down

0 comments on commit 56f7347

Please sign in to comment.