From 853c4de75a7005d0c8b744055edc70d6c1c6e122 Mon Sep 17 00:00:00 2001 From: Harshavardhana Date: Wed, 16 Nov 2022 07:59:10 -0800 Subject: [PATCH] allow changing endpoints in distributed setups (#16071) --- cmd/bootstrap-peer-server.go | 22 +++++++++++++++------- cmd/erasure-server-pool-decom.go | 15 ++++----------- cmd/erasure-server-pool-decom_test.go | 12 ++++++------ cmd/server-main.go | 2 +- 4 files changed, 26 insertions(+), 25 deletions(-) diff --git a/cmd/bootstrap-peer-server.go b/cmd/bootstrap-peer-server.go index e0e1dd90f..4e213184c 100644 --- a/cmd/bootstrap-peer-server.go +++ b/cmd/bootstrap-peer-server.go @@ -1,4 +1,4 @@ -// Copyright (c) 2015-2021 MinIO, Inc. +// Copyright (c) 2015-2022 MinIO, Inc. // // This file is part of MinIO Object Storage stack // @@ -64,6 +64,7 @@ func (s1 ServerSystemConfig) Diff(s2 ServerSystemConfig) error { return fmt.Errorf("Expected platform '%s', found to be running '%s'", s1.MinioPlatform, s2.MinioPlatform) } + if s1.MinioEndpoints.NEndpoints() != s2.MinioEndpoints.NEndpoints() { return fmt.Errorf("Expected number of endpoints %d, seen %d", s1.MinioEndpoints.NEndpoints(), s2.MinioEndpoints.NEndpoints()) @@ -200,15 +201,18 @@ func verifyServerSystemConfig(ctx context.Context, endpointServerPools EndpointS srcCfg := getServerSystemCfg() clnts := newBootstrapRESTClients(endpointServerPools) var onlineServers int - var offlineEndpoints []string + var offlineEndpoints []error + var incorrectConfigs []error var retries int for onlineServers < len(clnts)/2 { for _, clnt := range clnts { if err := clnt.Verify(ctx, srcCfg); err != nil { if !isNetworkError(err) { - logger.LogIf(ctx, fmt.Errorf("%s has incorrect configuration: %w", clnt.String(), err)) + logger.LogOnceIf(ctx, fmt.Errorf("%s has incorrect configuration: %w", clnt.String(), err), clnt.String()) + incorrectConfigs = append(incorrectConfigs, fmt.Errorf("%s has incorrect configuration: %w", clnt.String(), err)) + } else { + offlineEndpoints = append(offlineEndpoints, fmt.Errorf("%s is unreachable: %w", clnt.String(), err)) } - offlineEndpoints = append(offlineEndpoints, clnt.String()) continue } onlineServers++ @@ -221,15 +225,19 @@ func verifyServerSystemConfig(ctx context.Context, endpointServerPools EndpointS // 100% CPU when half the endpoints are offline. time.Sleep(100 * time.Millisecond) retries++ - // after 5 retries start logging that servers are not reachable yet - if retries >= 5 { - logger.Info(fmt.Sprintf("Waiting for atleast %d remote servers to be online for bootstrap check", len(clnts)/2)) + // after 20 retries start logging that servers are not reachable yet + if retries >= 20 { + logger.Info(fmt.Sprintf("Waiting for atleast %d remote servers with valid configuration to be online", len(clnts)/2)) if len(offlineEndpoints) > 0 { logger.Info(fmt.Sprintf("Following servers are currently offline or unreachable %s", offlineEndpoints)) } + if len(incorrectConfigs) > 0 { + logger.Info(fmt.Sprintf("Following servers mismatch in their configuration %s", incorrectConfigs)) + } retries = 0 // reset to log again after 5 retries. } offlineEndpoints = nil + incorrectConfigs = nil } } return nil diff --git a/cmd/erasure-server-pool-decom.go b/cmd/erasure-server-pool-decom.go index dd9f4446a..5f76bc9d3 100644 --- a/cmd/erasure-server-pool-decom.go +++ b/cmd/erasure-server-pool-decom.go @@ -317,7 +317,7 @@ func (p *poolMeta) validate(pools []*erasureSets) (bool, error) { } replaceScheme := func(k string) string { - // This is needed as fallback when users are changeing + // This is needed as fallback when users are updating // from http->https or https->http, we need to verify // both because MinIO remembers the command-line in // "exact" order - as long as this order is not disturbed @@ -359,11 +359,7 @@ func (p *poolMeta) validate(pools []*erasureSets) (bool, error) { } } if !ok { - if globalIsErasureSD { - update = true - } else { - return false, fmt.Errorf("pool(%s) = %s is not specified, please specify on server command line", humanize.Ordinal(pi.position+1), k) - } + update = true } } @@ -378,11 +374,7 @@ func (p *poolMeta) validate(pools []*erasureSets) (bool, error) { } } if !ok { - if globalIsErasureSD { - update = true - } else { - return false, fmt.Errorf("pool(%s) = %s is not specified, please specify on server command line", humanize.Ordinal(pi.position+1), k) - } + update = true } if ok && pos != pi.position { return false, fmt.Errorf("pool order change detected for %s, expected position is (%s) but found (%s)", k, humanize.Ordinal(pi.position+1), humanize.Ordinal(pos+1)) @@ -400,6 +392,7 @@ func (p *poolMeta) validate(pools []*erasureSets) (bool, error) { } } } + return update, nil } diff --git a/cmd/erasure-server-pool-decom_test.go b/cmd/erasure-server-pool-decom_test.go index c4523908e..a97ea0bf3 100644 --- a/cmd/erasure-server-pool-decom_test.go +++ b/cmd/erasure-server-pool-decom_test.go @@ -112,16 +112,16 @@ func TestPoolMetaValidate(t *testing.T) { { meta: meta, pools: newPools, - name: "Invalid-Commandline", - expectedErr: true, - expectedUpdate: false, + name: "Correct-Update", + expectedErr: false, + expectedUpdate: true, }, { meta: meta, pools: reducedPools, - name: "Invalid-Reduced", - expectedErr: true, - expectedUpdate: false, + name: "Correct-Update", + expectedErr: false, + expectedUpdate: true, }, { meta: meta, diff --git a/cmd/server-main.go b/cmd/server-main.go index 77c3e92ac..e9d34f40a 100644 --- a/cmd/server-main.go +++ b/cmd/server-main.go @@ -568,7 +568,7 @@ func serverMain(ctx *cli.Context) { setHTTPServer(httpServer) - if globalIsDistErasure && globalEndpoints.FirstLocal() { + if globalIsDistErasure { // Additionally in distributed setup, validate the setup and configuration. if err := verifyServerSystemConfig(GlobalContext, globalEndpoints); err != nil { logger.Fatal(err, "Unable to start the server")