You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

288 lines
8.4 KiB

  1. // Copyright (c) 2015-2022 MinIO, Inc.
  2. //
  3. // This file is part of MinIO Object Storage stack
  4. //
  5. // This program is free software: you can redistribute it and/or modify
  6. // it under the terms of the GNU Affero General Public License as published by
  7. // the Free Software Foundation, either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // This program is distributed in the hope that it will be useful
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU Affero General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. package cmd
  18. import (
  19. "context"
  20. "crypto/md5"
  21. "encoding/hex"
  22. "errors"
  23. "fmt"
  24. "io"
  25. "math/rand"
  26. "os"
  27. "reflect"
  28. "strings"
  29. "sync"
  30. "time"
  31. "github.com/minio/minio-go/v7/pkg/set"
  32. "github.com/minio/minio/internal/grid"
  33. "github.com/minio/minio/internal/logger"
  34. "github.com/minio/pkg/v3/env"
  35. )
  36. // bootstrapRESTServer abstracts a node over the network; its VerifyHandler method returns this node's ServerSystemConfig.
  37. type bootstrapRESTServer struct{}
  38. //go:generate msgp -file=$GOFILE
  39. // ServerSystemConfig - captures information about server configuration.
  40. type ServerSystemConfig struct {
  41. NEndpoints int
  42. CmdLines []string
  43. MinioEnv map[string]string
  44. Checksum string
  45. }
  46. // Diff - returns error on first difference found in two configs.
  47. func (s1 *ServerSystemConfig) Diff(s2 *ServerSystemConfig) error {
  48. if s1.Checksum != s2.Checksum {
  49. return fmt.Errorf("Expected MinIO binary checksum: %s, seen: %s", s1.Checksum, s2.Checksum)
  50. }
  51. ns1 := s1.NEndpoints
  52. ns2 := s2.NEndpoints
  53. if ns1 != ns2 {
  54. return fmt.Errorf("Expected number of endpoints %d, seen %d", ns1, ns2)
  55. }
  56. for i, cmdLine := range s1.CmdLines {
  57. if cmdLine != s2.CmdLines[i] {
  58. return fmt.Errorf("Expected command line argument %s, seen %s", cmdLine,
  59. s2.CmdLines[i])
  60. }
  61. }
  62. if reflect.DeepEqual(s1.MinioEnv, s2.MinioEnv) {
  63. return nil
  64. }
  65. // Report differences in environment variables.
  66. var missing []string
  67. var mismatching []string
  68. for k, v := range s1.MinioEnv {
  69. ev, ok := s2.MinioEnv[k]
  70. if !ok {
  71. missing = append(missing, k)
  72. } else if v != ev {
  73. mismatching = append(mismatching, k)
  74. }
  75. }
  76. var extra []string
  77. for k := range s2.MinioEnv {
  78. _, ok := s1.MinioEnv[k]
  79. if !ok {
  80. extra = append(extra, k)
  81. }
  82. }
  83. msg := "Expected MINIO_* environment name and values across all servers to be same: "
  84. if len(missing) > 0 {
  85. msg += fmt.Sprintf(`Missing environment values: %v. `, missing)
  86. }
  87. if len(mismatching) > 0 {
  88. msg += fmt.Sprintf(`Mismatching environment values: %v. `, mismatching)
  89. }
  90. if len(extra) > 0 {
  91. msg += fmt.Sprintf(`Extra environment values: %v. `, extra)
  92. }
  93. return errors.New(strings.TrimSpace(msg))
  94. }
  // skipEnvs is the set of MINIO_* environment variable names that
  // getServerSystemCfg leaves out of the configuration it builds, so they
  // never take part in the comparison done by ServerSystemConfig.Diff.
  // Add a name here when a variable may be configured differently per node.
  95. var skipEnvs = map[string]struct{}{
  96. "MINIO_OPTS": {},
  97. "MINIO_CERT_PASSWD": {},
  98. "MINIO_SERVER_DEBUG": {},
  99. "MINIO_DSYNC_TRACE": {},
  100. "MINIO_ROOT_USER": {},
  101. "MINIO_ROOT_PASSWORD": {},
  102. "MINIO_ACCESS_KEY": {},
  103. "MINIO_SECRET_KEY": {},
  104. "MINIO_OPERATOR_VERSION": {},
  105. "MINIO_VSPHERE_PLUGIN_VERSION": {},
  106. "MINIO_CI_CD": {},
  107. }
  108. func getServerSystemCfg() *ServerSystemConfig {
  109. envs := env.List("MINIO_")
  110. envValues := make(map[string]string, len(envs))
  111. for _, envK := range envs {
  112. // skip certain environment variables as part
  113. // of the whitelist and could be configured
  114. // differently on each nodes, update skipEnvs()
  115. // map if there are such environment values
  116. if _, ok := skipEnvs[envK]; ok {
  117. continue
  118. }
  119. envValues[envK] = logger.HashString(env.Get(envK, ""))
  120. }
  121. scfg := &ServerSystemConfig{NEndpoints: globalEndpoints.NEndpoints(), MinioEnv: envValues, Checksum: binaryChecksum}
  122. var cmdLines []string
  123. for _, ep := range globalEndpoints {
  124. cmdLines = append(cmdLines, ep.CmdLine)
  125. }
  126. scfg.CmdLines = cmdLines
  127. return scfg
  128. }
  129. func (s *bootstrapRESTServer) VerifyHandler(params *grid.MSS) (*ServerSystemConfig, *grid.RemoteErr) {
  130. return getServerSystemCfg(), nil
  131. }
  // serverVerifyHandler is the grid single handler created for
  // grid.HandlerServerVerify, typed with a *grid.MSS request and a
  // *ServerSystemConfig response. It is registered on the server side by
  // registerBootstrapRESTHandlers and invoked on the client side by
  // bootstrapRESTClient.Verify.
  // NOTE(review): the two function arguments presumably construct fresh
  // request/response values for the handler - confirm against the grid package.
  132. var serverVerifyHandler = grid.NewSingleHandler[*grid.MSS, *ServerSystemConfig](grid.HandlerServerVerify, grid.NewMSS, func() *ServerSystemConfig { return &ServerSystemConfig{} })
  133. // registerBootstrapRESTHandlers - register bootstrap rest router.
  134. func registerBootstrapRESTHandlers(gm *grid.Manager) {
  135. server := &bootstrapRESTServer{}
  136. logger.FatalIf(serverVerifyHandler.Register(gm, server.VerifyHandler), "unable to register handler")
  137. }
  138. // bootstrapRESTClient is the client used to talk to the bootstrap verify handler of a remote node.
  139. type bootstrapRESTClient struct {
  140. gridConn *grid.Connection // grid connection used by Verify for the remote call and by String for the node's name
  141. }
  142. // Verify function verifies the server config.
  143. func (client *bootstrapRESTClient) Verify(ctx context.Context, srcCfg *ServerSystemConfig) (err error) {
  144. if newObjectLayerFn() != nil {
  145. return nil
  146. }
  147. recvCfg, err := serverVerifyHandler.Call(ctx, client.gridConn, grid.NewMSS())
  148. if err != nil {
  149. return err
  150. }
  151. // We do not need the response after returning.
  152. defer serverVerifyHandler.PutResponse(recvCfg)
  153. return srcCfg.Diff(recvCfg)
  154. }
  155. // Stringer provides a canonicalized representation of node.
  156. func (client *bootstrapRESTClient) String() string {
  157. return client.gridConn.String()
  158. }
  159. var binaryChecksum = getBinaryChecksum()
  160. func getBinaryChecksum() string {
  161. mw := md5.New()
  162. binPath, err := os.Executable()
  163. if err != nil {
  164. logger.Error("Calculating checksum failed: %s", err)
  165. return "00000000000000000000000000000000"
  166. }
  167. b, err := os.Open(binPath)
  168. if err != nil {
  169. logger.Error("Calculating checksum failed: %s", err)
  170. return "00000000000000000000000000000000"
  171. }
  172. defer b.Close()
  173. io.Copy(mw, b)
  174. return hex.EncodeToString(mw.Sum(nil))
  175. }
  176. func verifyServerSystemConfig(ctx context.Context, endpointServerPools EndpointServerPools, gm *grid.Manager) error {
  177. srcCfg := getServerSystemCfg()
  178. clnts := newBootstrapRESTClients(endpointServerPools, gm)
  179. var onlineServers int
  180. var offlineEndpoints []error
  181. var incorrectConfigs []error
  182. var retries int
  183. var mu sync.Mutex
  184. for onlineServers < len(clnts)/2 {
  185. var wg sync.WaitGroup
  186. wg.Add(len(clnts))
  187. onlineServers = 0
  188. for _, clnt := range clnts {
  189. go func(clnt *bootstrapRESTClient) {
  190. defer wg.Done()
  191. if clnt.gridConn.State() != grid.StateConnected {
  192. mu.Lock()
  193. offlineEndpoints = append(offlineEndpoints, fmt.Errorf("%s is unreachable: %w", clnt, grid.ErrDisconnected))
  194. mu.Unlock()
  195. return
  196. }
  197. ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
  198. defer cancel()
  199. err := clnt.Verify(ctx, srcCfg)
  200. mu.Lock()
  201. if err != nil {
  202. bootstrapTraceMsg(fmt.Sprintf("bootstrapVerify: %v, endpoint: %s", err, clnt))
  203. if !isNetworkError(err) {
  204. bootLogOnceIf(context.Background(), fmt.Errorf("%s has incorrect configuration: %w", clnt, err), "incorrect_"+clnt.String())
  205. incorrectConfigs = append(incorrectConfigs, fmt.Errorf("%s has incorrect configuration: %w", clnt, err))
  206. } else {
  207. offlineEndpoints = append(offlineEndpoints, fmt.Errorf("%s is unreachable: %w", clnt, err))
  208. }
  209. } else {
  210. onlineServers++
  211. }
  212. mu.Unlock()
  213. }(clnt)
  214. }
  215. wg.Wait()
  216. select {
  217. case <-ctx.Done():
  218. return ctx.Err()
  219. default:
  220. // Sleep and stagger to avoid blocked CPU and thundering
  221. // herd upon start up sequence.
  222. time.Sleep(25*time.Millisecond + time.Duration(rand.Int63n(int64(100*time.Millisecond))))
  223. retries++
  224. // after 20 retries start logging that servers are not reachable yet
  225. if retries >= 20 {
  226. logger.Info(fmt.Sprintf("Waiting for at least %d remote servers with valid configuration to be online", len(clnts)/2))
  227. if len(offlineEndpoints) > 0 {
  228. logger.Info(fmt.Sprintf("Following servers are currently offline or unreachable %s", offlineEndpoints))
  229. }
  230. if len(incorrectConfigs) > 0 {
  231. logger.Info(fmt.Sprintf("Following servers have mismatching configuration %s", incorrectConfigs))
  232. }
  233. retries = 0 // reset to log again after 20 retries.
  234. }
  235. offlineEndpoints = nil
  236. incorrectConfigs = nil
  237. }
  238. }
  239. return nil
  240. }
  241. func newBootstrapRESTClients(endpointServerPools EndpointServerPools, gm *grid.Manager) []*bootstrapRESTClient {
  242. seenClient := set.NewStringSet()
  243. var clnts []*bootstrapRESTClient
  244. for _, ep := range endpointServerPools {
  245. for _, endpoint := range ep.Endpoints {
  246. if endpoint.IsLocal {
  247. continue
  248. }
  249. if seenClient.Contains(endpoint.Host) {
  250. continue
  251. }
  252. seenClient.Add(endpoint.Host)
  253. clnts = append(clnts, &bootstrapRESTClient{gm.Connection(endpoint.GridHost())})
  254. }
  255. }
  256. return clnts
  257. }