You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

420 lines
10 KiB

  1. // Copyright (c) 2015-2021 MinIO, Inc.
  2. //
  3. // This file is part of MinIO Object Storage stack
  4. //
  5. // This program is free software: you can redistribute it and/or modify
  6. // it under the terms of the GNU Affero General Public License as published by
  7. // the Free Software Foundation, either version 3 of the License, or
  8. // (at your option) any later version.
  9. //
  10. // This program is distributed in the hope that it will be useful
  11. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  12. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13. // GNU Affero General Public License for more details.
  14. //
  15. // You should have received a copy of the GNU Affero General Public License
  16. // along with this program. If not, see <http://www.gnu.org/licenses/>.
  17. package cmd
  18. import (
  19. "math"
  20. "net/http"
  21. "os"
  22. "runtime"
  23. "slices"
  24. "strconv"
  25. "strings"
  26. "sync"
  27. "time"
  28. "github.com/dustin/go-humanize"
  29. "github.com/shirou/gopsutil/v3/mem"
  30. "github.com/minio/minio/internal/config/api"
  31. xioutil "github.com/minio/minio/internal/ioutil"
  32. "github.com/minio/minio/internal/logger"
  33. "github.com/minio/minio/internal/mcontext"
  34. )
  35. type apiConfig struct {
  36. mu sync.RWMutex
  37. requestsPool chan struct{}
  38. clusterDeadline time.Duration
  39. listQuorum string
  40. corsAllowOrigins []string
  41. replicationPriority string
  42. replicationMaxWorkers int
  43. replicationMaxLWorkers int
  44. transitionWorkers int
  45. staleUploadsExpiry time.Duration
  46. staleUploadsCleanupInterval time.Duration
  47. deleteCleanupInterval time.Duration
  48. enableODirect bool
  49. gzipObjects bool
  50. rootAccess bool
  51. syncEvents bool
  52. objectMaxVersions int64
  53. }
  54. const (
  55. cgroupV1MemLimitFile = "/sys/fs/cgroup/memory/memory.limit_in_bytes"
  56. cgroupV2MemLimitFile = "/sys/fs/cgroup/memory.max"
  57. )
  58. func cgroupMemLimit() (limit uint64) {
  59. buf, err := os.ReadFile(cgroupV2MemLimitFile)
  60. if err != nil {
  61. buf, err = os.ReadFile(cgroupV1MemLimitFile)
  62. }
  63. if err != nil {
  64. return 0
  65. }
  66. limit, err = strconv.ParseUint(strings.TrimSpace(string(buf)), 10, 64)
  67. if err != nil {
  68. // The kernel can return valid but non integer values
  69. // but still, no need to interpret more
  70. return 0
  71. }
  72. if limit >= 100*humanize.TiByte {
  73. // No limit set, or unreasonably high. Ignore
  74. return 0
  75. }
  76. return limit
  77. }
  78. func availableMemory() (available uint64) {
  79. available = 2048 * blockSizeV2 * 2 // Default to 4 GiB when we can't find the limits.
  80. if runtime.GOOS == "linux" {
  81. // Honor cgroup limits if set.
  82. limit := cgroupMemLimit()
  83. if limit > 0 {
  84. // A valid value is found, return its 90%
  85. available = (limit * 9) / 10
  86. return
  87. }
  88. } // for all other platforms limits are based on virtual memory.
  89. memStats, err := mem.VirtualMemory()
  90. if err != nil {
  91. return
  92. }
  93. // A valid value is available return its 90%
  94. available = (memStats.Available * 9) / 10
  95. return
  96. }
  97. func (t *apiConfig) init(cfg api.Config, setDriveCounts []int, legacy bool) {
  98. t.mu.Lock()
  99. defer t.mu.Unlock()
  100. clusterDeadline := cfg.ClusterDeadline
  101. if clusterDeadline == 0 {
  102. clusterDeadline = 10 * time.Second
  103. }
  104. t.clusterDeadline = clusterDeadline
  105. corsAllowOrigin := cfg.CorsAllowOrigin
  106. if len(corsAllowOrigin) == 0 {
  107. corsAllowOrigin = []string{"*"}
  108. }
  109. t.corsAllowOrigins = corsAllowOrigin
  110. var apiRequestsMaxPerNode int
  111. if cfg.RequestsMax <= 0 {
  112. maxSetDrives := slices.Max(setDriveCounts)
  113. // Returns 75% of max memory allowed
  114. maxMem := globalServerCtxt.MemLimit
  115. // max requests per node is calculated as
  116. // total_ram / ram_per_request
  117. blockSize := xioutil.LargeBlock + xioutil.SmallBlock
  118. if legacy {
  119. // ram_per_request is (1MiB+32KiB) * driveCount \
  120. // + 2 * 10MiB (default erasure block size v1) + 2 * 1MiB (default erasure block size v2)
  121. apiRequestsMaxPerNode = int(maxMem / uint64(maxSetDrives*blockSize+int(blockSizeV1*2+blockSizeV2*2)))
  122. } else {
  123. // ram_per_request is (1MiB+32KiB) * driveCount \
  124. // + 2 * 1MiB (default erasure block size v2)
  125. apiRequestsMaxPerNode = int(maxMem / uint64(maxSetDrives*blockSize+int(blockSizeV2*2)))
  126. }
  127. } else {
  128. apiRequestsMaxPerNode = cfg.RequestsMax
  129. if n := totalNodeCount(); n > 0 {
  130. apiRequestsMaxPerNode /= n
  131. }
  132. }
  133. if globalIsDistErasure {
  134. logger.Info("Configured max API requests per node based on available memory: %d", apiRequestsMaxPerNode)
  135. }
  136. if cap(t.requestsPool) != apiRequestsMaxPerNode {
  137. // Only replace if needed.
  138. // Existing requests will use the previous limit,
  139. // but new requests will use the new limit.
  140. // There will be a short overlap window,
  141. // but this shouldn't last long.
  142. t.requestsPool = make(chan struct{}, apiRequestsMaxPerNode)
  143. }
  144. listQuorum := cfg.ListQuorum
  145. if listQuorum == "" {
  146. listQuorum = "strict"
  147. }
  148. t.listQuorum = listQuorum
  149. if r := globalReplicationPool.GetNonBlocking(); r != nil &&
  150. (cfg.ReplicationPriority != t.replicationPriority || cfg.ReplicationMaxWorkers != t.replicationMaxWorkers || cfg.ReplicationMaxLWorkers != t.replicationMaxLWorkers) {
  151. r.ResizeWorkerPriority(cfg.ReplicationPriority, cfg.ReplicationMaxWorkers, cfg.ReplicationMaxLWorkers)
  152. }
  153. t.replicationPriority = cfg.ReplicationPriority
  154. t.replicationMaxWorkers = cfg.ReplicationMaxWorkers
  155. t.replicationMaxLWorkers = cfg.ReplicationMaxLWorkers
  156. // N B api.transition_workers will be deprecated
  157. if globalTransitionState != nil {
  158. globalTransitionState.UpdateWorkers(cfg.TransitionWorkers)
  159. }
  160. t.transitionWorkers = cfg.TransitionWorkers
  161. t.staleUploadsExpiry = cfg.StaleUploadsExpiry
  162. t.deleteCleanupInterval = cfg.DeleteCleanupInterval
  163. t.enableODirect = cfg.EnableODirect
  164. t.gzipObjects = cfg.GzipObjects
  165. t.rootAccess = cfg.RootAccess
  166. t.syncEvents = cfg.SyncEvents
  167. t.objectMaxVersions = cfg.ObjectMaxVersions
  168. if t.staleUploadsCleanupInterval != cfg.StaleUploadsCleanupInterval {
  169. t.staleUploadsCleanupInterval = cfg.StaleUploadsCleanupInterval
  170. // signal that cleanup interval has changed
  171. select {
  172. case staleUploadsCleanupIntervalChangedCh <- struct{}{}:
  173. default: // in case the channel is blocked...
  174. }
  175. }
  176. }
  177. func (t *apiConfig) odirectEnabled() bool {
  178. t.mu.RLock()
  179. defer t.mu.RUnlock()
  180. return t.enableODirect
  181. }
  182. func (t *apiConfig) shouldGzipObjects() bool {
  183. t.mu.RLock()
  184. defer t.mu.RUnlock()
  185. return t.gzipObjects
  186. }
  187. func (t *apiConfig) permitRootAccess() bool {
  188. t.mu.RLock()
  189. defer t.mu.RUnlock()
  190. return t.rootAccess
  191. }
  192. func (t *apiConfig) getListQuorum() string {
  193. t.mu.RLock()
  194. defer t.mu.RUnlock()
  195. if t.listQuorum == "" {
  196. return "strict"
  197. }
  198. return t.listQuorum
  199. }
  200. func (t *apiConfig) getCorsAllowOrigins() []string {
  201. t.mu.RLock()
  202. defer t.mu.RUnlock()
  203. if len(t.corsAllowOrigins) == 0 {
  204. return []string{"*"}
  205. }
  206. corsAllowOrigins := make([]string, len(t.corsAllowOrigins))
  207. copy(corsAllowOrigins, t.corsAllowOrigins)
  208. return corsAllowOrigins
  209. }
  210. func (t *apiConfig) getStaleUploadsCleanupInterval() time.Duration {
  211. t.mu.RLock()
  212. defer t.mu.RUnlock()
  213. if t.staleUploadsCleanupInterval == 0 {
  214. return 6 * time.Hour // default 6 hours
  215. }
  216. return t.staleUploadsCleanupInterval
  217. }
  218. func (t *apiConfig) getStaleUploadsExpiry() time.Duration {
  219. t.mu.RLock()
  220. defer t.mu.RUnlock()
  221. if t.staleUploadsExpiry == 0 {
  222. return 24 * time.Hour // default 24 hours
  223. }
  224. return t.staleUploadsExpiry
  225. }
  226. func (t *apiConfig) getDeleteCleanupInterval() time.Duration {
  227. t.mu.RLock()
  228. defer t.mu.RUnlock()
  229. if t.deleteCleanupInterval == 0 {
  230. return 5 * time.Minute // every 5 minutes
  231. }
  232. return t.deleteCleanupInterval
  233. }
  234. func (t *apiConfig) getClusterDeadline() time.Duration {
  235. t.mu.RLock()
  236. defer t.mu.RUnlock()
  237. if t.clusterDeadline == 0 {
  238. return 10 * time.Second
  239. }
  240. return t.clusterDeadline
  241. }
  242. func (t *apiConfig) getRequestsPoolCapacity() int {
  243. t.mu.RLock()
  244. defer t.mu.RUnlock()
  245. return cap(t.requestsPool)
  246. }
  247. func (t *apiConfig) getRequestsPool() chan struct{} {
  248. t.mu.RLock()
  249. defer t.mu.RUnlock()
  250. if t.requestsPool == nil {
  251. return nil
  252. }
  253. return t.requestsPool
  254. }
  255. // maxClients throttles the S3 API calls
  256. func maxClients(f http.HandlerFunc) http.HandlerFunc {
  257. return func(w http.ResponseWriter, r *http.Request) {
  258. globalHTTPStats.incS3RequestsIncoming()
  259. if r.Header.Get(globalObjectPerfUserMetadata) == "" {
  260. if val := globalServiceFreeze.Load(); val != nil {
  261. if unlock, ok := val.(chan struct{}); ok && unlock != nil {
  262. // Wait until unfrozen.
  263. select {
  264. case <-unlock:
  265. case <-r.Context().Done():
  266. // if client canceled we don't need to wait here forever.
  267. return
  268. }
  269. }
  270. }
  271. }
  272. globalHTTPStats.addRequestsInQueue(1)
  273. pool := globalAPIConfig.getRequestsPool()
  274. if pool == nil {
  275. globalHTTPStats.addRequestsInQueue(-1)
  276. f.ServeHTTP(w, r)
  277. return
  278. }
  279. if tc, ok := r.Context().Value(mcontext.ContextTraceKey).(*mcontext.TraceCtxt); ok {
  280. tc.FuncName = "s3.MaxClients"
  281. }
  282. w.Header().Set("X-RateLimit-Limit", strconv.Itoa(cap(pool)))
  283. w.Header().Set("X-RateLimit-Remaining", strconv.Itoa(cap(pool)-len(pool)))
  284. ctx := r.Context()
  285. select {
  286. case pool <- struct{}{}:
  287. defer func() { <-pool }()
  288. globalHTTPStats.addRequestsInQueue(-1)
  289. if contextCanceled(ctx) {
  290. w.WriteHeader(499)
  291. return
  292. }
  293. f.ServeHTTP(w, r)
  294. case <-r.Context().Done():
  295. globalHTTPStats.addRequestsInQueue(-1)
  296. // When the client disconnects before getting the S3 handler
  297. // status code response, set the status code to 499 so this request
  298. // will be properly audited and traced.
  299. w.WriteHeader(499)
  300. default:
  301. globalHTTPStats.addRequestsInQueue(-1)
  302. if contextCanceled(ctx) {
  303. w.WriteHeader(499)
  304. return
  305. }
  306. // Send a http timeout message
  307. writeErrorResponse(ctx, w,
  308. errorCodes.ToAPIErr(ErrTooManyRequests),
  309. r.URL)
  310. }
  311. }
  312. }
  313. func (t *apiConfig) getReplicationOpts() replicationPoolOpts {
  314. t.mu.RLock()
  315. defer t.mu.RUnlock()
  316. if t.replicationPriority == "" {
  317. return replicationPoolOpts{
  318. Priority: "auto",
  319. MaxWorkers: WorkerMaxLimit,
  320. MaxLWorkers: LargeWorkerCount,
  321. }
  322. }
  323. return replicationPoolOpts{
  324. Priority: t.replicationPriority,
  325. MaxWorkers: t.replicationMaxWorkers,
  326. MaxLWorkers: t.replicationMaxLWorkers,
  327. }
  328. }
  329. func (t *apiConfig) getTransitionWorkers() int {
  330. t.mu.RLock()
  331. defer t.mu.RUnlock()
  332. if t.transitionWorkers <= 0 {
  333. return runtime.GOMAXPROCS(0) / 2
  334. }
  335. return t.transitionWorkers
  336. }
  337. func (t *apiConfig) isSyncEventsEnabled() bool {
  338. t.mu.RLock()
  339. defer t.mu.RUnlock()
  340. return t.syncEvents
  341. }
  342. func (t *apiConfig) getObjectMaxVersions() int64 {
  343. t.mu.RLock()
  344. defer t.mu.RUnlock()
  345. if t.objectMaxVersions <= 0 {
  346. // defaults to 'IntMax' when unset.
  347. return math.MaxInt64
  348. }
  349. return t.objectMaxVersions
  350. }