* metrics: add Prometheus metrics for concurrent upload tracking

  Add Prometheus metrics to monitor concurrent upload activity for both filer and S3 servers. This provides visibility into the upload limiting feature added in the previous PR.

  New Metrics:
  - SeaweedFS_filer_in_flight_upload_bytes: Current bytes being uploaded to filer
  - SeaweedFS_filer_in_flight_upload_count: Current number of uploads to filer
  - SeaweedFS_s3_in_flight_upload_bytes: Current bytes being uploaded to S3
  - SeaweedFS_s3_in_flight_upload_count: Current number of uploads to S3

  The metrics are updated atomically whenever uploads start or complete, providing real-time visibility into upload concurrency levels.

  This helps operators:
  - Monitor upload concurrency in real-time
  - Set appropriate limits based on actual usage patterns
  - Detect potential bottlenecks or capacity issues
  - Track the effectiveness of upload limiting configuration

* grafana: add dashboard panels for concurrent upload metrics

  Add 4 new panels to the Grafana dashboard to visualize the concurrent upload metrics added in this PR:

  Filer Section:
  - Filer Concurrent Uploads: Shows current number of concurrent uploads
  - Filer Concurrent Upload Bytes: Shows current bytes being uploaded

  S3 Gateway Section:
  - S3 Concurrent Uploads: Shows current number of concurrent uploads
  - S3 Concurrent Upload Bytes: Shows current bytes being uploaded

  These panels help operators monitor upload concurrency in real-time and tune the upload limiting configuration based on actual usage patterns.

* more efficient
240 lines
7.4 KiB
Go
240 lines
7.4 KiB
Go
package s3api
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"net/http"
|
|
"sync"
|
|
"sync/atomic"
|
|
|
|
"github.com/gorilla/mux"
|
|
"github.com/seaweedfs/seaweedfs/weed/filer"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/s3_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
|
|
"github.com/seaweedfs/seaweedfs/weed/stats"
|
|
)
|
|
|
|
type CircuitBreaker struct {
|
|
sync.RWMutex
|
|
Enabled bool
|
|
counters map[string]*int64
|
|
limitations map[string]int64
|
|
s3a *S3ApiServer
|
|
}
|
|
|
|
func NewCircuitBreaker(option *S3ApiServerOption) *CircuitBreaker {
|
|
cb := &CircuitBreaker{
|
|
counters: make(map[string]*int64),
|
|
limitations: make(map[string]int64),
|
|
}
|
|
|
|
// Use WithOneOfGrpcFilerClients to support multiple filers with failover
|
|
err := pb.WithOneOfGrpcFilerClients(false, option.Filers, option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
|
|
content, err := filer.ReadInsideFiler(client, s3_constants.CircuitBreakerConfigDir, s3_constants.CircuitBreakerConfigFile)
|
|
if errors.Is(err, filer_pb.ErrNotFound) {
|
|
return nil
|
|
}
|
|
if err != nil {
|
|
return fmt.Errorf("read S3 circuit breaker config: %w", err)
|
|
}
|
|
return cb.LoadS3ApiConfigurationFromBytes(content)
|
|
})
|
|
|
|
if err != nil {
|
|
glog.Warningf("S3 circuit breaker disabled; failed to load config from any filer: %v", err)
|
|
}
|
|
|
|
return cb
|
|
}
|
|
|
|
func (cb *CircuitBreaker) LoadS3ApiConfigurationFromBytes(content []byte) error {
|
|
cbCfg := &s3_pb.S3CircuitBreakerConfig{}
|
|
if err := filer.ParseS3ConfigurationFromBytes(content, cbCfg); err != nil {
|
|
glog.Warningf("unmarshal error: %v", err)
|
|
return fmt.Errorf("unmarshal error: %w", err)
|
|
}
|
|
if err := cb.loadCircuitBreakerConfig(cbCfg); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (cb *CircuitBreaker) loadCircuitBreakerConfig(cfg *s3_pb.S3CircuitBreakerConfig) error {
|
|
|
|
//global
|
|
globalEnabled := false
|
|
globalOptions := cfg.Global
|
|
limitations := make(map[string]int64)
|
|
if globalOptions != nil && globalOptions.Enabled && len(globalOptions.Actions) > 0 {
|
|
globalEnabled = globalOptions.Enabled
|
|
for action, limit := range globalOptions.Actions {
|
|
limitations[action] = limit
|
|
}
|
|
}
|
|
cb.Enabled = globalEnabled
|
|
|
|
//buckets
|
|
for bucket, cbOptions := range cfg.Buckets {
|
|
if cbOptions.Enabled {
|
|
for action, limit := range cbOptions.Actions {
|
|
limitations[s3_constants.Concat(bucket, action)] = limit
|
|
}
|
|
}
|
|
}
|
|
|
|
cb.limitations = limitations
|
|
return nil
|
|
}
|
|
|
|
func (cb *CircuitBreaker) Limit(f func(w http.ResponseWriter, r *http.Request), action string) (http.HandlerFunc, Action) {
|
|
return func(w http.ResponseWriter, r *http.Request) {
|
|
// Apply upload limiting for write actions if configured
|
|
if cb.s3a != nil && (action == s3_constants.ACTION_WRITE) &&
|
|
(cb.s3a.option.ConcurrentUploadLimit != 0 || cb.s3a.option.ConcurrentFileUploadLimit != 0) {
|
|
|
|
// Get content length, default to 0 if not provided
|
|
contentLength := r.ContentLength
|
|
if contentLength < 0 {
|
|
contentLength = 0
|
|
}
|
|
|
|
// Wait until in flight data is less than the limit
|
|
cb.s3a.inFlightDataLimitCond.L.Lock()
|
|
inFlightDataSize := atomic.LoadInt64(&cb.s3a.inFlightDataSize)
|
|
inFlightUploads := atomic.LoadInt64(&cb.s3a.inFlightUploads)
|
|
|
|
// Wait if either data size limit or file count limit is exceeded
|
|
for (cb.s3a.option.ConcurrentUploadLimit != 0 && inFlightDataSize > cb.s3a.option.ConcurrentUploadLimit) ||
|
|
(cb.s3a.option.ConcurrentFileUploadLimit != 0 && inFlightUploads >= cb.s3a.option.ConcurrentFileUploadLimit) {
|
|
if (cb.s3a.option.ConcurrentUploadLimit != 0 && inFlightDataSize > cb.s3a.option.ConcurrentUploadLimit) {
|
|
glog.V(4).Infof("wait because inflight data %d > %d", inFlightDataSize, cb.s3a.option.ConcurrentUploadLimit)
|
|
}
|
|
if (cb.s3a.option.ConcurrentFileUploadLimit != 0 && inFlightUploads >= cb.s3a.option.ConcurrentFileUploadLimit) {
|
|
glog.V(4).Infof("wait because inflight uploads %d >= %d", inFlightUploads, cb.s3a.option.ConcurrentFileUploadLimit)
|
|
}
|
|
cb.s3a.inFlightDataLimitCond.Wait()
|
|
inFlightDataSize = atomic.LoadInt64(&cb.s3a.inFlightDataSize)
|
|
inFlightUploads = atomic.LoadInt64(&cb.s3a.inFlightUploads)
|
|
}
|
|
cb.s3a.inFlightDataLimitCond.L.Unlock()
|
|
|
|
// Increment counters
|
|
newUploads := atomic.AddInt64(&cb.s3a.inFlightUploads, 1)
|
|
newSize := atomic.AddInt64(&cb.s3a.inFlightDataSize, contentLength)
|
|
// Update metrics
|
|
stats.S3InFlightUploadCountGauge.Set(float64(newUploads))
|
|
stats.S3InFlightUploadBytesGauge.Set(float64(newSize))
|
|
defer func() {
|
|
// Decrement counters
|
|
newUploads := atomic.AddInt64(&cb.s3a.inFlightUploads, -1)
|
|
newSize := atomic.AddInt64(&cb.s3a.inFlightDataSize, -contentLength)
|
|
// Update metrics
|
|
stats.S3InFlightUploadCountGauge.Set(float64(newUploads))
|
|
stats.S3InFlightUploadBytesGauge.Set(float64(newSize))
|
|
cb.s3a.inFlightDataLimitCond.Signal()
|
|
}()
|
|
}
|
|
|
|
// Apply circuit breaker logic
|
|
if !cb.Enabled {
|
|
f(w, r)
|
|
return
|
|
}
|
|
|
|
vars := mux.Vars(r)
|
|
bucket := vars["bucket"]
|
|
|
|
rollback, errCode := cb.limit(r, bucket, action)
|
|
defer func() {
|
|
for _, rf := range rollback {
|
|
rf()
|
|
}
|
|
}()
|
|
|
|
if errCode == s3err.ErrNone {
|
|
f(w, r)
|
|
return
|
|
}
|
|
s3err.WriteErrorResponse(w, r, errCode)
|
|
}, Action(action)
|
|
}
|
|
|
|
func (cb *CircuitBreaker) limit(r *http.Request, bucket string, action string) (rollback []func(), errCode s3err.ErrorCode) {
|
|
|
|
//bucket simultaneous request count
|
|
bucketCountRollBack, errCode := cb.loadCounterAndCompare(s3_constants.Concat(bucket, action, s3_constants.LimitTypeCount), 1, s3err.ErrTooManyRequest)
|
|
if bucketCountRollBack != nil {
|
|
rollback = append(rollback, bucketCountRollBack)
|
|
}
|
|
if errCode != s3err.ErrNone {
|
|
return
|
|
}
|
|
|
|
//bucket simultaneous request content bytes
|
|
bucketContentLengthRollBack, errCode := cb.loadCounterAndCompare(s3_constants.Concat(bucket, action, s3_constants.LimitTypeBytes), r.ContentLength, s3err.ErrRequestBytesExceed)
|
|
if bucketContentLengthRollBack != nil {
|
|
rollback = append(rollback, bucketContentLengthRollBack)
|
|
}
|
|
if errCode != s3err.ErrNone {
|
|
return
|
|
}
|
|
|
|
//global simultaneous request count
|
|
globalCountRollBack, errCode := cb.loadCounterAndCompare(s3_constants.Concat(action, s3_constants.LimitTypeCount), 1, s3err.ErrTooManyRequest)
|
|
if globalCountRollBack != nil {
|
|
rollback = append(rollback, globalCountRollBack)
|
|
}
|
|
if errCode != s3err.ErrNone {
|
|
return
|
|
}
|
|
|
|
//global simultaneous request content bytes
|
|
globalContentLengthRollBack, errCode := cb.loadCounterAndCompare(s3_constants.Concat(action, s3_constants.LimitTypeBytes), r.ContentLength, s3err.ErrRequestBytesExceed)
|
|
if globalContentLengthRollBack != nil {
|
|
rollback = append(rollback, globalContentLengthRollBack)
|
|
}
|
|
if errCode != s3err.ErrNone {
|
|
return
|
|
}
|
|
return
|
|
}
|
|
|
|
func (cb *CircuitBreaker) loadCounterAndCompare(key string, inc int64, errCode s3err.ErrorCode) (f func(), e s3err.ErrorCode) {
|
|
e = s3err.ErrNone
|
|
if max, ok := cb.limitations[key]; ok {
|
|
cb.RLock()
|
|
counter, exists := cb.counters[key]
|
|
cb.RUnlock()
|
|
|
|
if !exists {
|
|
cb.Lock()
|
|
counter, exists = cb.counters[key]
|
|
if !exists {
|
|
var newCounter int64
|
|
counter = &newCounter
|
|
cb.counters[key] = counter
|
|
}
|
|
cb.Unlock()
|
|
}
|
|
current := atomic.LoadInt64(counter)
|
|
if current+inc > max {
|
|
e = errCode
|
|
return
|
|
} else {
|
|
current := atomic.AddInt64(counter, inc)
|
|
f = func() {
|
|
atomic.AddInt64(counter, -inc)
|
|
}
|
|
if current > max {
|
|
e = errCode
|
|
return
|
|
}
|
|
}
|
|
}
|
|
return
|
|
}
|