seaweedFS/weed/s3api/s3api_circuit_breaker.go
Chris Lu 848bec6d24 Metrics: Add Prometheus metrics for concurrent upload tracking (#7555)
* metrics: add Prometheus metrics for concurrent upload tracking

Add Prometheus metrics to monitor concurrent upload activity for both
filer and S3 servers. This provides visibility into the upload limiting
feature added in the previous PR.

New Metrics:
- SeaweedFS_filer_in_flight_upload_bytes: Current bytes being uploaded to filer
- SeaweedFS_filer_in_flight_upload_count: Current number of uploads to filer
- SeaweedFS_s3_in_flight_upload_bytes: Current bytes being uploaded to S3
- SeaweedFS_s3_in_flight_upload_count: Current number of uploads to S3

The metrics are updated atomically whenever uploads start or complete,
providing real-time visibility into upload concurrency levels.
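
As a rough sketch of this pattern (illustrative, self-contained Go using the
Prometheus client; the real gauges live in the weed/stats package and the
helper names here are hypothetical), each change to the atomic counter is
immediately mirrored into a gauge:

    package main

    import (
    	"sync/atomic"

    	"github.com/prometheus/client_golang/prometheus"
    )

    var (
    	// inFlightUploads is the source of truth, updated atomically.
    	inFlightUploads int64

    	// uploadGauge is exported as SeaweedFS_s3_in_flight_upload_count.
    	uploadGauge = prometheus.NewGauge(prometheus.GaugeOpts{
    		Namespace: "SeaweedFS",
    		Subsystem: "s3",
    		Name:      "in_flight_upload_count",
    		Help:      "Current number of uploads to S3",
    	})
    )

    func init() { prometheus.MustRegister(uploadGauge) }

    // uploadStarted / uploadFinished bump the counter and mirror the new
    // value into the gauge, so Prometheus always sees the live count.
    func uploadStarted()  { uploadGauge.Set(float64(atomic.AddInt64(&inFlightUploads, 1))) }
    func uploadFinished() { uploadGauge.Set(float64(atomic.AddInt64(&inFlightUploads, -1))) }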

This helps operators:
- Monitor upload concurrency in real-time
- Set appropriate limits based on actual usage patterns
- Detect potential bottlenecks or capacity issues
- Track the effectiveness of upload limiting configuration

* grafana: add dashboard panels for concurrent upload metrics

Add 4 new panels to the Grafana dashboard to visualize the concurrent
upload metrics added in this PR:

Filer Section:
- Filer Concurrent Uploads: Shows current number of concurrent uploads
- Filer Concurrent Upload Bytes: Shows current bytes being uploaded

S3 Gateway Section:
- S3 Concurrent Uploads: Shows current number of concurrent uploads
- S3 Concurrent Upload Bytes: Shows current bytes being uploaded

These panels help operators monitor upload concurrency in real-time and
tune the upload limiting configuration based on actual usage patterns.

* more efficient
2025-11-26 15:51:38 -08:00


package s3api

import (
	"errors"
	"fmt"
	"net/http"
	"sync"
	"sync/atomic"

	"github.com/gorilla/mux"

	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/s3_pb"
	"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
	"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
	"github.com/seaweedfs/seaweedfs/weed/stats"
)
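
// CircuitBreaker enforces global and per-bucket limits on simultaneous
// requests and in-flight request bytes, keyed by S3 action. Limits are
// loaded from a configuration file stored in the filer.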
type CircuitBreaker struct {
	sync.RWMutex
	Enabled     bool
	counters    map[string]*int64
	limitations map[string]int64
	s3a         *S3ApiServer
}
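
// NewCircuitBreaker creates a CircuitBreaker and attempts to load its
// configuration from one of the configured filers. A missing config file is
// not an error; any other failure leaves the circuit breaker disabled
// instead of failing server startup.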
func NewCircuitBreaker(option *S3ApiServerOption) *CircuitBreaker {
	cb := &CircuitBreaker{
		counters:    make(map[string]*int64),
		limitations: make(map[string]int64),
	}

	// Use WithOneOfGrpcFilerClients to support multiple filers with failover
	err := pb.WithOneOfGrpcFilerClients(false, option.Filers, option.GrpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
		content, err := filer.ReadInsideFiler(client, s3_constants.CircuitBreakerConfigDir, s3_constants.CircuitBreakerConfigFile)
		if errors.Is(err, filer_pb.ErrNotFound) {
			return nil
		}
		if err != nil {
			return fmt.Errorf("read S3 circuit breaker config: %w", err)
		}
		return cb.LoadS3ApiConfigurationFromBytes(content)
	})
	if err != nil {
		glog.Warningf("S3 circuit breaker disabled; failed to load config from any filer: %v", err)
	}

	return cb
}
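
// LoadS3ApiConfigurationFromBytes parses a serialized S3CircuitBreakerConfig
// and installs it on the circuit breaker.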
func (cb *CircuitBreaker) LoadS3ApiConfigurationFromBytes(content []byte) error {
	cbCfg := &s3_pb.S3CircuitBreakerConfig{}
	if err := filer.ParseS3ConfigurationFromBytes(content, cbCfg); err != nil {
		glog.Warningf("unmarshal error: %v", err)
		return fmt.Errorf("unmarshal error: %w", err)
	}
	return cb.loadCircuitBreakerConfig(cbCfg)
}
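
// loadCircuitBreakerConfig flattens the global and per-bucket action limits
// into a single lookup map and records whether the breaker is enabled.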
func (cb *CircuitBreaker) loadCircuitBreakerConfig(cfg *s3_pb.S3CircuitBreakerConfig) error {
	// global
	globalEnabled := false
	globalOptions := cfg.Global
	limitations := make(map[string]int64)
	if globalOptions != nil && globalOptions.Enabled && len(globalOptions.Actions) > 0 {
		globalEnabled = globalOptions.Enabled
		for action, limit := range globalOptions.Actions {
			limitations[action] = limit
		}
	}
	cb.Enabled = globalEnabled

	// buckets
	for bucket, cbOptions := range cfg.Buckets {
		if cbOptions.Enabled {
			for action, limit := range cbOptions.Actions {
				limitations[s3_constants.Concat(bucket, action)] = limit
			}
		}
	}

	cb.limitations = limitations
	return nil
}
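
// Limit wraps handler f with two layers of admission control. For write
// actions it first blocks until the server-wide in-flight upload byte and
// file-count limits allow the request through, tracking the request in the
// in-flight counters and Prometheus gauges. It then applies the configured
// per-bucket and global circuit breaker limits, rejecting the request with
// an S3 error when a limit would be exceeded.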
func (cb *CircuitBreaker) Limit(f func(w http.ResponseWriter, r *http.Request), action string) (http.HandlerFunc, Action) {
	return func(w http.ResponseWriter, r *http.Request) {
		// Apply upload limiting for write actions if configured
		if cb.s3a != nil && action == s3_constants.ACTION_WRITE &&
			(cb.s3a.option.ConcurrentUploadLimit != 0 || cb.s3a.option.ConcurrentFileUploadLimit != 0) {

			// Get content length, default to 0 if not provided
			contentLength := r.ContentLength
			if contentLength < 0 {
				contentLength = 0
			}

			// Wait until in flight data is less than the limit
			cb.s3a.inFlightDataLimitCond.L.Lock()
			inFlightDataSize := atomic.LoadInt64(&cb.s3a.inFlightDataSize)
			inFlightUploads := atomic.LoadInt64(&cb.s3a.inFlightUploads)
			// Wait if either data size limit or file count limit is exceeded
			for (cb.s3a.option.ConcurrentUploadLimit != 0 && inFlightDataSize > cb.s3a.option.ConcurrentUploadLimit) ||
				(cb.s3a.option.ConcurrentFileUploadLimit != 0 && inFlightUploads >= cb.s3a.option.ConcurrentFileUploadLimit) {
				if cb.s3a.option.ConcurrentUploadLimit != 0 && inFlightDataSize > cb.s3a.option.ConcurrentUploadLimit {
					glog.V(4).Infof("wait because inflight data %d > %d", inFlightDataSize, cb.s3a.option.ConcurrentUploadLimit)
				}
				if cb.s3a.option.ConcurrentFileUploadLimit != 0 && inFlightUploads >= cb.s3a.option.ConcurrentFileUploadLimit {
					glog.V(4).Infof("wait because inflight uploads %d >= %d", inFlightUploads, cb.s3a.option.ConcurrentFileUploadLimit)
				}
				cb.s3a.inFlightDataLimitCond.Wait()
				inFlightDataSize = atomic.LoadInt64(&cb.s3a.inFlightDataSize)
				inFlightUploads = atomic.LoadInt64(&cb.s3a.inFlightUploads)
			}
			cb.s3a.inFlightDataLimitCond.L.Unlock()

			// Increment counters
			newUploads := atomic.AddInt64(&cb.s3a.inFlightUploads, 1)
			newSize := atomic.AddInt64(&cb.s3a.inFlightDataSize, contentLength)

			// Update metrics
			stats.S3InFlightUploadCountGauge.Set(float64(newUploads))
			stats.S3InFlightUploadBytesGauge.Set(float64(newSize))

			defer func() {
				// Decrement counters
				newUploads := atomic.AddInt64(&cb.s3a.inFlightUploads, -1)
				newSize := atomic.AddInt64(&cb.s3a.inFlightDataSize, -contentLength)

				// Update metrics
				stats.S3InFlightUploadCountGauge.Set(float64(newUploads))
				stats.S3InFlightUploadBytesGauge.Set(float64(newSize))

				cb.s3a.inFlightDataLimitCond.Signal()
			}()
		}

		// Apply circuit breaker logic
		if !cb.Enabled {
			f(w, r)
			return
		}

		vars := mux.Vars(r)
		bucket := vars["bucket"]

		rollback, errCode := cb.limit(r, bucket, action)
		defer func() {
			for _, rf := range rollback {
				rf()
			}
		}()

		if errCode == s3err.ErrNone {
			f(w, r)
			return
		}
		s3err.WriteErrorResponse(w, r, errCode)
	}, Action(action)
}
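
// limit checks the four limits in order: bucket request count, bucket
// request bytes, global request count, global request bytes. Every counter
// it increments gets a matching rollback func so the caller can undo the
// reservation when the request finishes or is rejected.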
func (cb *CircuitBreaker) limit(r *http.Request, bucket string, action string) (rollback []func(), errCode s3err.ErrorCode) {
	// bucket simultaneous request count
	bucketCountRollBack, errCode := cb.loadCounterAndCompare(s3_constants.Concat(bucket, action, s3_constants.LimitTypeCount), 1, s3err.ErrTooManyRequest)
	if bucketCountRollBack != nil {
		rollback = append(rollback, bucketCountRollBack)
	}
	if errCode != s3err.ErrNone {
		return
	}

	// bucket simultaneous request content bytes
	bucketContentLengthRollBack, errCode := cb.loadCounterAndCompare(s3_constants.Concat(bucket, action, s3_constants.LimitTypeBytes), r.ContentLength, s3err.ErrRequestBytesExceed)
	if bucketContentLengthRollBack != nil {
		rollback = append(rollback, bucketContentLengthRollBack)
	}
	if errCode != s3err.ErrNone {
		return
	}

	// global simultaneous request count
	globalCountRollBack, errCode := cb.loadCounterAndCompare(s3_constants.Concat(action, s3_constants.LimitTypeCount), 1, s3err.ErrTooManyRequest)
	if globalCountRollBack != nil {
		rollback = append(rollback, globalCountRollBack)
	}
	if errCode != s3err.ErrNone {
		return
	}

	// global simultaneous request content bytes
	globalContentLengthRollBack, errCode := cb.loadCounterAndCompare(s3_constants.Concat(action, s3_constants.LimitTypeBytes), r.ContentLength, s3err.ErrRequestBytesExceed)
	if globalContentLengthRollBack != nil {
		rollback = append(rollback, globalContentLengthRollBack)
	}
	return
}
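
// loadCounterAndCompare lazily creates the counter for key (double-checked
// under the write lock), then optimistically applies the increment and
// re-checks the limit. When the post-increment check fails, the increment
// has still happened, so the returned rollback func must be called to undo it.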
func (cb *CircuitBreaker) loadCounterAndCompare(key string, inc int64, errCode s3err.ErrorCode) (f func(), e s3err.ErrorCode) {
	e = s3err.ErrNone
	if max, ok := cb.limitations[key]; ok {
		cb.RLock()
		counter, exists := cb.counters[key]
		cb.RUnlock()

		if !exists {
			cb.Lock()
			counter, exists = cb.counters[key]
			if !exists {
				var newCounter int64
				counter = &newCounter
				cb.counters[key] = counter
			}
			cb.Unlock()
		}

		current := atomic.LoadInt64(counter)
		if current+inc > max {
			e = errCode
			return
		} else {
			current := atomic.AddInt64(counter, inc)
			f = func() {
				atomic.AddInt64(counter, -inc)
			}
			if current > max {
				e = errCode
				return
			}
		}
	}
	return
}