* feat: add S3 bucket size and object count metrics Adds periodic collection of bucket size metrics: - SeaweedFS_s3_bucket_size_bytes: logical size (deduplicated across replicas) - SeaweedFS_s3_bucket_physical_size_bytes: physical size (including replicas) - SeaweedFS_s3_bucket_object_count: object count (deduplicated) Collection runs every 1 minute via background goroutine that queries filer Statistics RPC for each bucket's collection. Also adds Grafana dashboard panels for: - S3 Bucket Size (logical vs physical) - S3 Bucket Object Count * address PR comments: fix bucket size metrics collection 1. Fix collectCollectionInfoFromMaster to use master VolumeList API - Now properly queries master for topology info - Uses WithMasterClient to get volume list from master - Correctly calculates logical vs physical size based on replication 2. Return error when filerClient is nil to trigger fallback - Changed from 'return nil, nil' to 'return nil, error' - Ensures fallback to filer stats is properly triggered 3. Implement pagination in listBucketNames - Added listBucketPageSize constant (1000) - Uses StartFromFileName for pagination - Continues fetching until fewer entries than limit returned 4. 
Handle NewReplicaPlacementFromByte error and prevent division by zero - Check error return from NewReplicaPlacementFromByte - Default to 1 copy if error occurs - Add explicit check for copyCount == 0 * simplify bucket size metrics: remove filer fallback, align with quota enforcement - Remove fallback to filer Statistics RPC - Use only master topology for collection info (same as s3.bucket.quota.enforce) - Updated comments to clarify this runs the same collection logic as quota enforcement - Simplified code by removing collectBucketSizeFromFilerStats * use s3a.option.Masters directly instead of querying filer * address PR comments: fix dashboard overlaps and improve metrics collection Grafana dashboard fixes: - Fix overlapping panels 55 and 59 in grafana_seaweedfs.json (moved 59 to y=30) - Fix grid collision in k8s dashboard (moved panel 72 to y=48) - Aggregate bucket metrics with max() by (bucket) for multi-instance S3 gateways Go code improvements: - Add graceful shutdown support via context cancellation - Use ticker instead of time.Sleep for better shutdown responsiveness - Distinguish EOF from actual errors in stream handling * improve bucket size metrics: multi-master failover and proper error handling - Initial delay now respects context cancellation using select with time.After - Use WithOneOfGrpcMasterClients for multi-master failover instead of hardcoding Masters[0] - Properly propagate stream errors instead of just logging them (EOF vs real errors) * improve bucket size metrics: distributed lock and volume ID deduplication - Add distributed lock (LiveLock) so only one S3 instance collects metrics at a time - Add IsLocked() method to LiveLock for checking lock status - Fix deduplication: use volume ID tracking instead of dividing by copyCount - Previous approach gave wrong results if replicas were missing - Now tracks seen volume IDs and counts each volume only once - Physical size still includes all replicas for accurate disk usage reporting * rename lock 
to s3.leader * simplify: remove StartBucketSizeMetricsCollection wrapper function * fix data race: use atomic operations for LiveLock.isLocked field - Change isLocked from bool to int32 - Use atomic.LoadInt32/StoreInt32 for all reads/writes - Sync shared isLocked field in StartLongLivedLock goroutine * add nil check for topology info to prevent panic * fix bucket metrics: use Ticker for consistent intervals, fix pagination logic - Use time.Ticker instead of time.After for consistent interval execution - Fix pagination: count all entries (not just directories) for proper termination - Update lastFileName for all entries to prevent pagination issues * address PR comments: remove redundant atomic store, propagate context - Remove redundant atomic.StoreInt32 in StartLongLivedLock (AttemptToLock already sets it) - Propagate context through metrics collection for proper cancellation on shutdown - collectAndUpdateBucketSizeMetrics now accepts ctx - collectCollectionInfoFromMaster uses ctx for VolumeList RPC - listBucketNames uses ctx for ListEntries RPC
239 lines
7.4 KiB
Go
239 lines
7.4 KiB
Go
package cluster
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"sync/atomic"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/cluster/lock_manager"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
"google.golang.org/grpc"
|
|
)
|
|
|
|
// LockClient creates distributed locks backed by a SeaweedFS filer's
// DistributedLock/DistributedUnlock RPCs.
type LockClient struct {
	grpcDialOption  grpc.DialOption  // dial option used for all filer gRPC connections
	maxLockDuration time.Duration    // default TTL for short-lived locks
	sleepDuration   time.Duration    // pause between lock attempts
	seedFiler       pb.ServerAddress // filer to contact first; updated by doLock when the lock host moves
}
|
|
|
|
func NewLockClient(grpcDialOption grpc.DialOption, seedFiler pb.ServerAddress) *LockClient {
|
|
return &LockClient{
|
|
grpcDialOption: grpcDialOption,
|
|
maxLockDuration: 5 * time.Second,
|
|
sleepDuration: 2473 * time.Millisecond,
|
|
seedFiler: seedFiler,
|
|
}
|
|
}
|
|
|
|
// LiveLock is a distributed lock held on a filer. A short-lived lock is
// acquired once and released explicitly; a long-lived lock is renewed by
// a background goroutine until Stop is called.
type LiveLock struct {
	key            string           // lock name on the filer
	renewToken     string           // token from the filer, required to renew/unlock
	expireAtNs     int64            // intended expiry time (nanoseconds since epoch)
	hostFiler      pb.ServerAddress // filer currently hosting the lock; may move (see doLock)
	cancelCh       chan struct{}    // closed by Stop to end the renewal goroutine
	grpcDialOption grpc.DialOption
	isLocked       int32 // 0 = unlocked, 1 = locked; use atomic operations
	self           string // this instance's identity, sent as the lock owner
	lc             *LockClient
	// owner is the last lock owner reported by the filer.
	// NOTE(review): owner is written by the renewal goroutine (via doLock)
	// and read by LockOwner without synchronization — unlike isLocked,
	// which uses atomics. Confirm callers, or protect it similarly.
	owner string
}
|
|
|
|
// NewShortLivedLock creates a lock with a 5-second duration
|
|
func (lc *LockClient) NewShortLivedLock(key string, owner string) (lock *LiveLock) {
|
|
lock = &LiveLock{
|
|
key: key,
|
|
hostFiler: lc.seedFiler,
|
|
cancelCh: make(chan struct{}),
|
|
expireAtNs: time.Now().Add(5 * time.Second).UnixNano(),
|
|
grpcDialOption: lc.grpcDialOption,
|
|
self: owner,
|
|
lc: lc,
|
|
}
|
|
lock.retryUntilLocked(5 * time.Second)
|
|
return
|
|
}
|
|
|
|
// StartLongLivedLock starts a goroutine to lock the key and returns immediately.
|
|
func (lc *LockClient) StartLongLivedLock(key string, owner string, onLockOwnerChange func(newLockOwner string)) (lock *LiveLock) {
|
|
lock = &LiveLock{
|
|
key: key,
|
|
hostFiler: lc.seedFiler,
|
|
cancelCh: make(chan struct{}),
|
|
expireAtNs: time.Now().Add(lock_manager.LiveLockTTL).UnixNano(),
|
|
grpcDialOption: lc.grpcDialOption,
|
|
self: owner,
|
|
lc: lc,
|
|
}
|
|
go func() {
|
|
isLocked := false
|
|
lockOwner := ""
|
|
for {
|
|
// Check for cancellation BEFORE attempting to lock to avoid race condition
|
|
// where Stop() is called after sleep but before lock attempt
|
|
select {
|
|
case <-lock.cancelCh:
|
|
return
|
|
default:
|
|
}
|
|
|
|
if isLocked {
|
|
if err := lock.AttemptToLock(lock_manager.LiveLockTTL); err != nil {
|
|
glog.V(0).Infof("Lost lock %s: %v", key, err)
|
|
isLocked = false
|
|
atomic.StoreInt32(&lock.isLocked, 0)
|
|
}
|
|
} else {
|
|
if err := lock.AttemptToLock(lock_manager.LiveLockTTL); err == nil {
|
|
isLocked = true
|
|
// Note: AttemptToLock already sets lock.isLocked atomically on success
|
|
}
|
|
}
|
|
if lockOwner != lock.LockOwner() && lock.LockOwner() != "" {
|
|
glog.V(0).Infof("Lock owner changed from %s to %s", lockOwner, lock.LockOwner())
|
|
onLockOwnerChange(lock.LockOwner())
|
|
lockOwner = lock.LockOwner()
|
|
}
|
|
select {
|
|
case <-lock.cancelCh:
|
|
return
|
|
default:
|
|
time.Sleep(lock_manager.RenewInterval)
|
|
}
|
|
}
|
|
}()
|
|
return
|
|
}
|
|
|
|
func (lock *LiveLock) retryUntilLocked(lockDuration time.Duration) {
|
|
util.RetryUntil("create lock:"+lock.key, func() error {
|
|
return lock.AttemptToLock(lockDuration)
|
|
}, func(err error) (shouldContinue bool) {
|
|
if err != nil {
|
|
glog.Warningf("create lock %s: %s", lock.key, err)
|
|
}
|
|
return lock.renewToken == ""
|
|
})
|
|
}
|
|
|
|
func (lock *LiveLock) AttemptToLock(lockDuration time.Duration) error {
|
|
glog.V(4).Infof("LOCK: AttemptToLock key=%s owner=%s", lock.key, lock.self)
|
|
errorMessage, err := lock.doLock(lockDuration)
|
|
if err != nil {
|
|
glog.V(1).Infof("LOCK: doLock failed for key=%s: %v", lock.key, err)
|
|
time.Sleep(time.Second)
|
|
return err
|
|
}
|
|
if errorMessage != "" {
|
|
glog.V(1).Infof("LOCK: doLock returned error message for key=%s: %s", lock.key, errorMessage)
|
|
time.Sleep(time.Second)
|
|
return fmt.Errorf("%v", errorMessage)
|
|
}
|
|
if atomic.LoadInt32(&lock.isLocked) == 0 {
|
|
// Only log when transitioning from unlocked to locked
|
|
glog.V(1).Infof("LOCK: Successfully acquired key=%s owner=%s", lock.key, lock.self)
|
|
}
|
|
atomic.StoreInt32(&lock.isLocked, 1)
|
|
return nil
|
|
}
|
|
|
|
func (lock *LiveLock) StopShortLivedLock() error {
|
|
if atomic.LoadInt32(&lock.isLocked) == 0 {
|
|
return nil
|
|
}
|
|
defer func() {
|
|
atomic.StoreInt32(&lock.isLocked, 0)
|
|
}()
|
|
return pb.WithFilerClient(false, 0, lock.hostFiler, lock.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
|
|
_, err := client.DistributedUnlock(context.Background(), &filer_pb.UnlockRequest{
|
|
Name: lock.key,
|
|
RenewToken: lock.renewToken,
|
|
})
|
|
return err
|
|
})
|
|
}
|
|
|
|
// Stop stops a long-lived lock by closing the cancel channel and releasing the lock
|
|
func (lock *LiveLock) Stop() error {
|
|
// Close the cancel channel to stop the long-lived lock goroutine
|
|
select {
|
|
case <-lock.cancelCh:
|
|
// Already closed
|
|
default:
|
|
close(lock.cancelCh)
|
|
}
|
|
|
|
// Wait a brief moment for the goroutine to see the closed channel
|
|
// This reduces the race condition window where the goroutine might
|
|
// attempt one more lock operation after we've released the lock
|
|
time.Sleep(10 * time.Millisecond)
|
|
|
|
// Also release the lock if held
|
|
// Note: We intentionally don't clear renewToken here because
|
|
// StopShortLivedLock needs it to properly unlock
|
|
return lock.StopShortLivedLock()
|
|
}
|
|
|
|
func (lock *LiveLock) doLock(lockDuration time.Duration) (errorMessage string, err error) {
|
|
glog.V(4).Infof("LOCK: doLock calling DistributedLock - key=%s filer=%s owner=%s",
|
|
lock.key, lock.hostFiler, lock.self)
|
|
|
|
previousHostFiler := lock.hostFiler
|
|
previousOwner := lock.owner
|
|
|
|
err = pb.WithFilerClient(false, 0, lock.hostFiler, lock.grpcDialOption, func(client filer_pb.SeaweedFilerClient) error {
|
|
resp, err := client.DistributedLock(context.Background(), &filer_pb.LockRequest{
|
|
Name: lock.key,
|
|
SecondsToLock: int64(lockDuration.Seconds()),
|
|
RenewToken: lock.renewToken,
|
|
IsMoved: false,
|
|
Owner: lock.self,
|
|
})
|
|
glog.V(4).Infof("LOCK: DistributedLock response - key=%s err=%v", lock.key, err)
|
|
if err == nil && resp != nil {
|
|
lock.renewToken = resp.RenewToken
|
|
glog.V(4).Infof("LOCK: Got renewToken for key=%s", lock.key)
|
|
} else {
|
|
//this can be retried. Need to remember the last valid renewToken
|
|
lock.renewToken = ""
|
|
glog.V(1).Infof("LOCK: Cleared renewToken for key=%s (err=%v)", lock.key, err)
|
|
}
|
|
if resp != nil {
|
|
errorMessage = resp.Error
|
|
if resp.LockHostMovedTo != "" && resp.LockHostMovedTo != string(previousHostFiler) {
|
|
// Only log if the host actually changed
|
|
glog.V(1).Infof("LOCK: Host changed from %s to %s for key=%s", previousHostFiler, resp.LockHostMovedTo, lock.key)
|
|
lock.hostFiler = pb.ServerAddress(resp.LockHostMovedTo)
|
|
lock.lc.seedFiler = lock.hostFiler
|
|
} else if resp.LockHostMovedTo != "" {
|
|
lock.hostFiler = pb.ServerAddress(resp.LockHostMovedTo)
|
|
}
|
|
if resp.LockOwner != "" && resp.LockOwner != previousOwner {
|
|
// Only log if the owner actually changed
|
|
glog.V(1).Infof("LOCK: Owner changed from %s to %s for key=%s", previousOwner, resp.LockOwner, lock.key)
|
|
lock.owner = resp.LockOwner
|
|
} else if resp.LockOwner != "" {
|
|
lock.owner = resp.LockOwner
|
|
} else if previousOwner != "" {
|
|
glog.V(1).Infof("LOCK: Owner cleared for key=%s", lock.key)
|
|
lock.owner = ""
|
|
}
|
|
}
|
|
return err
|
|
})
|
|
return
|
|
}
|
|
|
|
// LockOwner returns the most recently observed owner of the lock.
// NOTE(review): owner is written by the lock-renewal goroutine (via doLock)
// and read here without synchronization; unlike isLocked it is not atomic.
// Confirm external callers, or protect the field similarly.
func (lock *LiveLock) LockOwner() string {
	return lock.owner
}
|
|
|
|
// IsLocked returns true if this instance currently holds the lock.
// The flag is read atomically, so this is safe to call from any goroutine.
func (lock *LiveLock) IsLocked() bool {
	return atomic.LoadInt32(&lock.isLocked) == 1
}
|