fix: ARM v7 alignment issue for 64-bit atomic operations (#7652)
Fixes #7643. Reordered the filerHealth struct fields so that the int64 field comes first, guaranteeing the 8-byte alignment required for 64-bit atomic operations on 32-bit ARM architectures (ARMv7, as used in OpenWRT).
This commit is contained in:
@@ -29,8 +29,8 @@ const (
|
|||||||
|
|
||||||
// filerHealth tracks the health status of a filer
|
// filerHealth tracks the health status of a filer
|
||||||
type filerHealth struct {
|
type filerHealth struct {
|
||||||
failureCount int32 // atomic: consecutive failures
|
|
||||||
lastFailureTimeNs int64 // atomic: last failure time in Unix nanoseconds
|
lastFailureTimeNs int64 // atomic: last failure time in Unix nanoseconds
|
||||||
|
failureCount int32 // atomic: consecutive failures
|
||||||
}
|
}
|
||||||
|
|
||||||
// FilerClient provides volume location services by querying a filer
|
// FilerClient provides volume location services by querying a filer
|
||||||
@@ -54,7 +54,7 @@ type FilerClient struct {
|
|||||||
maxRetries int // Retry: maximum retry attempts for transient failures
|
maxRetries int // Retry: maximum retry attempts for transient failures
|
||||||
initialRetryWait time.Duration // Retry: initial wait time before first retry
|
initialRetryWait time.Duration // Retry: initial wait time before first retry
|
||||||
retryBackoffFactor float64 // Retry: backoff multiplier for wait time
|
retryBackoffFactor float64 // Retry: backoff multiplier for wait time
|
||||||
|
|
||||||
// Filer discovery fields
|
// Filer discovery fields
|
||||||
masterClient *MasterClient // Optional: for discovering filers in the same group
|
masterClient *MasterClient // Optional: for discovering filers in the same group
|
||||||
filerGroup string // Optional: filer group for discovery
|
filerGroup string // Optional: filer group for discovery
|
||||||
@@ -79,7 +79,7 @@ type FilerClientOption struct {
|
|||||||
MaxRetries int // Retry: maximum retry attempts for transient failures (0 = use default of 3)
|
MaxRetries int // Retry: maximum retry attempts for transient failures (0 = use default of 3)
|
||||||
InitialRetryWait time.Duration // Retry: initial wait time before first retry (0 = use default of 1s)
|
InitialRetryWait time.Duration // Retry: initial wait time before first retry (0 = use default of 1s)
|
||||||
RetryBackoffFactor float64 // Retry: backoff multiplier for wait time (0 = use default of 1.5)
|
RetryBackoffFactor float64 // Retry: backoff multiplier for wait time (0 = use default of 1.5)
|
||||||
|
|
||||||
// Filer discovery options
|
// Filer discovery options
|
||||||
MasterClient *MasterClient // Optional: enables filer discovery from master
|
MasterClient *MasterClient // Optional: enables filer discovery from master
|
||||||
FilerGroup string // Optional: filer group name for discovery (required if MasterClient is set)
|
FilerGroup string // Optional: filer group name for discovery (required if MasterClient is set)
|
||||||
@@ -192,17 +192,17 @@ func NewFilerClient(filerAddresses []pb.ServerAddress, grpcDialOption grpc.DialO
|
|||||||
func (fc *FilerClient) GetCurrentFiler() pb.ServerAddress {
|
func (fc *FilerClient) GetCurrentFiler() pb.ServerAddress {
|
||||||
fc.filerAddressesMu.RLock()
|
fc.filerAddressesMu.RLock()
|
||||||
defer fc.filerAddressesMu.RUnlock()
|
defer fc.filerAddressesMu.RUnlock()
|
||||||
|
|
||||||
if len(fc.filerAddresses) == 0 {
|
if len(fc.filerAddresses) == 0 {
|
||||||
return ""
|
return ""
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get current index (atomically updated on successful operations)
|
// Get current index (atomically updated on successful operations)
|
||||||
index := atomic.LoadInt32(&fc.filerIndex)
|
index := atomic.LoadInt32(&fc.filerIndex)
|
||||||
if index >= int32(len(fc.filerAddresses)) {
|
if index >= int32(len(fc.filerAddresses)) {
|
||||||
index = 0
|
index = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
return fc.filerAddresses[index]
|
return fc.filerAddresses[index]
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -211,7 +211,7 @@ func (fc *FilerClient) GetCurrentFiler() pb.ServerAddress {
|
|||||||
func (fc *FilerClient) GetAllFilers() []pb.ServerAddress {
|
func (fc *FilerClient) GetAllFilers() []pb.ServerAddress {
|
||||||
fc.filerAddressesMu.RLock()
|
fc.filerAddressesMu.RLock()
|
||||||
defer fc.filerAddressesMu.RUnlock()
|
defer fc.filerAddressesMu.RUnlock()
|
||||||
|
|
||||||
// Return a copy to avoid concurrent modification
|
// Return a copy to avoid concurrent modification
|
||||||
filers := make([]pb.ServerAddress, len(fc.filerAddresses))
|
filers := make([]pb.ServerAddress, len(fc.filerAddresses))
|
||||||
copy(filers, fc.filerAddresses)
|
copy(filers, fc.filerAddresses)
|
||||||
@@ -223,7 +223,7 @@ func (fc *FilerClient) GetAllFilers() []pb.ServerAddress {
|
|||||||
func (fc *FilerClient) SetCurrentFiler(addr pb.ServerAddress) {
|
func (fc *FilerClient) SetCurrentFiler(addr pb.ServerAddress) {
|
||||||
fc.filerAddressesMu.RLock()
|
fc.filerAddressesMu.RLock()
|
||||||
defer fc.filerAddressesMu.RUnlock()
|
defer fc.filerAddressesMu.RUnlock()
|
||||||
|
|
||||||
// Find the index of the specified filer address
|
// Find the index of the specified filer address
|
||||||
for i, filer := range fc.filerAddresses {
|
for i, filer := range fc.filerAddresses {
|
||||||
if filer == addr {
|
if filer == addr {
|
||||||
@@ -239,7 +239,7 @@ func (fc *FilerClient) SetCurrentFiler(addr pb.ServerAddress) {
|
|||||||
func (fc *FilerClient) ShouldSkipUnhealthyFiler(addr pb.ServerAddress) bool {
|
func (fc *FilerClient) ShouldSkipUnhealthyFiler(addr pb.ServerAddress) bool {
|
||||||
fc.filerAddressesMu.RLock()
|
fc.filerAddressesMu.RLock()
|
||||||
defer fc.filerAddressesMu.RUnlock()
|
defer fc.filerAddressesMu.RUnlock()
|
||||||
|
|
||||||
// Find the health for this filer address
|
// Find the health for this filer address
|
||||||
for i, filer := range fc.filerAddresses {
|
for i, filer := range fc.filerAddresses {
|
||||||
if filer == addr {
|
if filer == addr {
|
||||||
@@ -257,7 +257,7 @@ func (fc *FilerClient) ShouldSkipUnhealthyFiler(addr pb.ServerAddress) bool {
|
|||||||
func (fc *FilerClient) RecordFilerSuccess(addr pb.ServerAddress) {
|
func (fc *FilerClient) RecordFilerSuccess(addr pb.ServerAddress) {
|
||||||
fc.filerAddressesMu.RLock()
|
fc.filerAddressesMu.RLock()
|
||||||
defer fc.filerAddressesMu.RUnlock()
|
defer fc.filerAddressesMu.RUnlock()
|
||||||
|
|
||||||
// Find the health for this filer address
|
// Find the health for this filer address
|
||||||
for i, filer := range fc.filerAddresses {
|
for i, filer := range fc.filerAddresses {
|
||||||
if filer == addr {
|
if filer == addr {
|
||||||
@@ -273,7 +273,7 @@ func (fc *FilerClient) RecordFilerSuccess(addr pb.ServerAddress) {
|
|||||||
func (fc *FilerClient) RecordFilerFailure(addr pb.ServerAddress) {
|
func (fc *FilerClient) RecordFilerFailure(addr pb.ServerAddress) {
|
||||||
fc.filerAddressesMu.RLock()
|
fc.filerAddressesMu.RLock()
|
||||||
defer fc.filerAddressesMu.RUnlock()
|
defer fc.filerAddressesMu.RUnlock()
|
||||||
|
|
||||||
// Find the health for this filer address
|
// Find the health for this filer address
|
||||||
for i, filer := range fc.filerAddresses {
|
for i, filer := range fc.filerAddresses {
|
||||||
if filer == addr {
|
if filer == addr {
|
||||||
@@ -303,13 +303,13 @@ func (fc *FilerClient) discoverFilers() {
|
|||||||
glog.Errorf("FilerClient: panic in filer discovery goroutine for group '%s': %v", fc.filerGroup, r)
|
glog.Errorf("FilerClient: panic in filer discovery goroutine for group '%s': %v", fc.filerGroup, r)
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Do an initial discovery
|
// Do an initial discovery
|
||||||
fc.refreshFilerList()
|
fc.refreshFilerList()
|
||||||
|
|
||||||
ticker := time.NewTicker(fc.discoveryInterval)
|
ticker := time.NewTicker(fc.discoveryInterval)
|
||||||
defer ticker.Stop()
|
defer ticker.Stop()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ticker.C:
|
case <-ticker.C:
|
||||||
@@ -326,22 +326,22 @@ func (fc *FilerClient) refreshFilerList() {
|
|||||||
if fc.masterClient == nil {
|
if fc.masterClient == nil {
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get current master address
|
// Get current master address
|
||||||
currentMaster := fc.masterClient.GetMaster(context.Background())
|
currentMaster := fc.masterClient.GetMaster(context.Background())
|
||||||
if currentMaster == "" {
|
if currentMaster == "" {
|
||||||
glog.V(1).Infof("FilerClient: no master available for filer discovery")
|
glog.V(1).Infof("FilerClient: no master available for filer discovery")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Query master for filers in our group
|
// Query master for filers in our group
|
||||||
updates := cluster.ListExistingPeerUpdates(currentMaster, fc.grpcDialOption, fc.filerGroup, cluster.FilerType)
|
updates := cluster.ListExistingPeerUpdates(currentMaster, fc.grpcDialOption, fc.filerGroup, cluster.FilerType)
|
||||||
|
|
||||||
if len(updates) == 0 {
|
if len(updates) == 0 {
|
||||||
glog.V(2).Infof("FilerClient: no filers found in group '%s'", fc.filerGroup)
|
glog.V(2).Infof("FilerClient: no filers found in group '%s'", fc.filerGroup)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Build new filer address list
|
// Build new filer address list
|
||||||
discoveredFilers := make(map[pb.ServerAddress]bool)
|
discoveredFilers := make(map[pb.ServerAddress]bool)
|
||||||
for _, update := range updates {
|
for _, update := range updates {
|
||||||
@@ -349,17 +349,17 @@ func (fc *FilerClient) refreshFilerList() {
|
|||||||
discoveredFilers[pb.ServerAddress(update.Address)] = true
|
discoveredFilers[pb.ServerAddress(update.Address)] = true
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Thread-safe update of filer list
|
// Thread-safe update of filer list
|
||||||
fc.filerAddressesMu.Lock()
|
fc.filerAddressesMu.Lock()
|
||||||
defer fc.filerAddressesMu.Unlock()
|
defer fc.filerAddressesMu.Unlock()
|
||||||
|
|
||||||
// Build a map of existing filers for efficient O(1) lookup
|
// Build a map of existing filers for efficient O(1) lookup
|
||||||
existingFilers := make(map[pb.ServerAddress]struct{}, len(fc.filerAddresses))
|
existingFilers := make(map[pb.ServerAddress]struct{}, len(fc.filerAddresses))
|
||||||
for _, f := range fc.filerAddresses {
|
for _, f := range fc.filerAddresses {
|
||||||
existingFilers[f] = struct{}{}
|
existingFilers[f] = struct{}{}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find new filers - O(N+M) instead of O(N*M)
|
// Find new filers - O(N+M) instead of O(N*M)
|
||||||
var newFilers []pb.ServerAddress
|
var newFilers []pb.ServerAddress
|
||||||
for addr := range discoveredFilers {
|
for addr := range discoveredFilers {
|
||||||
@@ -367,18 +367,18 @@ func (fc *FilerClient) refreshFilerList() {
|
|||||||
newFilers = append(newFilers, addr)
|
newFilers = append(newFilers, addr)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add new filers
|
// Add new filers
|
||||||
if len(newFilers) > 0 {
|
if len(newFilers) > 0 {
|
||||||
glog.V(0).Infof("FilerClient: discovered %d new filer(s) in group '%s': %v", len(newFilers), fc.filerGroup, newFilers)
|
glog.V(0).Infof("FilerClient: discovered %d new filer(s) in group '%s': %v", len(newFilers), fc.filerGroup, newFilers)
|
||||||
fc.filerAddresses = append(fc.filerAddresses, newFilers...)
|
fc.filerAddresses = append(fc.filerAddresses, newFilers...)
|
||||||
|
|
||||||
// Initialize health tracking for new filers
|
// Initialize health tracking for new filers
|
||||||
for range newFilers {
|
for range newFilers {
|
||||||
fc.filerHealth = append(fc.filerHealth, &filerHealth{})
|
fc.filerHealth = append(fc.filerHealth, &filerHealth{})
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Optionally, remove filers that are no longer in the cluster
|
// Optionally, remove filers that are no longer in the cluster
|
||||||
// For now, we keep all filers and rely on health checks to avoid dead ones
|
// For now, we keep all filers and rely on health checks to avoid dead ones
|
||||||
// This prevents removing filers that might be temporarily unavailable
|
// This prevents removing filers that might be temporarily unavailable
|
||||||
@@ -571,7 +571,7 @@ func (p *filerVolumeProvider) LookupVolumeIds(ctx context.Context, volumeIds []s
|
|||||||
// Try all filer addresses with round-robin starting from current index
|
// Try all filer addresses with round-robin starting from current index
|
||||||
// Skip known-unhealthy filers (circuit breaker pattern)
|
// Skip known-unhealthy filers (circuit breaker pattern)
|
||||||
i := atomic.LoadInt32(&fc.filerIndex)
|
i := atomic.LoadInt32(&fc.filerIndex)
|
||||||
|
|
||||||
// Get filer count with read lock
|
// Get filer count with read lock
|
||||||
fc.filerAddressesMu.RLock()
|
fc.filerAddressesMu.RLock()
|
||||||
n := int32(len(fc.filerAddresses))
|
n := int32(len(fc.filerAddresses))
|
||||||
@@ -589,12 +589,12 @@ func (p *filerVolumeProvider) LookupVolumeIds(ctx context.Context, volumeIds []s
|
|||||||
// Filer list changed, reset index
|
// Filer list changed, reset index
|
||||||
i = 0
|
i = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get health pointer while holding lock
|
// Get health pointer while holding lock
|
||||||
health := fc.filerHealth[i]
|
health := fc.filerHealth[i]
|
||||||
filerAddress := fc.filerAddresses[i]
|
filerAddress := fc.filerAddresses[i]
|
||||||
fc.filerAddressesMu.RUnlock()
|
fc.filerAddressesMu.RUnlock()
|
||||||
|
|
||||||
// Circuit breaker: skip unhealthy filers (no lock needed - uses atomics)
|
// Circuit breaker: skip unhealthy filers (no lock needed - uses atomics)
|
||||||
if fc.shouldSkipUnhealthyFilerWithHealth(health) {
|
if fc.shouldSkipUnhealthyFilerWithHealth(health) {
|
||||||
glog.V(2).Infof("FilerClient: skipping unhealthy filer %s (consecutive failures: %d)",
|
glog.V(2).Infof("FilerClient: skipping unhealthy filer %s (consecutive failures: %d)",
|
||||||
|
|||||||
Reference in New Issue
Block a user