This commit is contained in:
chrislu
2025-10-27 23:04:55 -07:00
parent 208d7f24f4
commit b7ba6785a2
32 changed files with 337 additions and 363 deletions

View File

@@ -24,12 +24,12 @@ func (rtm *RebalanceTimeoutManager) CheckRebalanceTimeouts() {
for _, group := range rtm.coordinator.groups {
group.Mu.Lock()
// Only check timeouts for groups in rebalancing states
if group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance {
rtm.checkGroupRebalanceTimeout(group, now)
}
group.Mu.Unlock()
}
}
@@ -37,7 +37,7 @@ func (rtm *RebalanceTimeoutManager) CheckRebalanceTimeouts() {
// checkGroupRebalanceTimeout checks and handles rebalance timeout for a specific group
func (rtm *RebalanceTimeoutManager) checkGroupRebalanceTimeout(group *ConsumerGroup, now time.Time) {
expiredMembers := make([]string, 0)
for memberID, member := range group.Members {
// Check if member has exceeded its rebalance timeout
rebalanceTimeout := time.Duration(member.RebalanceTimeout) * time.Millisecond
@@ -45,21 +45,21 @@ func (rtm *RebalanceTimeoutManager) checkGroupRebalanceTimeout(group *ConsumerGr
// Use default rebalance timeout if not specified
rebalanceTimeout = time.Duration(rtm.coordinator.rebalanceTimeoutMs) * time.Millisecond
}
// For members in pending state during rebalance, check against join time
if member.State == MemberStatePending {
if now.Sub(member.JoinedAt) > rebalanceTimeout {
expiredMembers = append(expiredMembers, memberID)
}
}
// Also check session timeout as a fallback
sessionTimeout := time.Duration(member.SessionTimeout) * time.Millisecond
if now.Sub(member.LastHeartbeat) > sessionTimeout {
expiredMembers = append(expiredMembers, memberID)
}
}
// Remove expired members and trigger rebalance if necessary
if len(expiredMembers) > 0 {
rtm.evictExpiredMembers(group, expiredMembers)
@@ -70,13 +70,13 @@ func (rtm *RebalanceTimeoutManager) checkGroupRebalanceTimeout(group *ConsumerGr
func (rtm *RebalanceTimeoutManager) evictExpiredMembers(group *ConsumerGroup, expiredMembers []string) {
for _, memberID := range expiredMembers {
delete(group.Members, memberID)
// If the leader was evicted, clear leader
if group.Leader == memberID {
group.Leader = ""
}
}
// Update group state based on remaining members
if len(group.Members) == 0 {
group.State = GroupStateEmpty
@@ -92,18 +92,18 @@ func (rtm *RebalanceTimeoutManager) evictExpiredMembers(group *ConsumerGroup, ex
break
}
}
// Reset to preparing rebalance to restart the process
group.State = GroupStatePreparingRebalance
group.Generation++
// Mark remaining members as pending
for _, member := range group.Members {
member.State = MemberStatePending
}
}
}
group.LastActivity = time.Now()
}
@@ -112,7 +112,7 @@ func (rtm *RebalanceTimeoutManager) IsRebalanceStuck(group *ConsumerGroup, maxRe
if group.State != GroupStatePreparingRebalance && group.State != GroupStateCompletingRebalance {
return false
}
return time.Since(group.LastActivity) > maxRebalanceDuration
}
@@ -120,14 +120,14 @@ func (rtm *RebalanceTimeoutManager) IsRebalanceStuck(group *ConsumerGroup, maxRe
func (rtm *RebalanceTimeoutManager) ForceCompleteRebalance(group *ConsumerGroup) {
group.Mu.Lock()
defer group.Mu.Unlock()
// If stuck in preparing rebalance, move to completing
if group.State == GroupStatePreparingRebalance {
group.State = GroupStateCompletingRebalance
group.LastActivity = time.Now()
return
}
// If stuck in completing rebalance, force to stable
if group.State == GroupStateCompletingRebalance {
group.State = GroupStateStable
@@ -145,21 +145,21 @@ func (rtm *RebalanceTimeoutManager) GetRebalanceStatus(groupID string) *Rebalanc
if group == nil {
return nil
}
group.Mu.RLock()
defer group.Mu.RUnlock()
status := &RebalanceStatus{
GroupID: groupID,
State: group.State,
Generation: group.Generation,
MemberCount: len(group.Members),
Leader: group.Leader,
LastActivity: group.LastActivity,
IsRebalancing: group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance,
GroupID: groupID,
State: group.State,
Generation: group.Generation,
MemberCount: len(group.Members),
Leader: group.Leader,
LastActivity: group.LastActivity,
IsRebalancing: group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance,
RebalanceDuration: time.Since(group.LastActivity),
}
// Calculate member timeout status
now := time.Now()
for memberID, member := range group.Members {
@@ -171,48 +171,48 @@ func (rtm *RebalanceTimeoutManager) GetRebalanceStatus(groupID string) *Rebalanc
SessionTimeout: time.Duration(member.SessionTimeout) * time.Millisecond,
RebalanceTimeout: time.Duration(member.RebalanceTimeout) * time.Millisecond,
}
// Calculate time until session timeout
sessionTimeRemaining := memberStatus.SessionTimeout - now.Sub(member.LastHeartbeat)
if sessionTimeRemaining < 0 {
sessionTimeRemaining = 0
}
memberStatus.SessionTimeRemaining = sessionTimeRemaining
// Calculate time until rebalance timeout
rebalanceTimeRemaining := memberStatus.RebalanceTimeout - now.Sub(member.JoinedAt)
if rebalanceTimeRemaining < 0 {
rebalanceTimeRemaining = 0
}
memberStatus.RebalanceTimeRemaining = rebalanceTimeRemaining
status.Members = append(status.Members, memberStatus)
}
return status
}
// RebalanceStatus is a point-in-time snapshot of a consumer group's rebalance
// progress, as assembled by GetRebalanceStatus.
//
// Each field is declared exactly once; Go rejects a struct that repeats a
// field name. When marshaled with encoding/json, time.Duration fields are
// emitted as integer nanoseconds.
type RebalanceStatus struct {
	// GroupID is the identifier the status was requested for.
	GroupID string `json:"group_id"`

	// State is the group's state at the time of the snapshot.
	State GroupState `json:"state"`

	// Generation is the group's generation counter at the time of the snapshot.
	Generation int32 `json:"generation"`

	// MemberCount is the number of members registered in the group.
	MemberCount int `json:"member_count"`

	// Leader is the member ID recorded as the group leader ("" if none).
	Leader string `json:"leader"`

	// LastActivity is the group's last recorded activity timestamp.
	LastActivity time.Time `json:"last_activity"`

	// IsRebalancing is true when State is GroupStatePreparingRebalance or
	// GroupStateCompletingRebalance.
	IsRebalancing bool `json:"is_rebalancing"`

	// RebalanceDuration is the time elapsed since LastActivity when the
	// snapshot was taken.
	RebalanceDuration time.Duration `json:"rebalance_duration"`

	// Members holds one timeout status entry per group member.
	Members []MemberTimeoutStatus `json:"members"`
}
// MemberTimeoutStatus describes how close a single group member is to its
// session and rebalance timeouts.
//
// Each field is declared exactly once; Go rejects a struct that repeats a
// field name. When marshaled with encoding/json, time.Duration fields are
// emitted as integer nanoseconds.
type MemberTimeoutStatus struct {
	// MemberID identifies the member within its group.
	MemberID string `json:"member_id"`

	// State is the member's state at the time of the snapshot.
	State MemberState `json:"state"`

	// LastHeartbeat is the timestamp the session timeout is measured from.
	LastHeartbeat time.Time `json:"last_heartbeat"`

	// JoinedAt is the timestamp the rebalance timeout is measured from.
	JoinedAt time.Time `json:"joined_at"`

	// SessionTimeout is the member's session timeout, converted from
	// milliseconds to a time.Duration.
	SessionTimeout time.Duration `json:"session_timeout"`

	// RebalanceTimeout is the member's rebalance timeout, converted from
	// milliseconds to a time.Duration.
	RebalanceTimeout time.Duration `json:"rebalance_timeout"`

	// SessionTimeRemaining is SessionTimeout minus the time elapsed since
	// LastHeartbeat, clamped to zero once the timeout has passed.
	SessionTimeRemaining time.Duration `json:"session_time_remaining"`

	// RebalanceTimeRemaining is RebalanceTimeout minus the time elapsed since
	// JoinedAt, clamped to zero once the timeout has passed.
	RebalanceTimeRemaining time.Duration `json:"rebalance_time_remaining"`
}