* fix(kafka): resolve consumer group resumption timeout in e2e tests Three issues caused ConsumerGroupResumption to time out when the second consumer tried to resume from committed offsets: 1. ForceCompleteRebalance deadlock: performCleanup() held group.Mu.Lock then called ForceCompleteRebalance() which tried to acquire the same lock — a guaranteed deadlock on Go's non-reentrant sync.Mutex. Fixed by requiring callers to hold the lock (matching actual call sites). 2. Unbounded fallback fetch: when the multi-batch fetch timed out, the fallback GetStoredRecords call used the connection context (no deadline). A slow broker gRPC call could block the data-plane goroutine indefinitely, causing head-of-line blocking for all responses on that connection. Fixed with a 10-second timeout. 3. HWM lookup failure caused empty responses: after a consumer leaves and the partition is deactivated, GetLatestOffset can fail. The fetch handler treated this as "no data" and entered the long-poll loop (up to 10s × 4 retries = 40s timeout). Fixed by assuming data may exist when HWM lookup fails, so the actual fetch determines availability. * fix(kafka): address review feedback on HWM sentinel and fallback timeout - Don't expose synthetic HWM (requestedOffset+1) to clients; keep result.highWaterMark at 0 when the real HWM lookup fails. - Tie fallback timeout to client's MaxWaitTime instead of a fixed 10s, so one slow partition doesn't hold the reader beyond the request budget. * fix(kafka): use large HWM sentinel and clamp fallback timeout - Use requestedOffset+10000 as sentinel HWM instead of +1, so FetchMultipleBatches doesn't artificially limit to 1 record. - Add 2s floor to fallback timeout so disk reads via gRPC have a reasonable chance even when maxWaitMs is small or zero. * fix(kafka): use MaxInt64 sentinel and derive HWM from fetch result - Use math.MaxInt64 as HWM sentinel to avoid integer overflow risk (previously requestedOffset+10000 could wrap on large offsets). 
- After the fetch, derive a meaningful HWM from newOffset so the client never sees MaxInt64 or 0 in the response. * fix(kafka): use remaining time budget for fallback fetch The fallback was restarting the full maxWaitMs budget even though the multi-batch fetch already consumed part of it. Now compute remaining time from either the parent context deadline or maxWaitMs minus elapsed, skip the fallback if budget is exhausted, and clamp to [2s, 10s] bounds.
217 lines
7.2 KiB
Go
217 lines
7.2 KiB
Go
package consumer
|
|
|
|
import (
|
|
"time"
|
|
)
|
|
|
|
// RebalanceTimeoutManager handles rebalance timeout logic and member eviction
// for consumer groups tracked by a GroupCoordinator.
type RebalanceTimeoutManager struct {
	// coordinator owns the groups map and supplies the default
	// rebalance timeout (rebalanceTimeoutMs).
	coordinator *GroupCoordinator
}
|
|
|
|
// NewRebalanceTimeoutManager creates a new rebalance timeout manager
|
|
func NewRebalanceTimeoutManager(coordinator *GroupCoordinator) *RebalanceTimeoutManager {
|
|
return &RebalanceTimeoutManager{
|
|
coordinator: coordinator,
|
|
}
|
|
}
|
|
|
|
// CheckRebalanceTimeouts checks for members that have exceeded rebalance timeouts
|
|
func (rtm *RebalanceTimeoutManager) CheckRebalanceTimeouts() {
|
|
now := time.Now()
|
|
rtm.coordinator.groupsMu.RLock()
|
|
defer rtm.coordinator.groupsMu.RUnlock()
|
|
|
|
for _, group := range rtm.coordinator.groups {
|
|
group.Mu.Lock()
|
|
|
|
// Only check timeouts for groups in rebalancing states
|
|
if group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance {
|
|
rtm.checkGroupRebalanceTimeout(group, now)
|
|
}
|
|
|
|
group.Mu.Unlock()
|
|
}
|
|
}
|
|
|
|
// checkGroupRebalanceTimeout checks and handles rebalance timeout for a specific group
|
|
func (rtm *RebalanceTimeoutManager) checkGroupRebalanceTimeout(group *ConsumerGroup, now time.Time) {
|
|
expiredMembers := make([]string, 0)
|
|
|
|
for memberID, member := range group.Members {
|
|
// Check if member has exceeded its rebalance timeout
|
|
rebalanceTimeout := time.Duration(member.RebalanceTimeout) * time.Millisecond
|
|
if rebalanceTimeout == 0 {
|
|
// Use default rebalance timeout if not specified
|
|
rebalanceTimeout = time.Duration(rtm.coordinator.rebalanceTimeoutMs) * time.Millisecond
|
|
}
|
|
|
|
// For members in pending state during rebalance, check against join time
|
|
if member.State == MemberStatePending {
|
|
if now.Sub(member.JoinedAt) > rebalanceTimeout {
|
|
expiredMembers = append(expiredMembers, memberID)
|
|
}
|
|
}
|
|
|
|
// Also check session timeout as a fallback
|
|
sessionTimeout := time.Duration(member.SessionTimeout) * time.Millisecond
|
|
if now.Sub(member.LastHeartbeat) > sessionTimeout {
|
|
expiredMembers = append(expiredMembers, memberID)
|
|
}
|
|
}
|
|
|
|
// Remove expired members and trigger rebalance if necessary
|
|
if len(expiredMembers) > 0 {
|
|
rtm.evictExpiredMembers(group, expiredMembers)
|
|
}
|
|
}
|
|
|
|
// evictExpiredMembers removes expired members and updates group state
|
|
func (rtm *RebalanceTimeoutManager) evictExpiredMembers(group *ConsumerGroup, expiredMembers []string) {
|
|
for _, memberID := range expiredMembers {
|
|
delete(group.Members, memberID)
|
|
|
|
// If the leader was evicted, clear leader
|
|
if group.Leader == memberID {
|
|
group.Leader = ""
|
|
}
|
|
}
|
|
|
|
// Update group state based on remaining members
|
|
if len(group.Members) == 0 {
|
|
group.State = GroupStateEmpty
|
|
group.Generation++
|
|
group.Leader = ""
|
|
} else {
|
|
// If we were in the middle of rebalancing, restart the process
|
|
if group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance {
|
|
// Select new leader if needed
|
|
if group.Leader == "" {
|
|
for memberID := range group.Members {
|
|
group.Leader = memberID
|
|
break
|
|
}
|
|
}
|
|
|
|
// Reset to preparing rebalance to restart the process
|
|
group.State = GroupStatePreparingRebalance
|
|
group.Generation++
|
|
|
|
// Mark remaining members as pending
|
|
for _, member := range group.Members {
|
|
member.State = MemberStatePending
|
|
}
|
|
}
|
|
}
|
|
|
|
group.LastActivity = time.Now()
|
|
}
|
|
|
|
// IsRebalanceStuck checks if a group has been stuck in rebalancing for too long
|
|
func (rtm *RebalanceTimeoutManager) IsRebalanceStuck(group *ConsumerGroup, maxRebalanceDuration time.Duration) bool {
|
|
if group.State != GroupStatePreparingRebalance && group.State != GroupStateCompletingRebalance {
|
|
return false
|
|
}
|
|
|
|
return time.Since(group.LastActivity) > maxRebalanceDuration
|
|
}
|
|
|
|
// ForceCompleteRebalance forces completion of a stuck rebalance.
|
|
// IMPORTANT: The caller must already hold group.Mu.Lock().
|
|
func (rtm *RebalanceTimeoutManager) ForceCompleteRebalance(group *ConsumerGroup) {
|
|
// If stuck in preparing rebalance, move to completing
|
|
if group.State == GroupStatePreparingRebalance {
|
|
group.State = GroupStateCompletingRebalance
|
|
group.LastActivity = time.Now()
|
|
return
|
|
}
|
|
|
|
// If stuck in completing rebalance, force to stable
|
|
if group.State == GroupStateCompletingRebalance {
|
|
group.State = GroupStateStable
|
|
for _, member := range group.Members {
|
|
member.State = MemberStateStable
|
|
}
|
|
group.LastActivity = time.Now()
|
|
return
|
|
}
|
|
}
|
|
|
|
// GetRebalanceStatus returns the current rebalance status for a group
|
|
func (rtm *RebalanceTimeoutManager) GetRebalanceStatus(groupID string) *RebalanceStatus {
|
|
group := rtm.coordinator.GetGroup(groupID)
|
|
if group == nil {
|
|
return nil
|
|
}
|
|
|
|
group.Mu.RLock()
|
|
defer group.Mu.RUnlock()
|
|
|
|
status := &RebalanceStatus{
|
|
GroupID: groupID,
|
|
State: group.State,
|
|
Generation: group.Generation,
|
|
MemberCount: len(group.Members),
|
|
Leader: group.Leader,
|
|
LastActivity: group.LastActivity,
|
|
IsRebalancing: group.State == GroupStatePreparingRebalance || group.State == GroupStateCompletingRebalance,
|
|
RebalanceDuration: time.Since(group.LastActivity),
|
|
}
|
|
|
|
// Calculate member timeout status
|
|
now := time.Now()
|
|
for memberID, member := range group.Members {
|
|
memberStatus := MemberTimeoutStatus{
|
|
MemberID: memberID,
|
|
State: member.State,
|
|
LastHeartbeat: member.LastHeartbeat,
|
|
JoinedAt: member.JoinedAt,
|
|
SessionTimeout: time.Duration(member.SessionTimeout) * time.Millisecond,
|
|
RebalanceTimeout: time.Duration(member.RebalanceTimeout) * time.Millisecond,
|
|
}
|
|
|
|
// Calculate time until session timeout
|
|
sessionTimeRemaining := memberStatus.SessionTimeout - now.Sub(member.LastHeartbeat)
|
|
if sessionTimeRemaining < 0 {
|
|
sessionTimeRemaining = 0
|
|
}
|
|
memberStatus.SessionTimeRemaining = sessionTimeRemaining
|
|
|
|
// Calculate time until rebalance timeout
|
|
rebalanceTimeRemaining := memberStatus.RebalanceTimeout - now.Sub(member.JoinedAt)
|
|
if rebalanceTimeRemaining < 0 {
|
|
rebalanceTimeRemaining = 0
|
|
}
|
|
memberStatus.RebalanceTimeRemaining = rebalanceTimeRemaining
|
|
|
|
status.Members = append(status.Members, memberStatus)
|
|
}
|
|
|
|
return status
|
|
}
|
|
|
|
// RebalanceStatus represents the current status of a group's rebalance,
// as assembled by GetRebalanceStatus.
type RebalanceStatus struct {
	GroupID     string     `json:"group_id"`
	State       GroupState `json:"state"`
	Generation  int32      `json:"generation"`
	MemberCount int        `json:"member_count"`
	Leader      string     `json:"leader"` // empty when the group has no leader
	LastActivity time.Time `json:"last_activity"`
	// IsRebalancing is true while the group is in the preparing or
	// completing rebalance state.
	IsRebalancing bool `json:"is_rebalancing"`
	// RebalanceDuration is the time elapsed since LastActivity at snapshot time.
	RebalanceDuration time.Duration         `json:"rebalance_duration"`
	Members           []MemberTimeoutStatus `json:"members"`
}
|
|
|
|
// MemberTimeoutStatus represents timeout status for a group member at the
// moment the status snapshot was taken.
type MemberTimeoutStatus struct {
	MemberID         string        `json:"member_id"`
	State            MemberState   `json:"state"`
	LastHeartbeat    time.Time     `json:"last_heartbeat"`
	JoinedAt         time.Time     `json:"joined_at"`
	SessionTimeout   time.Duration `json:"session_timeout"`
	RebalanceTimeout time.Duration `json:"rebalance_timeout"`
	// SessionTimeRemaining is SessionTimeout minus time since LastHeartbeat,
	// clamped at zero.
	SessionTimeRemaining time.Duration `json:"session_time_remaining"`
	// RebalanceTimeRemaining is RebalanceTimeout minus time since JoinedAt,
	// clamped at zero.
	RebalanceTimeRemaining time.Duration `json:"rebalance_time_remaining"`
}
|