* fix s3 read failover #8076 - Implement cache invalidation in vidMapClient - Add retry logic in shared PrepareStreamContentWithThrottler - Update S3 Gateway to use FilerClient directly for invalidation support - Remove obsolete simpleMasterClient struct * improve observability for chunk re-lookup failures Added a warning log when volume location re-lookup fails after cache invalidation in PrepareStreamContentWithThrottler. * address code review feedback - Prevent infinite retry loops by comparing old/new URLs before retry - Update fileId2Url map after successful re-lookup for subsequent references - Add comprehensive test coverage for failover logic - Add tests for InvalidateCache method * Fix: prevent data duplication in stream retry and improve VidMap robustness * Cleanup: remove redundant check in InvalidateCache
This commit is contained in:
@@ -106,6 +106,30 @@ func noJwtFunc(string) string {
|
||||
return ""
|
||||
}
|
||||
|
||||
// CacheInvalidator is implemented by lookup clients that can evict a
// cached volume-location entry when a read against that location fails,
// so the next lookup fetches fresh locations instead of the stale cache.
type CacheInvalidator interface {
	// InvalidateCache removes any cached location entry for fileId.
	InvalidateCache(fileId string)
}
|
||||
|
||||
// urlSlicesEqual reports whether a and b hold the same URLs, ignoring
// order. Duplicates matter: each URL must appear the same number of
// times in both slices (multiset equality).
func urlSlicesEqual(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	// Tally every URL from the first slice, then consume the tallies
	// with the second; any URL that goes negative is missing (or under-
	// represented) in the first slice.
	seen := make(map[string]int, len(a))
	for _, u := range a {
		seen[u]++
	}
	for _, u := range b {
		seen[u]--
		if seen[u] < 0 {
			return false
		}
	}
	// Equal lengths plus no negative tally implies every count is zero.
	return true
}
|
||||
|
||||
func PrepareStreamContentWithThrottler(ctx context.Context, masterClient wdclient.HasLookupFileIdFunction, jwtFunc VolumeServerJwtFunction, chunks []*filer_pb.FileChunk, offset int64, size int64, downloadMaxBytesPs int64) (DoStreamContent, error) {
|
||||
glog.V(4).InfofCtx(ctx, "prepare to stream content for chunks: %d", len(chunks))
|
||||
chunkViews := ViewFromChunks(ctx, masterClient.GetLookupFileIdFunction(), chunks, offset, size)
|
||||
@@ -153,7 +177,38 @@ func PrepareStreamContentWithThrottler(ctx context.Context, masterClient wdclien
|
||||
urlStrings := fileId2Url[chunkView.FileId]
|
||||
start := time.Now()
|
||||
jwt := jwtFunc(chunkView.FileId)
|
||||
err := retriedStreamFetchChunkData(ctx, writer, urlStrings, jwt, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize))
|
||||
written, err := retriedStreamFetchChunkData(ctx, writer, urlStrings, jwt, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize))
|
||||
|
||||
// If read failed, try to invalidate cache and re-lookup
|
||||
if err != nil && written == 0 {
|
||||
if invalidator, ok := masterClient.(CacheInvalidator); ok {
|
||||
glog.V(0).InfofCtx(ctx, "read chunk %s failed, invalidating cache and retrying", chunkView.FileId)
|
||||
invalidator.InvalidateCache(chunkView.FileId)
|
||||
|
||||
// Re-lookup
|
||||
newUrlStrings, lookupErr := masterClient.GetLookupFileIdFunction()(ctx, chunkView.FileId)
|
||||
if lookupErr == nil && len(newUrlStrings) > 0 {
|
||||
// Check if new URLs are different from old ones to avoid infinite retry
|
||||
if !urlSlicesEqual(urlStrings, newUrlStrings) {
|
||||
glog.V(0).InfofCtx(ctx, "retrying read chunk %s with new locations: %v", chunkView.FileId, newUrlStrings)
|
||||
_, err = retriedStreamFetchChunkData(ctx, writer, newUrlStrings, jwt, chunkView.CipherKey, chunkView.IsGzipped, chunkView.IsFullChunk(), chunkView.OffsetInChunk, int(chunkView.ViewSize))
|
||||
// Update the map so subsequent references use fresh URLs
|
||||
if err == nil {
|
||||
fileId2Url[chunkView.FileId] = newUrlStrings
|
||||
}
|
||||
} else {
|
||||
glog.V(0).InfofCtx(ctx, "re-lookup returned same locations for chunk %s, skipping retry", chunkView.FileId)
|
||||
}
|
||||
} else {
|
||||
if lookupErr != nil {
|
||||
glog.WarningfCtx(ctx, "failed to re-lookup chunk %s after cache invalidation: %v", chunkView.FileId, lookupErr)
|
||||
} else {
|
||||
glog.WarningfCtx(ctx, "re-lookup for chunk %s returned no locations, skipping retry", chunkView.FileId)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
offset += int64(chunkView.ViewSize)
|
||||
remaining -= int64(chunkView.ViewSize)
|
||||
stats.FilerRequestHistogram.WithLabelValues("chunkDownload").Observe(time.Since(start).Seconds())
|
||||
|
||||
Reference in New Issue
Block a user