Fix S3 Gateway Read Failover #8076 (#8087)

* fix s3 read failover #8076

- Implement cache invalidation in vidMapClient
- Add retry logic in shared PrepareStreamContentWithThrottler
- Update S3 Gateway to use FilerClient directly for invalidation support
- Remove obsolete simpleMasterClient struct

* improve observability for chunk re-lookup failures

Added a warning log when volume location re-lookup fails after cache invalidation in PrepareStreamContentWithThrottler.

* address code review feedback

- Prevent infinite retry loops by comparing old/new URLs before retry
- Update fileId2Url map after successful re-lookup for subsequent references
- Add comprehensive test coverage for failover logic
- Add tests for InvalidateCache method

* Fix: prevent data duplication in stream retry and improve VidMap robustness

* Cleanup: remove redundant check in InvalidateCache
This commit is contained in:
Chris Lu
2026-01-22 14:07:24 -08:00
committed by GitHub
parent 2e9a7e13e2
commit 066410dbd0
7 changed files with 476 additions and 22 deletions

View File

@@ -71,6 +71,9 @@ func (vc *vidMapClient) LookupFileIdWithFallback(ctx context.Context, fileId str
}
// Cache miss - extract volume ID from file ID (format: "volumeId,needle_id_cookie")
if fileId == "" {
return nil, fmt.Errorf("empty fileId")
}
parts := strings.Split(fileId, ",")
if len(parts) != 2 {
return nil, fmt.Errorf("invalid fileId %s", fileId)
@@ -345,3 +348,16 @@ func (vc *vidMapClient) resetVidMap() {
// node is guaranteed to be non-nil after the loop
node.cache.Store(nil)
}
// InvalidateCache drops the cached locations of the volume that fileId
// refers to. The volume ID is taken from the text before the first comma in
// fileId (file IDs look like "volumeId,needle_id_cookie"). If that text is
// not a base-10 unsigned integer that fits in 32 bits, the call does nothing.
func (vc *vidMapClient) InvalidateCache(fileId string) {
	// Only the volume ID prefix is needed; whatever follows the first comma is ignored.
	volumePart, _, _ := strings.Cut(fileId, ",")

	volumeID, parseErr := strconv.ParseUint(volumePart, 10, 32)
	if parseErr != nil {
		// Unparseable volume ID: there is nothing to invalidate.
		return
	}

	target := uint32(volumeID)
	vc.withCurrentVidMap(func(current *vidMap) {
		current.deleteVid(target)
	})
}