fix(s3): skip directories before marker in ListObjectVersions pagination (#8890)

* fix(s3): skip directories before marker in ListObjectVersions pagination

ListObjectVersions was re-traversing the entire directory tree from the
beginning on every paginated request, only skipping entries at the leaf
level. For buckets with millions of objects in deep hierarchies, this
caused exponentially slower responses as pagination progressed.

Two optimizations:
1. Use keyMarker to compute a startFrom position at each directory level,
   skipping directly to the relevant entry instead of scanning from the
   beginning (mirroring how ListObjects uses marker descent).
2. Skip recursing into subdirectories whose keys are entirely before the
   keyMarker.

Changes per-page cost from O(entries_before_marker) to O(tree_depth).

* test(s3): add integration test for deep-hierarchy version listing pagination

Adds TestVersioningPaginationDeepDirectoryHierarchy which creates objects
across 20 subdirectories at depth 6 (mimicking Veeam 365 backup layout)
and paginates through them with small maxKeys. Verifies correctness
(no duplicates, sorted order, all objects found) and checks that later
pages don't take dramatically longer than earlier ones — the symptom
of the pre-fix re-traversal bug. Also tests delimiter+pagination
interaction across subdirectories.

* test(s3): strengthen deep-hierarchy pagination assertions

- Replace timing warning (t.Logf) with a failing assertion (t.Errorf)
  so pagination regressions actually fail the test.
- Replace generic count/uniqueness/sort checks on CommonPrefixes with
  exact equality against the expected prefix slice, catching wrong-but-
  sorted results.

* test(s3): use allKeys for exact assertion in deep-hierarchy pagination test

Wire the allKeys slice (previously unused dead code) into the version
listing assertion, replacing generic count/uniqueness/sort checks with
an exact equality comparison against the keys that were created.
This commit is contained in:
Chris Lu
2026-04-02 15:59:52 -07:00
committed by GitHub
parent 7c59b639c9
commit a4b896a224
3 changed files with 251 additions and 1 deletions

View File

@@ -630,3 +630,57 @@ func TestListObjectVersions_PrefixWithLeadingSlash(t *testing.T) {
})
}
}
func TestComputeStartFrom(t *testing.T) {
tests := []struct {
name string
keyMarker string
relativePath string
wantStart string
wantInclusive bool
}{
{"empty marker", "", "", "", false},
{"empty marker with path", "", "dir", "", false},
{"root level file", "file1.txt", "", "file1.txt", true},
{"root level with subpath", "Mailboxes/5ac/file1", "", "Mailboxes", true},
{"matching subdir", "Mailboxes/5ac/file1", "Mailboxes", "5ac", true},
{"deeper subdir", "Mailboxes/5ac/ItemsData/file1", "Mailboxes/5ac", "ItemsData", true},
{"at leaf level", "Mailboxes/5ac/ItemsData/file1", "Mailboxes/5ac/ItemsData", "file1", true},
{"unrelated directory", "other/path", "Mailboxes", "", false},
{"marker equals relativePath", "Mailboxes", "Mailboxes", "", false},
{"marker before directory", "aaa/file", "zzz", "", false},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
vc := &versionCollector{keyMarker: tt.keyMarker}
startFrom, inclusive := vc.computeStartFrom(tt.relativePath)
assert.Equal(t, tt.wantStart, startFrom)
assert.Equal(t, tt.wantInclusive, inclusive)
})
}
}
func TestProcessDirectorySkipsBeforeMarker(t *testing.T) {
tests := []struct {
name string
keyMarker string
entryPath string
shouldSkip bool
}{
{"no marker", "", "dir_a", false},
{"dir before marker", "dir_b/file", "dir_a", true},
{"marker descends into dir", "dir_a/file", "dir_a", false},
{"dir after marker", "dir_a/file", "dir_b", false},
{"same prefix different suffix", "dir_a0/file", "dir_a", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
skip := tt.keyMarker != "" &&
!strings.HasPrefix(tt.keyMarker, tt.entryPath+"/") &&
tt.entryPath+"/" < tt.keyMarker
assert.Equal(t, tt.shouldSkip, skip)
})
}
}

View File

@@ -427,6 +427,34 @@ func (vc *versionCollector) matchesPrefixFilter(entryPath string, isDirectory bo
return isMatch || canDescend
}
// computeStartFrom extracts the first path component from keyMarker that applies
// to the given directory level (relativePath), allowing the directory listing to
// skip directly to the marker position instead of scanning from the beginning.
// Returns ("", false) when no optimization is possible.
func (vc *versionCollector) computeStartFrom(relativePath string) (startFrom string, inclusive bool) {
if vc.keyMarker == "" {
return "", false
}
var remainder string
if relativePath == "" {
remainder = vc.keyMarker
} else if strings.HasPrefix(vc.keyMarker, relativePath+"/") {
remainder = vc.keyMarker[len(relativePath)+1:]
} else {
return "", false
}
if remainder == "" {
return "", false
}
if idx := strings.Index(remainder, "/"); idx >= 0 {
return remainder[:idx], true
}
return remainder, true
}
// shouldSkipObjectForMarker returns true if the object should be skipped based on keyMarker
func (vc *versionCollector) shouldSkipObjectForMarker(objectKey string) bool {
if vc.keyMarker == "" {
@@ -639,12 +667,21 @@ func (s3a *S3ApiServer) findVersionsRecursively(currentPath, relativePath string
// collectVersions recursively collects versions from the given path
func (vc *versionCollector) collectVersions(currentPath, relativePath string) error {
startFrom := ""
inclusive := false
// On the first iteration, skip ahead to the marker position to avoid
// re-scanning all entries before the marker on every paginated request.
if markerStart, ok := vc.computeStartFrom(relativePath); ok && markerStart != "" {
startFrom = markerStart
inclusive = true
}
for {
if vc.isFull() {
return nil
}
entries, isLast, err := vc.s3a.list(currentPath, "", startFrom, false, filer.PaginationSize)
entries, isLast, err := vc.s3a.list(currentPath, "", startFrom, inclusive, filer.PaginationSize)
// After the first batch, use exclusive mode for standard pagination
inclusive = false
if err != nil {
return err
}
@@ -731,6 +768,14 @@ func (vc *versionCollector) processDirectory(currentPath, entryPath string, entr
vc.processExplicitDirectory(entryPath, entry)
}
// Skip entire subdirectory if all keys within it are before the keyMarker.
// All object keys under this directory start with entryPath+"/". If the marker
// doesn't descend into this directory and entryPath+"/" sorts before the marker,
// then every key in this subtree was already returned in a previous page.
if vc.keyMarker != "" && !strings.HasPrefix(vc.keyMarker, entryPath+"/") && entryPath+"/" < vc.keyMarker {
return nil
}
// Recursively search subdirectory
fullPath := path.Join(currentPath, entry.Name)
if err := vc.collectVersions(fullPath, entryPath); err != nil {