iceberg: detect maintenance work per operation (#8639)
* iceberg: detect maintenance work per operation
* iceberg: ignore delete manifests during detection
* iceberg: clean up detection maintenance planning
* iceberg: tighten detection manifest heuristics
* Potential fix for code scanning alert no. 330: Incorrect conversion between integer types

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>

* iceberg: tolerate per-operation detection errors
* iceberg: fix fake metadata location versioning
* iceberg: check snapshot expiry before manifest loads
* iceberg: make expire-snapshots switch case explicit

---------

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
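The bullets above describe a detection pass that evaluates each maintenance operation independently, so one failing check cannot suppress candidates from another. As a rough illustration of that flow (not the handler API from this commit), here is a minimal Go sketch; the detectFn type, the detectCandidates name, and the stub Config/tableMeta types are assumptions for illustration only:

package maintenance // hypothetical package name, for illustration only

import "context"

// Config and tableMeta are stand-ins for the real configuration and table
// metadata types; they are assumptions for this sketch.
type Config struct{ Operations string }

type tableMeta struct{}

// detectFn reports whether one maintenance operation is needed for a table.
type detectFn func(ctx context.Context, tbl *tableMeta, cfg Config) (bool, error)

// detectCandidates evaluates every operation independently and tolerates
// per-operation errors, so a failing compaction check (e.g. an unreadable
// manifest list) cannot suppress a snapshot-expiry candidate.
func detectCandidates(ctx context.Context, tbl *tableMeta, cfg Config, detectors map[string]detectFn) []string {
	var ops []string
	for name, detect := range detectors {
		needed, err := detect(ctx, tbl, cfg)
		if err != nil {
			continue // tolerate per-operation detection errors
		}
		if needed {
			ops = append(ops, name)
		}
	}
	return ops
}

This mirrors how the test below expects snapshot expiry to still be scheduled when compaction evaluation fails on a corrupt manifest list.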
@@ -243,8 +243,10 @@ func populateTable(t *testing.T, fs *fakeFilerServer, setup tableSetup) table.Me
 	}

 	// Build internal metadata xattr
+	const metadataVersion = 1
 	internalMeta := map[string]interface{}{
-		"metadataVersion": 1,
+		"metadataVersion":  metadataVersion,
+		"metadataLocation": path.Join("metadata", fmt.Sprintf("v%d.metadata.json", metadataVersion)),
 		"metadata": map[string]interface{}{
 			"fullMetadata": json.RawMessage(fullMetadataJSON),
 		},
@@ -370,6 +372,66 @@ func populateTable(t *testing.T, fs *fakeFilerServer, setup tableSetup) table.Me
 	return meta
 }

+func writeCurrentSnapshotManifests(t *testing.T, fs *fakeFilerServer, setup tableSetup, meta table.Metadata, manifestEntries [][]iceberg.ManifestEntry) {
+	t.Helper()
+
+	currentSnap := meta.CurrentSnapshot()
+	if currentSnap == nil {
+		t.Fatal("current snapshot is required")
+	}
+
+	metaDir := path.Join(s3tables.TablesPath, setup.BucketName, setup.tablePath(), "metadata")
+	version := meta.Version()
+	schema := meta.CurrentSchema()
+	spec := meta.PartitionSpec()
+
+	var manifests []iceberg.ManifestFile
+	for i, entries := range manifestEntries {
+		manifestName := fmt.Sprintf("detect-manifest-%d.avro", i+1)
+		var manifestBuf bytes.Buffer
+		mf, err := iceberg.WriteManifest(
+			path.Join("metadata", manifestName),
+			&manifestBuf,
+			version,
+			spec,
+			schema,
+			currentSnap.SnapshotID,
+			entries,
+		)
+		if err != nil {
+			t.Fatalf("write manifest %d: %v", i+1, err)
+		}
+		fs.putEntry(metaDir, manifestName, &filer_pb.Entry{
+			Name: manifestName,
+			Attributes: &filer_pb.FuseAttributes{
+				Mtime:    time.Now().Unix(),
+				FileSize: uint64(manifestBuf.Len()),
+			},
+			Content: manifestBuf.Bytes(),
+		})
+		manifests = append(manifests, mf)
+	}
+
+	var manifestListBuf bytes.Buffer
+	seqNum := currentSnap.SequenceNumber
+	if err := iceberg.WriteManifestList(version, &manifestListBuf, currentSnap.SnapshotID, currentSnap.ParentSnapshotID, &seqNum, 0, manifests); err != nil {
+		t.Fatalf("write current manifest list: %v", err)
+	}
+	fs.putEntry(metaDir, path.Base(currentSnap.ManifestList), &filer_pb.Entry{
+		Name: path.Base(currentSnap.ManifestList),
+		Attributes: &filer_pb.FuseAttributes{
+			Mtime:    time.Now().Unix(),
+			FileSize: uint64(manifestListBuf.Len()),
+		},
+		Content: manifestListBuf.Bytes(),
+	})
+}
+
+func makeManifestEntries(t *testing.T, specs []testEntrySpec, snapshotID int64) []iceberg.ManifestEntry {
+	t.Helper()
+	return makeManifestEntriesWithSnapshot(t, specs, snapshotID, iceberg.EntryStatusADDED)
+}
+
 // ---------------------------------------------------------------------------
 // Recording senders for Execute tests
 // ---------------------------------------------------------------------------
@@ -934,6 +996,412 @@ func TestConnectToFilerFailsWhenAllAddressesAreUnreachable(t *testing.T) {
 	}
 }

+func TestDetectSchedulesCompactionWithoutSnapshotPressure(t *testing.T) {
+	fs, client := startFakeFiler(t)
+
+	now := time.Now().UnixMilli()
+	setup := tableSetup{
+		BucketName: "test-bucket",
+		Namespace:  "analytics",
+		TableName:  "events",
+		Snapshots: []table.Snapshot{
+			{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro", SequenceNumber: 1},
+		},
+	}
+	meta := populateTable(t, fs, setup)
+	writeCurrentSnapshotManifests(t, fs, setup, meta, [][]iceberg.ManifestEntry{
+		makeManifestEntries(t, []testEntrySpec{
+			{path: "data/small-1.parquet", size: 1024, partition: map[int]any{}},
+			{path: "data/small-2.parquet", size: 1024, partition: map[int]any{}},
+			{path: "data/small-3.parquet", size: 1024, partition: map[int]any{}},
+		}, 1),
+	})
+
+	handler := NewHandler(nil)
+	config := Config{
+		SnapshotRetentionHours: 24 * 365,
+		MaxSnapshotsToKeep:     10,
+		TargetFileSizeBytes:    4096,
+		MinInputFiles:          2,
+		Operations:             "compact",
+	}
+
+	tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
+	if err != nil {
+		t.Fatalf("scanTablesForMaintenance failed: %v", err)
+	}
+	if len(tables) != 1 {
+		t.Fatalf("expected 1 compaction candidate, got %d", len(tables))
+	}
+}
+
+func TestDetectSchedulesCompactionWithDeleteManifestPresent(t *testing.T) {
+	fs, client := startFakeFiler(t)
+
+	now := time.Now().UnixMilli()
+	setup := tableSetup{
+		BucketName: "test-bucket",
+		Namespace:  "analytics",
+		TableName:  "events",
+		Snapshots: []table.Snapshot{
+			{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro", SequenceNumber: 1},
+		},
+	}
+	meta := populateTable(t, fs, setup)
+
+	currentSnap := meta.CurrentSnapshot()
+	if currentSnap == nil {
+		t.Fatal("current snapshot is required")
+	}
+
+	metaDir := path.Join(s3tables.TablesPath, setup.BucketName, setup.tablePath(), "metadata")
+	version := meta.Version()
+	schema := meta.CurrentSchema()
+	spec := meta.PartitionSpec()
+
+	dataEntries := makeManifestEntries(t, []testEntrySpec{
+		{path: "data/small-1.parquet", size: 1024, partition: map[int]any{}},
+		{path: "data/small-2.parquet", size: 1024, partition: map[int]any{}},
+		{path: "data/small-3.parquet", size: 1024, partition: map[int]any{}},
+	}, currentSnap.SnapshotID)
+
+	var dataManifestBuf bytes.Buffer
+	dataManifestName := "detect-manifest-1.avro"
+	dataManifest, err := iceberg.WriteManifest(
+		path.Join("metadata", dataManifestName),
+		&dataManifestBuf,
+		version,
+		spec,
+		schema,
+		currentSnap.SnapshotID,
+		dataEntries,
+	)
+	if err != nil {
+		t.Fatalf("write data manifest: %v", err)
+	}
+	fs.putEntry(metaDir, dataManifestName, &filer_pb.Entry{
+		Name: dataManifestName,
+		Attributes: &filer_pb.FuseAttributes{
+			Mtime:    time.Now().Unix(),
+			FileSize: uint64(dataManifestBuf.Len()),
+		},
+		Content: dataManifestBuf.Bytes(),
+	})
+
+	deleteManifest := iceberg.NewManifestFile(
+		version,
+		path.Join("metadata", "detect-delete-manifest.avro"),
+		0,
+		int32(spec.ID()),
+		currentSnap.SnapshotID,
+	).Content(iceberg.ManifestContentDeletes).
+		SequenceNum(currentSnap.SequenceNumber, currentSnap.SequenceNumber).
+		DeletedFiles(1).
+		DeletedRows(1).
+		Build()
+
+	var manifestListBuf bytes.Buffer
+	seqNum := currentSnap.SequenceNumber
+	if err := iceberg.WriteManifestList(
+		version,
+		&manifestListBuf,
+		currentSnap.SnapshotID,
+		currentSnap.ParentSnapshotID,
+		&seqNum,
+		0,
+		[]iceberg.ManifestFile{dataManifest, deleteManifest},
+	); err != nil {
+		t.Fatalf("write manifest list: %v", err)
+	}
+	fs.putEntry(metaDir, path.Base(currentSnap.ManifestList), &filer_pb.Entry{
+		Name: path.Base(currentSnap.ManifestList),
+		Attributes: &filer_pb.FuseAttributes{
+			Mtime:    time.Now().Unix(),
+			FileSize: uint64(manifestListBuf.Len()),
+		},
+		Content: manifestListBuf.Bytes(),
+	})
+
+	handler := NewHandler(nil)
+	config := Config{
+		SnapshotRetentionHours: 24 * 365,
+		MaxSnapshotsToKeep:     10,
+		TargetFileSizeBytes:    4096,
+		MinInputFiles:          2,
+		Operations:             "compact",
+	}
+
+	tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
+	if err != nil {
+		t.Fatalf("scanTablesForMaintenance failed: %v", err)
+	}
+	if len(tables) != 1 {
+		t.Fatalf("expected 1 compaction candidate with delete manifest present, got %d", len(tables))
+	}
+}
+
+func TestDetectSchedulesSnapshotExpiryDespiteCompactionEvaluationError(t *testing.T) {
+	fs, client := startFakeFiler(t)
+
+	now := time.Now().UnixMilli()
+	setup := tableSetup{
+		BucketName: "test-bucket",
+		Namespace:  "analytics",
+		TableName:  "events",
+		Snapshots: []table.Snapshot{
+			{SnapshotID: 1, TimestampMs: now - 1, ManifestList: "metadata/snap-1.avro", SequenceNumber: 1},
+		},
+	}
+	populateTable(t, fs, setup)
+
+	metaDir := path.Join(s3tables.TablesPath, setup.BucketName, setup.tablePath(), "metadata")
+	manifestListName := path.Base(setup.Snapshots[0].ManifestList)
+	fs.putEntry(metaDir, manifestListName, &filer_pb.Entry{
+		Name: manifestListName,
+		Attributes: &filer_pb.FuseAttributes{
+			Mtime:    time.Now().Unix(),
+			FileSize: uint64(len("not-a-manifest-list")),
+		},
+		Content: []byte("not-a-manifest-list"),
+	})
+
+	handler := NewHandler(nil)
+	config := Config{
+		SnapshotRetentionHours: 0,
+		MaxSnapshotsToKeep:     10,
+		Operations:             "compact,expire_snapshots",
+	}
+
+	tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
+	if err != nil {
+		t.Fatalf("scanTablesForMaintenance failed: %v", err)
+	}
+	if len(tables) != 1 {
+		t.Fatalf("expected snapshot expiration candidate despite compaction evaluation error, got %d", len(tables))
+	}
+}
+
+func TestDetectSchedulesManifestRewriteWithoutSnapshotPressure(t *testing.T) {
+	fs, client := startFakeFiler(t)
+
+	now := time.Now().UnixMilli()
+	setup := tableSetup{
+		BucketName: "test-bucket",
+		Namespace:  "analytics",
+		TableName:  "events",
+		Snapshots: []table.Snapshot{
+			{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro", SequenceNumber: 1},
+		},
+	}
+	meta := populateTable(t, fs, setup)
+
+	manifestEntries := make([][]iceberg.ManifestEntry, 0, 5)
+	for i := 0; i < 5; i++ {
+		manifestEntries = append(manifestEntries, makeManifestEntries(t, []testEntrySpec{
+			{path: fmt.Sprintf("data/rewrite-%d.parquet", i), size: 1024, partition: map[int]any{}},
+		}, 1))
+	}
+	writeCurrentSnapshotManifests(t, fs, setup, meta, manifestEntries)
+
+	handler := NewHandler(nil)
+	config := Config{
+		SnapshotRetentionHours: 24 * 365,
+		MaxSnapshotsToKeep:     10,
+		MinManifestsToRewrite:  5,
+		Operations:             "rewrite_manifests",
+	}
+
+	tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
+	if err != nil {
+		t.Fatalf("scanTablesForMaintenance failed: %v", err)
+	}
+	if len(tables) != 1 {
+		t.Fatalf("expected 1 manifest rewrite candidate, got %d", len(tables))
+	}
+}
+
+func TestDetectDoesNotScheduleManifestRewriteFromDeleteManifestsOnly(t *testing.T) {
+	fs, client := startFakeFiler(t)
+
+	now := time.Now().UnixMilli()
+	setup := tableSetup{
+		BucketName: "test-bucket",
+		Namespace:  "analytics",
+		TableName:  "events",
+		Snapshots: []table.Snapshot{
+			{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro", SequenceNumber: 1},
+		},
+	}
+	meta := populateTable(t, fs, setup)
+
+	currentSnap := meta.CurrentSnapshot()
+	if currentSnap == nil {
+		t.Fatal("current snapshot is required")
+	}
+
+	metaDir := path.Join(s3tables.TablesPath, setup.BucketName, setup.tablePath(), "metadata")
+	version := meta.Version()
+	schema := meta.CurrentSchema()
+	spec := meta.PartitionSpec()
+
+	dataEntries := makeManifestEntries(t, []testEntrySpec{
+		{path: "data/rewrite-0.parquet", size: 1024, partition: map[int]any{}},
+	}, currentSnap.SnapshotID)
+
+	var dataManifestBuf bytes.Buffer
+	dataManifestName := "detect-rewrite-data.avro"
+	dataManifest, err := iceberg.WriteManifest(
+		path.Join("metadata", dataManifestName),
+		&dataManifestBuf,
+		version,
+		spec,
+		schema,
+		currentSnap.SnapshotID,
+		dataEntries,
+	)
+	if err != nil {
+		t.Fatalf("write data manifest: %v", err)
+	}
+	fs.putEntry(metaDir, dataManifestName, &filer_pb.Entry{
+		Name: dataManifestName,
+		Attributes: &filer_pb.FuseAttributes{
+			Mtime:    time.Now().Unix(),
+			FileSize: uint64(dataManifestBuf.Len()),
+		},
+		Content: dataManifestBuf.Bytes(),
+	})
+
+	manifests := []iceberg.ManifestFile{dataManifest}
+	for i := 0; i < 4; i++ {
+		deleteManifest := iceberg.NewManifestFile(
+			version,
+			path.Join("metadata", fmt.Sprintf("detect-delete-%d.avro", i)),
+			0,
+			int32(spec.ID()),
+			currentSnap.SnapshotID,
+		).Content(iceberg.ManifestContentDeletes).
+			SequenceNum(currentSnap.SequenceNumber, currentSnap.SequenceNumber).
+			DeletedFiles(1).
+			DeletedRows(1).
+			Build()
+		manifests = append(manifests, deleteManifest)
+	}
+
+	var manifestListBuf bytes.Buffer
+	seqNum := currentSnap.SequenceNumber
+	if err := iceberg.WriteManifestList(
+		version,
+		&manifestListBuf,
+		currentSnap.SnapshotID,
+		currentSnap.ParentSnapshotID,
+		&seqNum,
+		0,
+		manifests,
+	); err != nil {
+		t.Fatalf("write manifest list: %v", err)
+	}
+	fs.putEntry(metaDir, path.Base(currentSnap.ManifestList), &filer_pb.Entry{
+		Name: path.Base(currentSnap.ManifestList),
+		Attributes: &filer_pb.FuseAttributes{
+			Mtime:    time.Now().Unix(),
+			FileSize: uint64(manifestListBuf.Len()),
+		},
+		Content: manifestListBuf.Bytes(),
+	})
+
+	handler := NewHandler(nil)
+	config := Config{
+		SnapshotRetentionHours: 24 * 365,
+		MaxSnapshotsToKeep:     10,
+		MinManifestsToRewrite:  2,
+		Operations:             "rewrite_manifests",
+	}
+
+	tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
+	if err != nil {
+		t.Fatalf("scanTablesForMaintenance failed: %v", err)
+	}
+	if len(tables) != 0 {
+		t.Fatalf("expected no manifest rewrite candidate when only one data manifest exists, got %d", len(tables))
+	}
+}
+
+func TestDetectSchedulesOrphanCleanupWithoutSnapshotPressure(t *testing.T) {
+	fs, client := startFakeFiler(t)
+
+	now := time.Now().UnixMilli()
+	setup := tableSetup{
+		BucketName: "test-bucket",
+		Namespace:  "analytics",
+		TableName:  "events",
+		Snapshots: []table.Snapshot{
+			{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro", SequenceNumber: 1},
+		},
+	}
+	populateTable(t, fs, setup)
+
+	dataDir := path.Join(s3tables.TablesPath, setup.BucketName, setup.tablePath(), "data")
+	fs.putEntry(dataDir, "stale-orphan.parquet", &filer_pb.Entry{
+		Name: "stale-orphan.parquet",
+		Attributes: &filer_pb.FuseAttributes{
+			Mtime:    time.Now().Add(-200 * time.Hour).Unix(),
+			FileSize: 100,
+		},
+		Content: []byte("orphan"),
+	})
+
+	handler := NewHandler(nil)
+	config := Config{
+		SnapshotRetentionHours: 24 * 365,
+		MaxSnapshotsToKeep:     10,
+		OrphanOlderThanHours:   72,
+		Operations:             "remove_orphans",
+	}
+
+	tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
+	if err != nil {
+		t.Fatalf("scanTablesForMaintenance failed: %v", err)
+	}
+	if len(tables) != 1 {
+		t.Fatalf("expected 1 orphan cleanup candidate, got %d", len(tables))
+	}
+}
+
+func TestDetectSchedulesOrphanCleanupWithoutSnapshots(t *testing.T) {
+	fs, client := startFakeFiler(t)
+
+	setup := tableSetup{
+		BucketName: "test-bucket",
+		Namespace:  "analytics",
+		TableName:  "events",
+	}
+	populateTable(t, fs, setup)
+
+	dataDir := path.Join(s3tables.TablesPath, setup.BucketName, setup.tablePath(), "data")
+	fs.putEntry(dataDir, "stale-orphan.parquet", &filer_pb.Entry{
+		Name: "stale-orphan.parquet",
+		Attributes: &filer_pb.FuseAttributes{
+			Mtime:    time.Now().Add(-200 * time.Hour).Unix(),
+			FileSize: 100,
+		},
+		Content: []byte("orphan"),
+	})
+
+	handler := NewHandler(nil)
+	config := Config{
+		OrphanOlderThanHours: 72,
+		Operations:           "remove_orphans",
+	}
+
+	tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
+	if err != nil {
+		t.Fatalf("scanTablesForMaintenance failed: %v", err)
+	}
+	if len(tables) != 1 {
+		t.Fatalf("expected 1 orphan cleanup candidate without snapshots, got %d", len(tables))
+	}
+}
+
+func TestStalePlanGuard(t *testing.T) {
+	fs, client := startFakeFiler(t)
+
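The two delete-manifest tests above (TestDetectSchedulesCompactionWithDeleteManifestPresent and TestDetectDoesNotScheduleManifestRewriteFromDeleteManifestsOnly) hinge on counting only data manifests toward the rewrite threshold. A minimal sketch of that heuristic, assuming the manifest type exposes a content accessor alongside the iceberg.ManifestContentDeletes constant the tests already use; the needsManifestRewrite name and the ManifestContent() accessor are assumptions for illustration, not this package's confirmed API:

// needsManifestRewrite is a hypothetical sketch: delete manifests are skipped,
// so a manifest list dominated by delete manifests never reaches the
// rewrite_manifests threshold on their account alone. "iceberg" refers to the
// same package imported by the tests above.
func needsManifestRewrite(manifests []iceberg.ManifestFile, minManifestsToRewrite int) bool {
	dataManifests := 0
	for _, mf := range manifests {
		if mf.ManifestContent() == iceberg.ManifestContentDeletes { // assumed accessor
			continue // ignore delete manifests during detection
		}
		dataManifests++
	}
	return dataManifests >= minManifestsToRewrite
}

Under this reading, one data manifest plus four delete manifests with MinManifestsToRewrite of 2 yields no candidate, exactly what the final assertion in TestDetectDoesNotScheduleManifestRewriteFromDeleteManifestsOnly expects.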