iceberg: detect maintenance work per operation (#8639)

* iceberg: detect maintenance work per operation

* iceberg: ignore delete manifests during detection

* iceberg: clean up detection maintenance planning

* iceberg: tighten detection manifest heuristics

* Potential fix for code scanning alert no. 330: Incorrect conversion between integer types

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>

* iceberg: tolerate per-operation detection errors

* iceberg: fix fake metadata location versioning

* iceberg: check snapshot expiry before manifest loads

* iceberg: make expire-snapshots switch case explicit

---------

Co-authored-by: Copilot Autofix powered by AI <62310815+github-advanced-security[bot]@users.noreply.github.com>
This commit is contained in:
Chris Lu
2026-03-15 20:21:50 -07:00
committed by GitHub
parent a00eddb525
commit 6b2b442450
5 changed files with 807 additions and 78 deletions

View File

@@ -505,13 +505,19 @@ func TestBuildCompactionBinsMultiplePartitions(t *testing.T) {
partA := map[int]any{1: "us-east"}
partB := map[int]any{1: "eu-west"}
partitionSpec := iceberg.NewPartitionSpec(iceberg.PartitionField{
SourceID: 1,
FieldID: 1000,
Name: "region",
Transform: iceberg.IdentityTransform{},
})
entries := makeTestEntries(t, []testEntrySpec{
{path: "data/a1.parquet", size: 1024, partition: partA},
{path: "data/a2.parquet", size: 2048, partition: partA},
{path: "data/b1.parquet", size: 1024, partition: partB},
{path: "data/b2.parquet", size: 2048, partition: partB},
{path: "data/b3.parquet", size: 4096, partition: partB},
{path: "data/a1.parquet", size: 1024, partition: partA, partitionSpec: &partitionSpec},
{path: "data/a2.parquet", size: 2048, partition: partA, partitionSpec: &partitionSpec},
{path: "data/b1.parquet", size: 1024, partition: partB, partitionSpec: &partitionSpec},
{path: "data/b2.parquet", size: 2048, partition: partB, partitionSpec: &partitionSpec},
{path: "data/b3.parquet", size: 4096, partition: partB, partitionSpec: &partitionSpec},
})
bins := buildCompactionBins(entries, targetSize, minFiles)
@@ -574,40 +580,65 @@ func TestSplitOversizedBinDropsImpossibleRunts(t *testing.T) {
}
type testEntrySpec struct {
path string
size int64
partition map[int]any
specID int32 // partition spec ID; 0 uses UnpartitionedSpec
path string
size int64
partition map[int]any
partitionSpec *iceberg.PartitionSpec
specID int32 // partition spec ID; 0 uses UnpartitionedSpec
}
// makeTestEntries creates manifest entries using UnpartitionedSpec (spec ID 0).
// The specID field in testEntrySpec is ignored here; for multi-spec testing,
// use makeTestEntriesWithSpec instead.
func makeTestEntries(t *testing.T, specs []testEntrySpec) []iceberg.ManifestEntry {
// buildTestDataFile constructs a single iceberg.DataFile from a test entry
// spec. It fails the test if a partitioned entry does not carry an explicit
// partition spec, or if the data-file builder rejects the inputs.
func buildTestDataFile(t *testing.T, spec testEntrySpec) iceberg.DataFile {
	t.Helper()

	// Partitioned entries must supply their own spec; unpartitioned ones
	// fall back to the shared default.
	pSpec := iceberg.UnpartitionedSpec
	switch {
	case spec.partitionSpec != nil:
		pSpec = spec.partitionSpec
	case len(spec.partition) > 0:
		t.Fatalf("partition spec is required for partitioned test entry %s", spec.path)
	}

	builder, err := iceberg.NewDataFileBuilder(
		*pSpec,
		iceberg.EntryContentData,
		spec.path,
		iceberg.ParquetFile,
		spec.partition,
		nil, nil,
		1, // recordCount (must be > 0)
		spec.size,
	)
	if err != nil {
		t.Fatalf("failed to build data file %s: %v", spec.path, err)
	}
	return builder.Build()
}
func makeManifestEntriesWithSnapshot(
t *testing.T,
specs []testEntrySpec,
snapshotID int64,
status iceberg.ManifestEntryStatus,
) []iceberg.ManifestEntry {
t.Helper()
entries := make([]iceberg.ManifestEntry, 0, len(specs))
for _, spec := range specs {
partSpec := *iceberg.UnpartitionedSpec
dfBuilder, err := iceberg.NewDataFileBuilder(
partSpec,
iceberg.EntryContentData,
spec.path,
iceberg.ParquetFile,
spec.partition,
entries = append(entries, iceberg.NewManifestEntry(
status,
&snapshotID,
nil, nil,
1, // recordCount (must be > 0)
spec.size,
)
if err != nil {
t.Fatalf("failed to build data file %s: %v", spec.path, err)
}
snapID := int64(1)
entry := iceberg.NewManifestEntry(iceberg.EntryStatusADDED, &snapID, nil, nil, dfBuilder.Build())
entries = append(entries, entry)
buildTestDataFile(t, spec),
))
}
return entries
}
// makeTestEntries creates manifest entries using the default unpartitioned
// spec. For multi-spec testing, use makeTestEntriesWithSpec instead.
func makeTestEntries(t *testing.T, specs []testEntrySpec) []iceberg.ManifestEntry {
	t.Helper()
	// Every entry is recorded as ADDED under a fixed snapshot ID.
	const defaultSnapshotID int64 = 1
	return makeManifestEntriesWithSnapshot(t, specs, defaultSnapshotID, iceberg.EntryStatusADDED)
}
// makeTestEntriesWithSpec creates manifest entries using specific partition specs.
// Each spec in the specs slice can specify a specID; the entry is built using
// a PartitionSpec with that ID.