iceberg: add sort-aware compaction rewrite (#8666)

* iceberg: add sort-aware compaction rewrite

* iceberg: share filtered row iteration in compaction

* iceberg: rely on table sort order for sort rewrites

* iceberg: harden sort compaction planning

* iceberg: include rewrite strategy in planning config hash

compactionPlanningConfigHash now incorporates RewriteStrategy and
SortMaxInputBytes so cached planning results are invalidated when
sort strategy settings change. Also use the bytesPerMB constant in
compactionNoEligibleMessage.
This commit is contained in:
Chris Lu
2026-03-17 00:57:32 -07:00
committed by GitHub
parent e5c0889473
commit 55e988a7ee
8 changed files with 950 additions and 135 deletions

View File

@@ -34,6 +34,12 @@ func TestParseConfig(t *testing.T) {
if config.Operations != defaultOperations {
t.Errorf("expected Operations=%q, got %q", defaultOperations, config.Operations)
}
if config.RewriteStrategy != defaultRewriteStrategy {
t.Errorf("expected RewriteStrategy=%q, got %q", defaultRewriteStrategy, config.RewriteStrategy)
}
if config.SortMaxInputBytes != 0 {
t.Errorf("expected SortMaxInputBytes=0, got %d", config.SortMaxInputBytes)
}
}
func TestParseOperations(t *testing.T) {
@@ -879,6 +885,38 @@ func TestNormalizeDetectionConfigUsesSharedDefaults(t *testing.T) {
}
}
func TestParseConfigRewriteStrategy(t *testing.T) {
config := ParseConfig(map[string]*plugin_pb.ConfigValue{
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "sort"}},
"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 64}},
})
if config.RewriteStrategy != "sort" {
t.Fatalf("expected sort rewrite strategy, got %q", config.RewriteStrategy)
}
if config.SortMaxInputBytes != 64*1024*1024 {
t.Fatalf("expected SortMaxInputBytes=64MB, got %d", config.SortMaxInputBytes)
}
config = ParseConfig(map[string]*plugin_pb.ConfigValue{
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "invalid"}},
"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: -1}},
})
if config.RewriteStrategy != defaultRewriteStrategy {
t.Fatalf("expected invalid rewrite strategy to fall back to %q, got %q", defaultRewriteStrategy, config.RewriteStrategy)
}
if config.SortMaxInputBytes != 0 {
t.Fatalf("expected negative sort cap to clamp to 0, got %d", config.SortMaxInputBytes)
}
maxMB := int64(^uint64(0)>>1) / bytesPerMB
config = ParseConfig(map[string]*plugin_pb.ConfigValue{
"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: fmt.Sprintf("%d", maxMB+1)}},
})
if config.SortMaxInputBytes != maxMB*bytesPerMB {
t.Fatalf("expected oversized sort cap to clamp to %d bytes, got %d", maxMB*bytesPerMB, config.SortMaxInputBytes)
}
}
func TestCollectPositionDeletes(t *testing.T) {
fs, client := startFakeFiler(t)