iceberg: add sort-aware compaction rewrite (#8666)
* iceberg: add sort-aware compaction rewrite
* iceberg: share filtered row iteration in compaction
* iceberg: rely on table sort order for sort rewrites
* iceberg: harden sort compaction planning
* iceberg: include rewrite strategy in planning config hash

compactionPlanningConfigHash now incorporates RewriteStrategy and SortMaxInputBytes so cached planning results are invalidated when sort strategy settings change. Also use the bytesPerMB constant in compactionNoEligibleMessage.
This commit is contained in:
@@ -187,19 +187,11 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
||||
{
|
||||
Name: "rewrite_strategy",
|
||||
Label: "Rewrite Strategy",
|
||||
Description: "binpack keeps the current row order; sort rewrites each compaction bin using sort_fields or the table sort order.",
|
||||
Description: "binpack keeps the existing row order; sort rewrites each compaction bin using the Iceberg table sort order.",
|
||||
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
|
||||
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
|
||||
Placeholder: "binpack or sort",
|
||||
},
|
||||
{
|
||||
Name: "sort_fields",
|
||||
Label: "Sort Fields",
|
||||
Description: "Comma-separated field names for rewrite_strategy=sort. Blank uses the table sort order when present.",
|
||||
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
|
||||
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
|
||||
Placeholder: "id, created_at",
|
||||
},
|
||||
{
|
||||
Name: "sort_max_input_mb",
|
||||
Label: "Sort Max Input (MB)",
|
||||
@@ -325,8 +317,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
||||
"max_commit_retries": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxCommitRetries}},
|
||||
"operations": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: defaultOperations}},
|
||||
"apply_deletes": {Kind: &plugin_pb.ConfigValue_BoolValue{BoolValue: true}},
|
||||
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "binpack"}},
|
||||
"sort_fields": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
|
||||
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: defaultRewriteStrategy}},
|
||||
"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 0}},
|
||||
"where": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
|
||||
},
|
||||
@@ -355,8 +346,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
||||
"max_commit_retries": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxCommitRetries}},
|
||||
"operations": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: defaultOperations}},
|
||||
"apply_deletes": {Kind: &plugin_pb.ConfigValue_BoolValue{BoolValue: true}},
|
||||
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "binpack"}},
|
||||
"sort_fields": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
|
||||
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: defaultRewriteStrategy}},
|
||||
"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 0}},
|
||||
"where": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
|
||||
},
|
||||
|
||||
Reference in New Issue
Block a user