Change iceberg compaction target file size config from bytes to MB (#8636)

Change iceberg target_file_size config from bytes to MB

Rename the config field from target_file_size_bytes to
target_file_size_mb with a default of 256 (MB). The value is
converted to bytes internally. This makes the config more
user-friendly — entering 256 is clearer than 268435456.

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
Chris Lu
2026-03-15 11:42:06 -07:00
committed by GitHub
parent 8cde3d4486
commit d9d6707401
2 changed files with 9 additions and 9 deletions

View File

@@ -16,7 +16,7 @@ const (
defaultMaxSnapshotsToKeep = 5 defaultMaxSnapshotsToKeep = 5
defaultOrphanOlderThanHours = 72 defaultOrphanOlderThanHours = 72
defaultMaxCommitRetries = 5 defaultMaxCommitRetries = 5
defaultTargetFileSizeBytes = 256 * 1024 * 1024 defaultTargetFileSizeMB = 256
defaultMinInputFiles = 5 defaultMinInputFiles = 5
defaultMinManifestsToRewrite = 5 defaultMinManifestsToRewrite = 5
defaultOperations = "all" defaultOperations = "all"
@@ -42,7 +42,7 @@ func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep), MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep),
OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours), OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours),
MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries), MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries),
TargetFileSizeBytes: readInt64Config(values, "target_file_size_bytes", defaultTargetFileSizeBytes), TargetFileSizeBytes: readInt64Config(values, "target_file_size_mb", defaultTargetFileSizeMB) * 1024 * 1024,
MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles), MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles),
MinManifestsToRewrite: readInt64Config(values, "min_manifests_to_rewrite", defaultMinManifestsToRewrite), MinManifestsToRewrite: readInt64Config(values, "min_manifests_to_rewrite", defaultMinManifestsToRewrite),
Operations: readStringConfig(values, "operations", defaultOperations), Operations: readStringConfig(values, "operations", defaultOperations),
@@ -62,7 +62,7 @@ func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
cfg.MaxCommitRetries = defaultMaxCommitRetries cfg.MaxCommitRetries = defaultMaxCommitRetries
} }
if cfg.TargetFileSizeBytes <= 0 { if cfg.TargetFileSizeBytes <= 0 {
cfg.TargetFileSizeBytes = defaultTargetFileSizeBytes cfg.TargetFileSizeBytes = defaultTargetFileSizeMB * 1024 * 1024
} }
if cfg.MinInputFiles < 2 { if cfg.MinInputFiles < 2 {
cfg.MinInputFiles = defaultMinInputFiles cfg.MinInputFiles = defaultMinInputFiles

View File

@@ -133,12 +133,12 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
Description: "Controls for bin-packing small Parquet data files.", Description: "Controls for bin-packing small Parquet data files.",
Fields: []*plugin_pb.ConfigField{ Fields: []*plugin_pb.ConfigField{
{ {
Name: "target_file_size_bytes", Name: "target_file_size_mb",
Label: "Target File Size (bytes)", Label: "Target File Size (MB)",
Description: "Files smaller than this are candidates for compaction.", Description: "Files smaller than this (in megabytes) are candidates for compaction.",
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_INT64, FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_INT64,
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_NUMBER, Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_NUMBER,
MinValue: &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 1024 * 1024}}, MinValue: &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 1}},
}, },
{ {
Name: "min_input_files", Name: "min_input_files",
@@ -205,7 +205,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
}, },
}, },
DefaultValues: map[string]*plugin_pb.ConfigValue{ DefaultValues: map[string]*plugin_pb.ConfigValue{
"target_file_size_bytes": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeBytes}}, "target_file_size_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeMB}},
"min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}}, "min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}},
"min_manifests_to_rewrite": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinManifestsToRewrite}}, "min_manifests_to_rewrite": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinManifestsToRewrite}},
"snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}}, "snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}},
@@ -227,7 +227,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
JobTypeMaxRuntimeSeconds: 3600, // 1 hour max JobTypeMaxRuntimeSeconds: 3600, // 1 hour max
}, },
WorkerDefaultValues: map[string]*plugin_pb.ConfigValue{ WorkerDefaultValues: map[string]*plugin_pb.ConfigValue{
"target_file_size_bytes": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeBytes}}, "target_file_size_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeMB}},
"min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}}, "min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}},
"snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}}, "snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}},
"max_snapshots_to_keep": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxSnapshotsToKeep}}, "max_snapshots_to_keep": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxSnapshotsToKeep}},