From d9d67074013c07f01a7ee94772a1b3d5c609a833 Mon Sep 17 00:00:00 2001 From: Chris Lu Date: Sun, 15 Mar 2026 11:42:06 -0700 Subject: [PATCH] Change iceberg compaction target file size config from bytes to MB (#8636) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change iceberg target_file_size config from bytes to MB Rename the config field from target_file_size_bytes to target_file_size_mb with a default of 256 (MB). The value is converted to bytes internally. This makes the config more user-friendly — entering 256 is clearer than 268435456. Co-authored-by: Copilot --- weed/plugin/worker/iceberg/config.go | 6 +++--- weed/plugin/worker/iceberg/handler.go | 12 ++++++------ 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/weed/plugin/worker/iceberg/config.go b/weed/plugin/worker/iceberg/config.go index f5a6557a1..bf9c1b06e 100644 --- a/weed/plugin/worker/iceberg/config.go +++ b/weed/plugin/worker/iceberg/config.go @@ -16,7 +16,7 @@ const ( defaultMaxSnapshotsToKeep = 5 defaultOrphanOlderThanHours = 72 defaultMaxCommitRetries = 5 - defaultTargetFileSizeBytes = 256 * 1024 * 1024 + defaultTargetFileSizeMB = 256 defaultMinInputFiles = 5 defaultMinManifestsToRewrite = 5 defaultOperations = "all" @@ -42,7 +42,7 @@ func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config { MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep), OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours), MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries), - TargetFileSizeBytes: readInt64Config(values, "target_file_size_bytes", defaultTargetFileSizeBytes), + TargetFileSizeBytes: readInt64Config(values, "target_file_size_mb", defaultTargetFileSizeMB) * 1024 * 1024, MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles), MinManifestsToRewrite: readInt64Config(values, "min_manifests_to_rewrite", defaultMinManifestsToRewrite), Operations: readStringConfig(values, "operations", defaultOperations), @@ -62,7 +62,7 @@ func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config { cfg.MaxCommitRetries = defaultMaxCommitRetries } if cfg.TargetFileSizeBytes <= 0 { - cfg.TargetFileSizeBytes = defaultTargetFileSizeBytes + cfg.TargetFileSizeBytes = defaultTargetFileSizeMB * 1024 * 1024 } if cfg.MinInputFiles < 2 { cfg.MinInputFiles = defaultMinInputFiles diff --git a/weed/plugin/worker/iceberg/handler.go b/weed/plugin/worker/iceberg/handler.go index e128a7575..b43354892 100644 --- a/weed/plugin/worker/iceberg/handler.go +++ b/weed/plugin/worker/iceberg/handler.go @@ -133,12 +133,12 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor { Description: "Controls for bin-packing small Parquet data files.", Fields: []*plugin_pb.ConfigField{ { - Name: "target_file_size_bytes", - Label: "Target File Size (bytes)", - Description: "Files smaller than this are candidates for compaction.", + Name: "target_file_size_mb", + Label: "Target File Size (MB)", + Description: "Files smaller than this (in megabytes) are candidates for compaction.", FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_INT64, Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_NUMBER, - MinValue: &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 1024 * 1024}}, + MinValue: &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 1}}, }, { Name: "min_input_files", @@ -205,7 +205,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor { }, }, DefaultValues: map[string]*plugin_pb.ConfigValue{ - "target_file_size_bytes": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeBytes}}, + "target_file_size_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeMB}}, "min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}}, "min_manifests_to_rewrite": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinManifestsToRewrite}}, "snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}}, @@ -227,7 +227,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor { JobTypeMaxRuntimeSeconds: 3600, // 1 hour max }, WorkerDefaultValues: map[string]*plugin_pb.ConfigValue{ - "target_file_size_bytes": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeBytes}}, + "target_file_size_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeMB}}, "min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}}, "snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}}, "max_snapshots_to_keep": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxSnapshotsToKeep}},