Change iceberg compaction target file size config from bytes to MB (#8636)
Change iceberg target_file_size config from bytes to MB. Rename the config field from target_file_size_bytes to target_file_size_mb, with a default of 256 (MB). The value is converted to bytes internally. This makes the config more user-friendly: entering 256 is clearer than 268435456. Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
@@ -16,7 +16,7 @@ const (
|
|||||||
defaultMaxSnapshotsToKeep = 5
|
defaultMaxSnapshotsToKeep = 5
|
||||||
defaultOrphanOlderThanHours = 72
|
defaultOrphanOlderThanHours = 72
|
||||||
defaultMaxCommitRetries = 5
|
defaultMaxCommitRetries = 5
|
||||||
defaultTargetFileSizeBytes = 256 * 1024 * 1024
|
defaultTargetFileSizeMB = 256
|
||||||
defaultMinInputFiles = 5
|
defaultMinInputFiles = 5
|
||||||
defaultMinManifestsToRewrite = 5
|
defaultMinManifestsToRewrite = 5
|
||||||
defaultOperations = "all"
|
defaultOperations = "all"
|
||||||
@@ -42,7 +42,7 @@ func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
|
|||||||
MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep),
|
MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep),
|
||||||
OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours),
|
OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours),
|
||||||
MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries),
|
MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries),
|
||||||
TargetFileSizeBytes: readInt64Config(values, "target_file_size_bytes", defaultTargetFileSizeBytes),
|
TargetFileSizeBytes: readInt64Config(values, "target_file_size_mb", defaultTargetFileSizeMB) * 1024 * 1024,
|
||||||
MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles),
|
MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles),
|
||||||
MinManifestsToRewrite: readInt64Config(values, "min_manifests_to_rewrite", defaultMinManifestsToRewrite),
|
MinManifestsToRewrite: readInt64Config(values, "min_manifests_to_rewrite", defaultMinManifestsToRewrite),
|
||||||
Operations: readStringConfig(values, "operations", defaultOperations),
|
Operations: readStringConfig(values, "operations", defaultOperations),
|
||||||
@@ -62,7 +62,7 @@ func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
|
|||||||
cfg.MaxCommitRetries = defaultMaxCommitRetries
|
cfg.MaxCommitRetries = defaultMaxCommitRetries
|
||||||
}
|
}
|
||||||
if cfg.TargetFileSizeBytes <= 0 {
|
if cfg.TargetFileSizeBytes <= 0 {
|
||||||
cfg.TargetFileSizeBytes = defaultTargetFileSizeBytes
|
cfg.TargetFileSizeBytes = defaultTargetFileSizeMB * 1024 * 1024
|
||||||
}
|
}
|
||||||
if cfg.MinInputFiles < 2 {
|
if cfg.MinInputFiles < 2 {
|
||||||
cfg.MinInputFiles = defaultMinInputFiles
|
cfg.MinInputFiles = defaultMinInputFiles
|
||||||
|
|||||||
@@ -133,12 +133,12 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
|||||||
Description: "Controls for bin-packing small Parquet data files.",
|
Description: "Controls for bin-packing small Parquet data files.",
|
||||||
Fields: []*plugin_pb.ConfigField{
|
Fields: []*plugin_pb.ConfigField{
|
||||||
{
|
{
|
||||||
Name: "target_file_size_bytes",
|
Name: "target_file_size_mb",
|
||||||
Label: "Target File Size (bytes)",
|
Label: "Target File Size (MB)",
|
||||||
Description: "Files smaller than this are candidates for compaction.",
|
Description: "Files smaller than this (in megabytes) are candidates for compaction.",
|
||||||
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_INT64,
|
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_INT64,
|
||||||
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_NUMBER,
|
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_NUMBER,
|
||||||
MinValue: &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 1024 * 1024}},
|
MinValue: &plugin_pb.ConfigValue{Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 1}},
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
Name: "min_input_files",
|
Name: "min_input_files",
|
||||||
@@ -205,7 +205,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
|||||||
},
|
},
|
||||||
},
|
},
|
||||||
DefaultValues: map[string]*plugin_pb.ConfigValue{
|
DefaultValues: map[string]*plugin_pb.ConfigValue{
|
||||||
"target_file_size_bytes": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeBytes}},
|
"target_file_size_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeMB}},
|
||||||
"min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}},
|
"min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}},
|
||||||
"min_manifests_to_rewrite": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinManifestsToRewrite}},
|
"min_manifests_to_rewrite": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinManifestsToRewrite}},
|
||||||
"snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}},
|
"snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}},
|
||||||
@@ -227,7 +227,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
|||||||
JobTypeMaxRuntimeSeconds: 3600, // 1 hour max
|
JobTypeMaxRuntimeSeconds: 3600, // 1 hour max
|
||||||
},
|
},
|
||||||
WorkerDefaultValues: map[string]*plugin_pb.ConfigValue{
|
WorkerDefaultValues: map[string]*plugin_pb.ConfigValue{
|
||||||
"target_file_size_bytes": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeBytes}},
|
"target_file_size_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultTargetFileSizeMB}},
|
||||||
"min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}},
|
"min_input_files": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMinInputFiles}},
|
||||||
"snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}},
|
"snapshot_retention_hours": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultSnapshotRetentionHours}},
|
||||||
"max_snapshots_to_keep": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxSnapshotsToKeep}},
|
"max_snapshots_to_keep": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxSnapshotsToKeep}},
|
||||||
|
|||||||
Reference in New Issue
Block a user