* Add multi-partition-spec compaction and delete-aware compaction (Phase 3) Multi-partition-spec compaction: - Add SpecID to compactionBin struct and group by spec+partition key - Remove the len(specIDs) > 1 skip that blocked spec-evolved tables - Write per-spec manifests in compaction commit using specByID map - Use per-bin PartitionSpec when calling NewDataFileBuilder Delete-aware compaction: - Add ApplyDeletes config (default: true) with readBoolConfig helper - Implement position delete collection (file_path + pos Parquet columns) - Implement equality delete collection (field ID to column mapping) - Update mergeParquetFiles to filter rows via position deletes (binary search) and equality deletes (hash set lookup) - Smart delete manifest carry-forward: drop when all data files compacted - Fix EXISTING/DELETED entries to include sequence numbers Tests for multi-spec bins, delete collection, merge filtering, and end-to-end compaction with position/equality/mixed deletes. * Add structured metrics and per-bin progress to iceberg maintenance - Change return type of all four operations from (string, error) to (string, map[string]int64, error) with structured metric counts (files_merged, snapshots_expired, orphans_removed, duration_ms, etc.) - Add onProgress callback to compactDataFiles for per-bin progress - In Execute, pass progress callback that sends JobProgressUpdate with per-bin stage messages - Accumulate per-operation metrics with dot-prefixed keys (e.g. 
compact.files_merged) into OutputValues on completion - Update testing_api.go wrappers and integration test call sites - Add tests: TestCompactDataFilesMetrics, TestExpireSnapshotsMetrics, TestExecuteCompletionOutputValues * Address review feedback: group equality deletes by field IDs, use metric constants - Group equality deletes by distinct equality_ids sets so different delete files with different equality columns are handled correctly - Use length-prefixed type-aware encoding in buildEqualityKey to avoid ambiguity between types and collisions from null bytes - Extract metric key strings into package-level constants * Fix buildEqualityKey to use length-prefixed type-aware encoding The previous implementation used plain String() concatenation with null byte separators, which caused type ambiguity (int 123 vs string "123") and separator collisions when values contain null bytes. Now each value is serialized as "kind:length:value" for unambiguous composite keys. This fix was missed in the prior cherry-pick due to a merge conflict. 
* Address nitpick review comments - Document patchManifestContentToDeletes workaround: explain that iceberg-go WriteManifest cannot create delete manifests, and note the fail-fast validation on pattern match - Document makeTestEntries: note that specID field is ignored and callers should use makeTestEntriesWithSpec for multi-spec testing * fmt * Fix path normalization, manifest threshold, and artifact filename collisions - Normalize file paths in position delete collection and lookup so that absolute S3 URLs and relative paths match correctly - Fix rewriteManifests threshold check to count only data manifests (was including delete manifests in the count and metric) - Add random suffix to artifact filenames in compactDataFiles and rewriteManifests to prevent collisions between concurrent runs - Sort compaction bins by SpecID then PartitionKey for deterministic ordering across specs * Fix pos delete read, deduplicate column resolution, minor cleanups - Remove broken Column() guard in position delete reading that silently defaulted pos to 0; unconditionally extract Int64() instead - Deduplicate column resolution in readEqualityDeleteFile by calling resolveEqualityColIndices instead of inlining the same logic - Add warning log in readBoolConfig for unrecognized string values - Fix CompactDataFiles call site in integration test to capture 3 return values * Advance progress on all bins, deterministic manifest order, assert metrics - Call onProgress for every bin iteration including skipped/failed bins so progress reporting never appears stalled - Sort spec IDs before iterating specEntriesMap to produce deterministic manifest list ordering across runs - Assert expected metric keys in CompactDataFiles integration test --------- Co-authored-by: Copilot <copilot@github.com>
227 lines
7.4 KiB
Go
227 lines
7.4 KiB
Go
package iceberg
|
|
|
|
import (
|
|
"fmt"
|
|
"strconv"
|
|
"strings"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/plugin_pb"
|
|
)
|
|
|
|
const (
	// jobType identifies this plugin's maintenance job kind.
	jobType = "iceberg_maintenance"

	// Default tuning values, applied by ParseConfig when a config field is
	// missing, malformed, or out of range.
	defaultSnapshotRetentionHours = 168 // 7 days
	defaultMaxSnapshotsToKeep = 5
	defaultOrphanOlderThanHours = 72
	defaultMaxCommitRetries = 5
	defaultTargetFileSizeMB = 256
	defaultMinInputFiles = 5
	defaultMinManifestsToRewrite = 5
	defaultOperations = "all" // see parseOperations for the expansion of "all"

	// Metric keys returned by maintenance operations.
	// Exported so callers and tests can assert on result maps by constant
	// rather than by string literal.
	MetricFilesMerged = "files_merged"
	MetricFilesWritten = "files_written"
	MetricBins = "bins"
	MetricSnapshotsExpired = "snapshots_expired"
	MetricFilesDeleted = "files_deleted"
	MetricOrphansRemoved = "orphans_removed"
	MetricManifestsRewritten = "manifests_rewritten"
	MetricEntriesTotal = "entries_total"
	MetricDurationMs = "duration_ms"
)
|
|
|
|
// Config holds parsed worker config values.
// Field comments give the plugin config key each value is read from and the
// clamping ParseConfig applies.
type Config struct {
	SnapshotRetentionHours int64 // "snapshot_retention_hours"; reset to default when <= 0

	MaxSnapshotsToKeep int64 // "max_snapshots_to_keep"; reset to default when <= 0

	OrphanOlderThanHours int64 // "orphan_older_than_hours"; reset to default when <= 0

	MaxCommitRetries int64 // "max_commit_retries"; reset to default when <= 0

	TargetFileSizeBytes int64 // "target_file_size_mb" converted to bytes; reset to default when <= 0

	MinInputFiles int64 // "min_input_files"; reset to default when < 2

	MinManifestsToRewrite int64 // "min_manifests_to_rewrite"; clamped to a floor of 2 (not the default)

	Operations string // "operations": comma-separated operation list, or "all"

	ApplyDeletes bool // "apply_deletes"; defaults to true
}
|
|
|
|
// ParseConfig extracts an iceberg maintenance Config from plugin config values.
|
|
// Values are clamped to safe minimums to prevent misconfiguration.
|
|
func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
|
|
cfg := Config{
|
|
SnapshotRetentionHours: readInt64Config(values, "snapshot_retention_hours", defaultSnapshotRetentionHours),
|
|
MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep),
|
|
OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours),
|
|
MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries),
|
|
TargetFileSizeBytes: readInt64Config(values, "target_file_size_mb", defaultTargetFileSizeMB) * 1024 * 1024,
|
|
MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles),
|
|
MinManifestsToRewrite: readInt64Config(values, "min_manifests_to_rewrite", defaultMinManifestsToRewrite),
|
|
Operations: readStringConfig(values, "operations", defaultOperations),
|
|
ApplyDeletes: readBoolConfig(values, "apply_deletes", true),
|
|
}
|
|
|
|
// Clamp to safe minimums using the default constants
|
|
if cfg.SnapshotRetentionHours <= 0 {
|
|
cfg.SnapshotRetentionHours = defaultSnapshotRetentionHours
|
|
}
|
|
if cfg.MaxSnapshotsToKeep <= 0 {
|
|
cfg.MaxSnapshotsToKeep = defaultMaxSnapshotsToKeep
|
|
}
|
|
if cfg.OrphanOlderThanHours <= 0 {
|
|
cfg.OrphanOlderThanHours = defaultOrphanOlderThanHours
|
|
}
|
|
if cfg.MaxCommitRetries <= 0 {
|
|
cfg.MaxCommitRetries = defaultMaxCommitRetries
|
|
}
|
|
if cfg.TargetFileSizeBytes <= 0 {
|
|
cfg.TargetFileSizeBytes = defaultTargetFileSizeMB * 1024 * 1024
|
|
}
|
|
if cfg.MinInputFiles < 2 {
|
|
cfg.MinInputFiles = defaultMinInputFiles
|
|
}
|
|
if cfg.MinManifestsToRewrite < 2 {
|
|
cfg.MinManifestsToRewrite = 2
|
|
}
|
|
|
|
return cfg
|
|
}
|
|
|
|
// parseOperations returns the ordered list of maintenance operations to execute.
// Order follows Iceberg best practices: compact → expire_snapshots → remove_orphans → rewrite_manifests.
// Returns an error if any unknown operation is specified or the result would be empty.
func parseOperations(ops string) ([]string, error) {
	// canonical doubles as both the validity whitelist and the output order.
	canonical := []string{"compact", "expire_snapshots", "remove_orphans", "rewrite_manifests"}

	normalized := strings.TrimSpace(strings.ToLower(ops))
	if normalized == "" || normalized == "all" {
		return []string{"compact", "expire_snapshots", "remove_orphans", "rewrite_manifests"}, nil
	}

	wanted := make(map[string]bool)
	for _, token := range strings.Split(normalized, ",") {
		name := strings.TrimSpace(token)
		if name == "" {
			continue
		}
		recognized := false
		for _, candidate := range canonical {
			if candidate == name {
				recognized = true
				break
			}
		}
		if !recognized {
			return nil, fmt.Errorf("unknown maintenance operation %q (valid: compact, expire_snapshots, remove_orphans, rewrite_manifests)", name)
		}
		wanted[name] = true
	}

	// Emit in canonical order regardless of the order the user listed them.
	var ordered []string
	for _, candidate := range canonical {
		if wanted[candidate] {
			ordered = append(ordered, candidate)
		}
	}

	if len(ordered) == 0 {
		return nil, fmt.Errorf("no valid maintenance operations specified")
	}
	return ordered, nil
}
|
|
|
|
// extractMetadataVersion parses the numeric version out of a metadata file
// name, e.g. "v3.metadata.json" or "v3-{nonce}.metadata.json" → 3.
// Malformed names yield 0 (strconv.Atoi error is deliberately discarded).
func extractMetadataVersion(metadataFileName string) int {
	core := strings.TrimSuffix(strings.TrimPrefix(metadataFileName, "v"), ".metadata.json")

	// Strip any nonce suffix (e.g. "3-1709766000" → "3"). A dash at index 0
	// is left alone so a leading "-" is not misread as a nonce separator.
	if dash := strings.Index(core, "-"); dash > 0 {
		core = core[:dash]
	}

	version, _ := strconv.Atoi(core)
	return version
}
|
|
|
|
// readStringConfig reads a string value from plugin config, with fallback.
|
|
func readStringConfig(values map[string]*plugin_pb.ConfigValue, field string, fallback string) string {
|
|
if values == nil {
|
|
return fallback
|
|
}
|
|
value := values[field]
|
|
if value == nil {
|
|
return fallback
|
|
}
|
|
switch kind := value.Kind.(type) {
|
|
case *plugin_pb.ConfigValue_StringValue:
|
|
return kind.StringValue
|
|
case *plugin_pb.ConfigValue_Int64Value:
|
|
return strconv.FormatInt(kind.Int64Value, 10)
|
|
case *plugin_pb.ConfigValue_DoubleValue:
|
|
return strconv.FormatFloat(kind.DoubleValue, 'f', -1, 64)
|
|
case *plugin_pb.ConfigValue_BoolValue:
|
|
return strconv.FormatBool(kind.BoolValue)
|
|
default:
|
|
glog.V(1).Infof("readStringConfig: unexpected config value type %T for field %q, using fallback", value.Kind, field)
|
|
}
|
|
return fallback
|
|
}
|
|
|
|
// readBoolConfig reads a bool value from plugin config, with fallback.
|
|
func readBoolConfig(values map[string]*plugin_pb.ConfigValue, field string, fallback bool) bool {
|
|
if values == nil {
|
|
return fallback
|
|
}
|
|
value := values[field]
|
|
if value == nil {
|
|
return fallback
|
|
}
|
|
switch kind := value.Kind.(type) {
|
|
case *plugin_pb.ConfigValue_BoolValue:
|
|
return kind.BoolValue
|
|
case *plugin_pb.ConfigValue_StringValue:
|
|
s := strings.TrimSpace(strings.ToLower(kind.StringValue))
|
|
if s == "true" || s == "1" || s == "yes" {
|
|
return true
|
|
}
|
|
if s == "false" || s == "0" || s == "no" {
|
|
return false
|
|
}
|
|
glog.V(1).Infof("readBoolConfig: unrecognized string value %q for field %q, using fallback %v", kind.StringValue, field, fallback)
|
|
case *plugin_pb.ConfigValue_Int64Value:
|
|
return kind.Int64Value != 0
|
|
default:
|
|
glog.V(1).Infof("readBoolConfig: unexpected config value type %T for field %q, using fallback", value.Kind, field)
|
|
}
|
|
return fallback
|
|
}
|
|
|
|
// readInt64Config reads an int64 value from plugin config, with fallback.
|
|
func readInt64Config(values map[string]*plugin_pb.ConfigValue, field string, fallback int64) int64 {
|
|
if values == nil {
|
|
return fallback
|
|
}
|
|
value := values[field]
|
|
if value == nil {
|
|
return fallback
|
|
}
|
|
switch kind := value.Kind.(type) {
|
|
case *plugin_pb.ConfigValue_Int64Value:
|
|
return kind.Int64Value
|
|
case *plugin_pb.ConfigValue_DoubleValue:
|
|
return int64(kind.DoubleValue)
|
|
case *plugin_pb.ConfigValue_StringValue:
|
|
parsed, err := strconv.ParseInt(strings.TrimSpace(kind.StringValue), 10, 64)
|
|
if err == nil {
|
|
return parsed
|
|
}
|
|
case *plugin_pb.ConfigValue_BoolValue:
|
|
if kind.BoolValue {
|
|
return 1
|
|
}
|
|
return 0
|
|
default:
|
|
glog.V(1).Infof("readInt64Config: unexpected config value type %T for field %q, using fallback", value.Kind, field)
|
|
}
|
|
return fallback
|
|
}
|