iceberg: add delete file rewrite maintenance (#8664)

* iceberg: add delete file rewrite maintenance

* iceberg: preserve untouched delete files during rewrites

* iceberg: share detection threshold defaults

* iceberg: add partition-scoped maintenance filters (#8665)

* iceberg: add partition-scoped maintenance filters

* iceberg: tighten where-filter partition matching
This commit is contained in:
Chris Lu
2026-03-16 21:11:09 -07:00
committed by GitHub
parent a3717cd4b5
commit e5c0889473
11 changed files with 2138 additions and 115 deletions

View File

@@ -153,20 +153,14 @@ func (h *Handler) scanTablesForMaintenance(
}
func normalizeDetectionConfig(config Config) Config {
normalized := config
if normalized.TargetFileSizeBytes <= 0 {
normalized.TargetFileSizeBytes = defaultTargetFileSizeMB * 1024 * 1024
config = applyThresholdDefaults(config)
if config.SnapshotRetentionHours <= 0 {
config.SnapshotRetentionHours = defaultSnapshotRetentionHours
}
if normalized.MinInputFiles < 2 {
normalized.MinInputFiles = defaultMinInputFiles
if config.MaxSnapshotsToKeep <= 0 {
config.MaxSnapshotsToKeep = defaultMaxSnapshotsToKeep
}
if normalized.MinManifestsToRewrite < minManifestsToRewrite {
normalized.MinManifestsToRewrite = minManifestsToRewrite
}
if normalized.OrphanOlderThanHours <= 0 {
normalized.OrphanOlderThanHours = defaultOrphanOlderThanHours
}
return normalized
return config
}
func (h *Handler) tableNeedsMaintenance(
@@ -181,6 +175,25 @@ func (h *Handler) tableNeedsMaintenance(
) (bool, error) {
config = normalizeDetectionConfig(config)
var predicate *partitionPredicate
if strings.TrimSpace(config.Where) != "" {
needsPredicate := false
for _, op := range ops {
if op == "compact" || op == "rewrite_position_delete_files" || op == "rewrite_manifests" {
needsPredicate = true
break
}
}
if needsPredicate {
var err error
predicate, err = parsePartitionPredicate(config.Where, meta)
if err != nil {
return false, err
}
}
}
_ = predicate // used by rewrite_position_delete_files; planning index handles compact/rewrite_manifests
// Evaluate the metadata-only expiration check first so large tables do not
// pay for manifest reads when snapshot expiry already makes them eligible.
for _, op := range ops {
@@ -267,6 +280,20 @@ func (h *Handler) tableNeedsMaintenance(
if eligible {
return true, nil
}
case "rewrite_position_delete_files":
manifests, err := getCurrentManifests()
if err != nil {
opEvalErrors = append(opEvalErrors, fmt.Sprintf("%s: %v", op, err))
continue
}
eligible, err := hasEligibleDeleteRewrite(ctx, filerClient, bucketName, tablePath, manifests, config, meta, predicate)
if err != nil {
opEvalErrors = append(opEvalErrors, fmt.Sprintf("%s: %v", op, err))
continue
}
if eligible {
return true, nil
}
case "rewrite_manifests":
eligible, err := checkPlanningIndex(op, (*planningIndex).rewriteManifestsEligible)
if err != nil {
@@ -351,6 +378,8 @@ func hasEligibleCompaction(
bucketName, tablePath string,
manifests []iceberg.ManifestFile,
config Config,
meta table.Metadata,
predicate *partitionPredicate,
) (bool, error) {
if len(manifests) == 0 {
return false, nil
@@ -390,10 +419,81 @@ func hasEligibleCompaction(
allEntries = append(allEntries, entries...)
}
bins := buildCompactionBins(allEntries, config.TargetFileSizeBytes, minInputFiles)
candidateEntries := allEntries
if predicate != nil {
specsByID := specByID(meta)
candidateEntries = make([]iceberg.ManifestEntry, 0, len(allEntries))
for _, entry := range allEntries {
spec, ok := specsByID[int(entry.DataFile().SpecID())]
if !ok {
continue
}
match, err := predicate.Matches(spec, entry.DataFile().Partition())
if err != nil {
return false, err
}
if match {
candidateEntries = append(candidateEntries, entry)
}
}
}
bins := buildCompactionBins(candidateEntries, config.TargetFileSizeBytes, minInputFiles)
return len(bins) > 0, nil
}
func countDataManifestsForRewrite(
ctx context.Context,
filerClient filer_pb.SeaweedFilerClient,
bucketName, tablePath string,
manifests []iceberg.ManifestFile,
meta table.Metadata,
predicate *partitionPredicate,
) (int64, error) {
if predicate == nil {
return countDataManifests(manifests), nil
}
specsByID := specByID(meta)
var count int64
for _, mf := range manifests {
if mf.ManifestContent() != iceberg.ManifestContentData {
continue
}
manifestData, err := loadFileByIcebergPath(ctx, filerClient, bucketName, tablePath, mf.FilePath())
if err != nil {
return 0, fmt.Errorf("read manifest %s: %w", mf.FilePath(), err)
}
entries, err := iceberg.ReadManifest(mf, bytes.NewReader(manifestData), true)
if err != nil {
return 0, fmt.Errorf("parse manifest %s: %w", mf.FilePath(), err)
}
if len(entries) == 0 {
continue
}
spec, ok := specsByID[int(mf.PartitionSpecID())]
if !ok {
continue
}
allMatch := len(entries) > 0
for _, entry := range entries {
match, err := predicate.Matches(spec, entry.DataFile().Partition())
if err != nil {
return 0, err
}
if !match {
allMatch = false
break
}
}
if allMatch {
count++
}
}
return count, nil
}
func compactionMinInputFiles(minInputFiles int64) (int, error) {
// Ensure the configured value is positive and fits into the platform's int type
if minInputFiles <= 0 {