iceberg: add sort-aware compaction rewrite (#8666)
* iceberg: add sort-aware compaction rewrite
* iceberg: share filtered row iteration in compaction
* iceberg: rely on table sort order for sort rewrites
* iceberg: harden sort compaction planning
* iceberg: include rewrite strategy in planning config hash

compactionPlanningConfigHash now incorporates RewriteStrategy and SortMaxInputBytes so cached planning results are invalidated when sort strategy settings change. Also use the bytesPerMB constant in compactionNoEligibleMessage.
This commit is contained in:
@@ -58,6 +58,11 @@ func (h *Handler) compactDataFiles(
|
||||
return "no current snapshot", nil, nil
|
||||
}
|
||||
|
||||
rewritePlan, err := resolveCompactionRewritePlan(config, meta)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("resolve rewrite strategy: %w", err)
|
||||
}
|
||||
|
||||
// Read manifest list
|
||||
manifestListData, err := loadFileByIcebergPath(ctx, filerClient, bucketName, tablePath, currentSnap.ManifestList)
|
||||
if err != nil {
|
||||
@@ -182,9 +187,12 @@ func (h *Handler) compactDataFiles(
|
||||
}
|
||||
|
||||
// Build compaction bins: group small data files by partition.
|
||||
bins := buildCompactionBins(candidateEntries, config.TargetFileSizeBytes, minInputFiles)
|
||||
targetSize := compactionTargetSizeForPlan(config, rewritePlan)
|
||||
bins := buildCompactionBins(candidateEntries, targetSize, minInputFiles)
|
||||
initialBinCount := len(bins)
|
||||
bins = filterCompactionBinsByPlan(bins, config, rewritePlan)
|
||||
if len(bins) == 0 {
|
||||
return "no files eligible for compaction", nil, nil
|
||||
return compactionNoEligibleMessage(config, rewritePlan, initialBinCount), nil, nil
|
||||
}
|
||||
|
||||
// Build a lookup from spec ID to PartitionSpec for per-bin manifest writing.
|
||||
@@ -256,7 +264,13 @@ func (h *Handler) compactDataFiles(
|
||||
mergedFileName := fmt.Sprintf("compact-%d-%d-%s-%d.parquet", snapshotID, newSnapID, artifactSuffix, binIdx)
|
||||
mergedFilePath := path.Join("data", mergedFileName)
|
||||
|
||||
mergedData, recordCount, err := mergeParquetFiles(ctx, filerClient, bucketName, tablePath, bin.Entries, positionDeletes, eqDeleteGroups, schema)
|
||||
var mergedData []byte
|
||||
var recordCount int64
|
||||
if rewritePlan != nil && rewritePlan.strategy == "sort" {
|
||||
mergedData, recordCount, err = mergeParquetFilesSorted(ctx, filerClient, bucketName, tablePath, bin.Entries, positionDeletes, eqDeleteGroups, schema, rewritePlan)
|
||||
} else {
|
||||
mergedData, recordCount, err = mergeParquetFiles(ctx, filerClient, bucketName, tablePath, bin.Entries, positionDeletes, eqDeleteGroups, schema)
|
||||
}
|
||||
if err != nil {
|
||||
glog.Warningf("iceberg compact: failed to merge bin %d (%d files): %v", binIdx, len(bin.Entries), err)
|
||||
goto binDone
|
||||
@@ -477,6 +491,7 @@ func (h *Handler) compactDataFiles(
|
||||
"merged-files": fmt.Sprintf("%d", totalMerged),
|
||||
"new-files": fmt.Sprintf("%d", len(newManifestEntries)),
|
||||
"compaction-bins": fmt.Sprintf("%d", len(bins)),
|
||||
"rewrite-strategy": rewritePlan.strategy,
|
||||
},
|
||||
},
|
||||
SchemaID: func() *int {
|
||||
@@ -484,6 +499,9 @@ func (h *Handler) compactDataFiles(
|
||||
return &id
|
||||
}(),
|
||||
}
|
||||
if rewritePlan != nil && rewritePlan.strategy == "sort" {
|
||||
newSnapshot.Summary.Properties["sort-fields"] = rewritePlan.summaryLabel()
|
||||
}
|
||||
if err := builder.AddSnapshot(newSnapshot); err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -500,7 +518,11 @@ func (h *Handler) compactDataFiles(
|
||||
MetricBins: int64(len(bins)),
|
||||
MetricDurationMs: time.Since(start).Milliseconds(),
|
||||
}
|
||||
return fmt.Sprintf("compacted %d files into %d (across %d bins)", totalMerged, len(newManifestEntries), len(bins)), metrics, nil
|
||||
result := fmt.Sprintf("compacted %d files into %d (across %d bins)", totalMerged, len(newManifestEntries), len(bins))
|
||||
if rewritePlan != nil && rewritePlan.strategy == "sort" {
|
||||
result = fmt.Sprintf("%s using %s", result, rewritePlan.summaryLabel())
|
||||
}
|
||||
return result, metrics, nil
|
||||
}
|
||||
|
||||
// buildCompactionBins groups small data files by partition for bin-packing.
|
||||
@@ -922,109 +944,28 @@ func mergeParquetFiles(
|
||||
return nil, 0, fmt.Errorf("no parquet schema found in %s", entries[0].DataFile().FilePath())
|
||||
}
|
||||
|
||||
// Resolve equality delete column indices for each group.
|
||||
type resolvedEqGroup struct {
|
||||
colIndices []int
|
||||
keys map[string]struct{}
|
||||
}
|
||||
var resolvedEqGroups []resolvedEqGroup
|
||||
if len(eqDeleteGroups) > 0 && icebergSchema != nil {
|
||||
for _, g := range eqDeleteGroups {
|
||||
indices, resolveErr := resolveEqualityColIndices(parquetSchema, g.FieldIDs, icebergSchema)
|
||||
if resolveErr != nil {
|
||||
resolvedEqGroups, err := resolveEqualityDeleteGroupsForSchema(parquetSchema, eqDeleteGroups, icebergSchema)
|
||||
if err != nil {
|
||||
firstReader.Close()
|
||||
return nil, 0, fmt.Errorf("resolve equality columns: %w", resolveErr)
|
||||
}
|
||||
resolvedEqGroups = append(resolvedEqGroups, resolvedEqGroup{colIndices: indices, keys: g.Keys})
|
||||
}
|
||||
return nil, 0, fmt.Errorf("resolve equality columns: %w", err)
|
||||
}
|
||||
|
||||
var outputBuf bytes.Buffer
|
||||
writer := parquet.NewWriter(&outputBuf, parquetSchema)
|
||||
|
||||
var totalRows int64
|
||||
rows := make([]parquet.Row, 256)
|
||||
hasEqDeletes := len(resolvedEqGroups) > 0
|
||||
|
||||
// drainReader streams rows from reader into writer, filtering out deleted
|
||||
// rows. source is the data file path (used for error messages and
|
||||
// position delete lookups).
|
||||
drainReader := func(reader *parquet.Reader, source string) error {
|
||||
defer reader.Close()
|
||||
|
||||
// Normalize source path so it matches the normalized keys in positionDeletes.
|
||||
normalizedSource := normalizeIcebergPath(source, bucketName, tablePath)
|
||||
posDeletes := positionDeletes[normalizedSource]
|
||||
posDeleteIdx := 0
|
||||
var absolutePos int64
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
drainReader := func(reader *parquet.Reader, source string) (int64, error) {
|
||||
return visitFilteredParquetRows(ctx, reader, source, bucketName, tablePath, positionDeletes, resolvedEqGroups, func(filtered []parquet.Row) error {
|
||||
if _, err := writer.WriteRows(filtered); err != nil {
|
||||
return fmt.Errorf("write rows from %s: %w", source, err)
|
||||
}
|
||||
n, readErr := reader.ReadRows(rows)
|
||||
if n > 0 {
|
||||
// Filter rows if we have any deletes
|
||||
if len(posDeletes) > 0 || hasEqDeletes {
|
||||
writeIdx := 0
|
||||
for i := 0; i < n; i++ {
|
||||
rowPos := absolutePos + int64(i)
|
||||
|
||||
// Check position deletes (sorted, so advance index)
|
||||
if len(posDeletes) > 0 {
|
||||
for posDeleteIdx < len(posDeletes) && posDeletes[posDeleteIdx] < rowPos {
|
||||
posDeleteIdx++
|
||||
}
|
||||
if posDeleteIdx < len(posDeletes) && posDeletes[posDeleteIdx] == rowPos {
|
||||
posDeleteIdx++
|
||||
continue // skip this row
|
||||
}
|
||||
}
|
||||
|
||||
// Check equality deletes — each group independently
|
||||
deleted := false
|
||||
for _, g := range resolvedEqGroups {
|
||||
key := buildEqualityKey(rows[i], g.colIndices)
|
||||
if _, ok := g.keys[key]; ok {
|
||||
deleted = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if deleted {
|
||||
continue // skip this row
|
||||
}
|
||||
|
||||
rows[writeIdx] = rows[i]
|
||||
writeIdx++
|
||||
}
|
||||
absolutePos += int64(n)
|
||||
if writeIdx > 0 {
|
||||
if _, writeErr := writer.WriteRows(rows[:writeIdx]); writeErr != nil {
|
||||
return fmt.Errorf("write rows from %s: %w", source, writeErr)
|
||||
}
|
||||
totalRows += int64(writeIdx)
|
||||
}
|
||||
} else {
|
||||
if _, writeErr := writer.WriteRows(rows[:n]); writeErr != nil {
|
||||
return fmt.Errorf("write rows from %s: %w", source, writeErr)
|
||||
}
|
||||
totalRows += int64(n)
|
||||
}
|
||||
}
|
||||
if readErr != nil {
|
||||
if readErr == io.EOF {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("read rows from %s: %w", source, readErr)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
|
||||
// Drain the first file.
|
||||
firstSource := entries[0].DataFile().FilePath()
|
||||
if err := drainReader(firstReader, firstSource); err != nil {
|
||||
totalRows, err := drainReader(firstReader, firstSource)
|
||||
if err != nil {
|
||||
writer.Close()
|
||||
return nil, 0, err
|
||||
}
|
||||
@@ -1052,10 +993,12 @@ func mergeParquetFiles(
|
||||
return nil, 0, fmt.Errorf("schema mismatch in %s: cannot merge files with different schemas", entry.DataFile().FilePath())
|
||||
}
|
||||
|
||||
if err := drainReader(reader, entry.DataFile().FilePath()); err != nil {
|
||||
rowsWritten, err := drainReader(reader, entry.DataFile().FilePath())
|
||||
if err != nil {
|
||||
writer.Close()
|
||||
return nil, 0, err
|
||||
}
|
||||
totalRows += rowsWritten
|
||||
// data goes out of scope here, eligible for GC before next iteration.
|
||||
}
|
||||
|
||||
@@ -1066,6 +1009,202 @@ func mergeParquetFiles(
|
||||
return outputBuf.Bytes(), totalRows, nil
|
||||
}
|
||||
|
||||
type resolvedEqDeleteGroup struct {
|
||||
colIndices []int
|
||||
keys map[string]struct{}
|
||||
}
|
||||
|
||||
func resolveEqualityDeleteGroupsForSchema(
|
||||
parquetSchema *parquet.Schema,
|
||||
eqDeleteGroups []equalityDeleteGroup,
|
||||
icebergSchema *iceberg.Schema,
|
||||
) ([]resolvedEqDeleteGroup, error) {
|
||||
if len(eqDeleteGroups) == 0 || icebergSchema == nil {
|
||||
return nil, nil
|
||||
}
|
||||
|
||||
resolved := make([]resolvedEqDeleteGroup, 0, len(eqDeleteGroups))
|
||||
for _, g := range eqDeleteGroups {
|
||||
indices, err := resolveEqualityColIndices(parquetSchema, g.FieldIDs, icebergSchema)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
resolved = append(resolved, resolvedEqDeleteGroup{colIndices: indices, keys: g.Keys})
|
||||
}
|
||||
return resolved, nil
|
||||
}
|
||||
|
||||
func visitFilteredParquetRows(
|
||||
ctx context.Context,
|
||||
reader *parquet.Reader,
|
||||
source, bucketName, tablePath string,
|
||||
positionDeletes map[string][]int64,
|
||||
resolvedEqGroups []resolvedEqDeleteGroup,
|
||||
onRows func([]parquet.Row) error,
|
||||
) (int64, error) {
|
||||
defer reader.Close()
|
||||
|
||||
rows := make([]parquet.Row, 256)
|
||||
filteredRows := make([]parquet.Row, 0, len(rows))
|
||||
normalizedSource := normalizeIcebergPath(source, bucketName, tablePath)
|
||||
posDeletes := positionDeletes[normalizedSource]
|
||||
posDeleteIdx := 0
|
||||
var absolutePos int64
|
||||
var totalRows int64
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return totalRows, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
n, readErr := reader.ReadRows(rows)
|
||||
if n > 0 {
|
||||
filteredRows = filteredRows[:0]
|
||||
for i := 0; i < n; i++ {
|
||||
rowPos := absolutePos + int64(i)
|
||||
for posDeleteIdx < len(posDeletes) && posDeletes[posDeleteIdx] < rowPos {
|
||||
posDeleteIdx++
|
||||
}
|
||||
if posDeleteIdx < len(posDeletes) && posDeletes[posDeleteIdx] == rowPos {
|
||||
posDeleteIdx++
|
||||
continue
|
||||
}
|
||||
|
||||
deleted := false
|
||||
for _, g := range resolvedEqGroups {
|
||||
key := buildEqualityKey(rows[i], g.colIndices)
|
||||
if _, ok := g.keys[key]; ok {
|
||||
deleted = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if deleted {
|
||||
continue
|
||||
}
|
||||
|
||||
filteredRows = append(filteredRows, rows[i])
|
||||
}
|
||||
absolutePos += int64(n)
|
||||
if len(filteredRows) > 0 {
|
||||
if err := onRows(filteredRows); err != nil {
|
||||
return totalRows, err
|
||||
}
|
||||
totalRows += int64(len(filteredRows))
|
||||
}
|
||||
}
|
||||
|
||||
if readErr != nil {
|
||||
if readErr == io.EOF {
|
||||
return totalRows, nil
|
||||
}
|
||||
return totalRows, fmt.Errorf("read rows from %s: %w", source, readErr)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func mergeParquetFilesSorted(
|
||||
ctx context.Context,
|
||||
filerClient filer_pb.SeaweedFilerClient,
|
||||
bucketName, tablePath string,
|
||||
entries []iceberg.ManifestEntry,
|
||||
positionDeletes map[string][]int64,
|
||||
eqDeleteGroups []equalityDeleteGroup,
|
||||
icebergSchema *iceberg.Schema,
|
||||
rewritePlan *compactionRewritePlan,
|
||||
) ([]byte, int64, error) {
|
||||
if len(entries) == 0 {
|
||||
return nil, 0, fmt.Errorf("no entries to merge")
|
||||
}
|
||||
if rewritePlan == nil || rewritePlan.strategy != "sort" {
|
||||
return nil, 0, fmt.Errorf("sorted merge requires sort rewrite plan")
|
||||
}
|
||||
|
||||
firstData, err := loadFileByIcebergPath(ctx, filerClient, bucketName, tablePath, entries[0].DataFile().FilePath())
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("read parquet file %s: %w", entries[0].DataFile().FilePath(), err)
|
||||
}
|
||||
firstReader := parquet.NewReader(bytes.NewReader(firstData))
|
||||
parquetSchema := firstReader.Schema()
|
||||
if parquetSchema == nil {
|
||||
firstReader.Close()
|
||||
return nil, 0, fmt.Errorf("no parquet schema found in %s", entries[0].DataFile().FilePath())
|
||||
}
|
||||
|
||||
sortingColumns, err := rewritePlan.parquetSortingColumns(parquetSchema)
|
||||
if err != nil {
|
||||
firstReader.Close()
|
||||
return nil, 0, fmt.Errorf("resolve parquet sorting columns: %w", err)
|
||||
}
|
||||
|
||||
resolvedEqGroups, err := resolveEqualityDeleteGroupsForSchema(parquetSchema, eqDeleteGroups, icebergSchema)
|
||||
if err != nil {
|
||||
firstReader.Close()
|
||||
return nil, 0, fmt.Errorf("resolve equality columns: %w", err)
|
||||
}
|
||||
|
||||
comparator := parquetSchema.Comparator(sortingColumns...)
|
||||
var allRows []parquet.Row
|
||||
|
||||
collectRows := func(reader *parquet.Reader, source string) (int64, error) {
|
||||
return visitFilteredParquetRows(ctx, reader, source, bucketName, tablePath, positionDeletes, resolvedEqGroups, func(filtered []parquet.Row) error {
|
||||
for _, row := range filtered {
|
||||
allRows = append(allRows, row.Clone())
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
totalRows, err := collectRows(firstReader, entries[0].DataFile().FilePath())
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
|
||||
for _, entry := range entries[1:] {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, 0, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
data, err := loadFileByIcebergPath(ctx, filerClient, bucketName, tablePath, entry.DataFile().FilePath())
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("read parquet file %s: %w", entry.DataFile().FilePath(), err)
|
||||
}
|
||||
|
||||
reader := parquet.NewReader(bytes.NewReader(data))
|
||||
if !schemasEqual(parquetSchema, reader.Schema()) {
|
||||
reader.Close()
|
||||
return nil, 0, fmt.Errorf("schema mismatch in %s: cannot merge files with different schemas", entry.DataFile().FilePath())
|
||||
}
|
||||
|
||||
rowsCollected, err := collectRows(reader, entry.DataFile().FilePath())
|
||||
if err != nil {
|
||||
return nil, 0, err
|
||||
}
|
||||
totalRows += rowsCollected
|
||||
}
|
||||
|
||||
sort.SliceStable(allRows, func(i, j int) bool {
|
||||
return comparator(allRows[i], allRows[j]) < 0
|
||||
})
|
||||
|
||||
var outputBuf bytes.Buffer
|
||||
writer := parquet.NewWriter(&outputBuf, parquetSchema)
|
||||
if len(allRows) > 0 {
|
||||
if _, err := writer.WriteRows(allRows); err != nil {
|
||||
writer.Close()
|
||||
return nil, 0, fmt.Errorf("write sorted rows: %w", err)
|
||||
}
|
||||
}
|
||||
if err := writer.Close(); err != nil {
|
||||
return nil, 0, fmt.Errorf("close writer: %w", err)
|
||||
}
|
||||
|
||||
return outputBuf.Bytes(), totalRows, nil
|
||||
}
|
||||
|
||||
// compactRandomSuffix returns a short random hex string for use in artifact
|
||||
// filenames to prevent collisions between concurrent runs.
|
||||
func compactRandomSuffix() string {
|
||||
|
||||
@@ -22,6 +22,7 @@ const (
|
||||
defaultDeleteMinInputFiles = 2
|
||||
defaultDeleteMaxGroupSizeMB = 256
|
||||
defaultDeleteMaxOutputFiles = 8
|
||||
defaultRewriteStrategy = "binpack"
|
||||
defaultMinManifestsToRewrite = 5
|
||||
minManifestsToRewrite = 2
|
||||
defaultOperations = "all"
|
||||
@@ -43,6 +44,8 @@ const (
|
||||
MetricDurationMs = "duration_ms"
|
||||
)
|
||||
|
||||
const bytesPerMB int64 = 1024 * 1024
|
||||
|
||||
// Config holds parsed worker config values.
|
||||
type Config struct {
|
||||
SnapshotRetentionHours int64
|
||||
@@ -60,7 +63,6 @@ type Config struct {
|
||||
ApplyDeletes bool
|
||||
Where string
|
||||
RewriteStrategy string
|
||||
SortFields string
|
||||
SortMaxInputBytes int64
|
||||
}
|
||||
|
||||
@@ -72,19 +74,18 @@ func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
|
||||
MaxSnapshotsToKeep: readInt64Config(values, "max_snapshots_to_keep", defaultMaxSnapshotsToKeep),
|
||||
OrphanOlderThanHours: readInt64Config(values, "orphan_older_than_hours", defaultOrphanOlderThanHours),
|
||||
MaxCommitRetries: readInt64Config(values, "max_commit_retries", defaultMaxCommitRetries),
|
||||
TargetFileSizeBytes: readInt64Config(values, "target_file_size_mb", defaultTargetFileSizeMB) * 1024 * 1024,
|
||||
TargetFileSizeBytes: readSizeMBConfig(values, "target_file_size_mb", defaultTargetFileSizeMB),
|
||||
MinInputFiles: readInt64Config(values, "min_input_files", defaultMinInputFiles),
|
||||
DeleteTargetFileSizeBytes: readInt64Config(values, "delete_target_file_size_mb", defaultDeleteTargetFileSizeMB) * 1024 * 1024,
|
||||
DeleteTargetFileSizeBytes: readSizeMBConfig(values, "delete_target_file_size_mb", defaultDeleteTargetFileSizeMB),
|
||||
DeleteMinInputFiles: readInt64Config(values, "delete_min_input_files", defaultDeleteMinInputFiles),
|
||||
DeleteMaxFileGroupSizeBytes: readInt64Config(values, "delete_max_file_group_size_mb", defaultDeleteMaxGroupSizeMB) * 1024 * 1024,
|
||||
DeleteMaxFileGroupSizeBytes: readSizeMBConfig(values, "delete_max_file_group_size_mb", defaultDeleteMaxGroupSizeMB),
|
||||
DeleteMaxOutputFiles: readInt64Config(values, "delete_max_output_files", defaultDeleteMaxOutputFiles),
|
||||
MinManifestsToRewrite: readInt64Config(values, "min_manifests_to_rewrite", defaultMinManifestsToRewrite),
|
||||
Operations: readStringConfig(values, "operations", defaultOperations),
|
||||
ApplyDeletes: readBoolConfig(values, "apply_deletes", true),
|
||||
Where: strings.TrimSpace(readStringConfig(values, "where", "")),
|
||||
RewriteStrategy: strings.TrimSpace(strings.ToLower(readStringConfig(values, "rewrite_strategy", "binpack"))),
|
||||
SortFields: strings.TrimSpace(readStringConfig(values, "sort_fields", "")),
|
||||
SortMaxInputBytes: readInt64Config(values, "sort_max_input_mb", 0) * 1024 * 1024,
|
||||
RewriteStrategy: strings.TrimSpace(strings.ToLower(readStringConfig(values, "rewrite_strategy", defaultRewriteStrategy))),
|
||||
SortMaxInputBytes: readSizeMBConfig(values, "sort_max_input_mb", 0),
|
||||
}
|
||||
|
||||
// Clamp the fields that are always defaulted by worker config parsing.
|
||||
@@ -98,15 +99,6 @@ func ParseConfig(values map[string]*plugin_pb.ConfigValue) Config {
|
||||
cfg.MaxCommitRetries = defaultMaxCommitRetries
|
||||
}
|
||||
cfg = applyThresholdDefaults(cfg)
|
||||
if cfg.RewriteStrategy == "" {
|
||||
cfg.RewriteStrategy = "binpack"
|
||||
}
|
||||
if cfg.RewriteStrategy != "binpack" && cfg.RewriteStrategy != "sort" {
|
||||
cfg.RewriteStrategy = "binpack"
|
||||
}
|
||||
if cfg.SortMaxInputBytes < 0 {
|
||||
cfg.SortMaxInputBytes = 0
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
@@ -132,12 +124,34 @@ func applyThresholdDefaults(cfg Config) Config {
|
||||
if cfg.DeleteMaxOutputFiles <= 0 {
|
||||
cfg.DeleteMaxOutputFiles = defaultDeleteMaxOutputFiles
|
||||
}
|
||||
if cfg.RewriteStrategy == "" {
|
||||
cfg.RewriteStrategy = defaultRewriteStrategy
|
||||
}
|
||||
if cfg.RewriteStrategy != "binpack" && cfg.RewriteStrategy != "sort" {
|
||||
cfg.RewriteStrategy = defaultRewriteStrategy
|
||||
}
|
||||
if cfg.SortMaxInputBytes < 0 {
|
||||
cfg.SortMaxInputBytes = 0
|
||||
}
|
||||
if cfg.MinManifestsToRewrite < minManifestsToRewrite {
|
||||
cfg.MinManifestsToRewrite = minManifestsToRewrite
|
||||
}
|
||||
return cfg
|
||||
}
|
||||
|
||||
func readSizeMBConfig(values map[string]*plugin_pb.ConfigValue, field string, fallbackMB int64) int64 {
|
||||
mb := readInt64Config(values, field, fallbackMB)
|
||||
if mb <= 0 {
|
||||
return 0
|
||||
}
|
||||
maxMB := int64(^uint64(0)>>1) / bytesPerMB
|
||||
if mb > maxMB {
|
||||
glog.V(1).Infof("readSizeMBConfig: clamping %q from %d MB to %d MB", field, mb, maxMB)
|
||||
mb = maxMB
|
||||
}
|
||||
return mb * bytesPerMB
|
||||
}
|
||||
|
||||
// parseOperations returns the ordered list of maintenance operations to execute.
|
||||
// Order follows Iceberg best practices: compact → rewrite_position_delete_files
|
||||
// → expire_snapshots → remove_orphans → rewrite_manifests.
|
||||
|
||||
@@ -438,7 +438,14 @@ func hasEligibleCompaction(
|
||||
}
|
||||
}
|
||||
|
||||
bins := buildCompactionBins(candidateEntries, config.TargetFileSizeBytes, minInputFiles)
|
||||
rewritePlan, err := resolveCompactionRewritePlan(config, meta)
|
||||
if err != nil {
|
||||
return false, fmt.Errorf("resolve rewrite strategy: %w", err)
|
||||
}
|
||||
|
||||
targetSize := compactionTargetSizeForPlan(config, rewritePlan)
|
||||
bins := buildCompactionBins(candidateEntries, targetSize, minInputFiles)
|
||||
bins = filterCompactionBinsByPlan(bins, config, rewritePlan)
|
||||
return len(bins) > 0, nil
|
||||
}
|
||||
|
||||
|
||||
@@ -2025,13 +2025,44 @@ func populateTableWithDeleteFiles(
|
||||
Name string
|
||||
}
|
||||
},
|
||||
) table.Metadata {
|
||||
return populateTableWithDeleteFilesAndSortOrder(t, fs, setup, dataFiles, posDeleteFiles, eqDeleteFiles, table.UnsortedSortOrder)
|
||||
}
|
||||
|
||||
func populateTableWithDeleteFilesAndSortOrder(
|
||||
t *testing.T,
|
||||
fs *fakeFilerServer,
|
||||
setup tableSetup,
|
||||
dataFiles []struct {
|
||||
Name string
|
||||
Rows []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}
|
||||
},
|
||||
posDeleteFiles []struct {
|
||||
Name string
|
||||
Rows []struct {
|
||||
FilePath string
|
||||
Pos int64
|
||||
}
|
||||
},
|
||||
eqDeleteFiles []struct {
|
||||
Name string
|
||||
FieldIDs []int
|
||||
Rows []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}
|
||||
},
|
||||
sortOrder table.SortOrder,
|
||||
) table.Metadata {
|
||||
t.Helper()
|
||||
|
||||
schema := newTestSchema()
|
||||
spec := *iceberg.UnpartitionedSpec
|
||||
|
||||
meta, err := table.NewMetadata(schema, &spec, table.UnsortedSortOrder, "s3://"+setup.BucketName+"/"+setup.tablePath(), nil)
|
||||
meta, err := table.NewMetadata(schema, &spec, sortOrder, "s3://"+setup.BucketName+"/"+setup.tablePath(), nil)
|
||||
if err != nil {
|
||||
t.Fatalf("create metadata: %v", err)
|
||||
}
|
||||
@@ -2188,7 +2219,20 @@ func populateTableWithDeleteFiles(
|
||||
// Build final metadata with snapshot
|
||||
now := time.Now().UnixMilli()
|
||||
snap := table.Snapshot{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro"}
|
||||
meta = buildTestMetadata(t, []table.Snapshot{snap})
|
||||
builder, err := table.MetadataBuilderFromBase(meta, "s3://"+setup.BucketName+"/"+setup.tablePath())
|
||||
if err != nil {
|
||||
t.Fatalf("create metadata builder: %v", err)
|
||||
}
|
||||
if err := builder.AddSnapshot(&snap); err != nil {
|
||||
t.Fatalf("add snapshot: %v", err)
|
||||
}
|
||||
if err := builder.SetSnapshotRef(table.MainBranch, snap.SnapshotID, table.BranchRef); err != nil {
|
||||
t.Fatalf("set snapshot ref: %v", err)
|
||||
}
|
||||
meta, err = builder.Build()
|
||||
if err != nil {
|
||||
t.Fatalf("build metadata: %v", err)
|
||||
}
|
||||
|
||||
// Register table structure
|
||||
fullMetadataJSON, _ := json.Marshal(meta)
|
||||
@@ -2325,6 +2369,52 @@ func rewriteDeleteManifestsAsMixed(
|
||||
})
|
||||
}
|
||||
|
||||
func readCompactedRows(t *testing.T, fs *fakeFilerServer, setup tableSetup) []struct {
|
||||
ID int64
|
||||
Name string
|
||||
} {
|
||||
t.Helper()
|
||||
|
||||
dataDir := path.Join(s3tables.TablesPath, setup.BucketName, setup.tablePath(), "data")
|
||||
entries := fs.listDir(dataDir)
|
||||
var mergedContent []byte
|
||||
for _, e := range entries {
|
||||
if strings.HasPrefix(e.Name, "compact-") {
|
||||
mergedContent = e.Content
|
||||
break
|
||||
}
|
||||
}
|
||||
if mergedContent == nil {
|
||||
t.Fatal("no merged file found")
|
||||
}
|
||||
|
||||
reader := parquet.NewReader(bytes.NewReader(mergedContent))
|
||||
defer reader.Close()
|
||||
|
||||
type row struct {
|
||||
ID int64 `parquet:"id"`
|
||||
Name string `parquet:"name"`
|
||||
}
|
||||
var outputRows []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}
|
||||
for {
|
||||
var r row
|
||||
if err := reader.Read(&r); err != nil {
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
t.Fatalf("read: %v", err)
|
||||
}
|
||||
outputRows = append(outputRows, struct {
|
||||
ID int64
|
||||
Name string
|
||||
}{ID: r.ID, Name: r.Name})
|
||||
}
|
||||
return outputRows
|
||||
}
|
||||
|
||||
func TestCompactDataFilesMetrics(t *testing.T) {
|
||||
fs, client := startFakeFiler(t)
|
||||
|
||||
@@ -2832,6 +2922,335 @@ func TestCompactDataFilesWithMixedDeletes(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompactDataFilesSortStrategyUsesAscendingTableSortOrder(t *testing.T) {
|
||||
fs, client := startFakeFiler(t)
|
||||
|
||||
sortOrder, err := table.NewSortOrder(1, []table.SortField{{
|
||||
SourceID: 1,
|
||||
Transform: iceberg.IdentityTransform{},
|
||||
Direction: table.SortASC,
|
||||
NullOrder: table.NullsFirst,
|
||||
}})
|
||||
if err != nil {
|
||||
t.Fatalf("new sort order: %v", err)
|
||||
}
|
||||
|
||||
setup := tableSetup{BucketName: "tb", Namespace: "ns", TableName: "tbl"}
|
||||
populateTableWithDeleteFilesAndSortOrder(t, fs, setup,
|
||||
[]struct {
|
||||
Name string
|
||||
Rows []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}
|
||||
}{
|
||||
{"d1.parquet", []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}{{3, "charlie"}, {1, "alice"}}},
|
||||
{"d2.parquet", []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}{{4, "delta"}, {2, "bravo"}}},
|
||||
},
|
||||
nil,
|
||||
nil,
|
||||
sortOrder,
|
||||
)
|
||||
|
||||
handler := NewHandler(nil)
|
||||
config := Config{
|
||||
TargetFileSizeBytes: 256 * 1024 * 1024,
|
||||
MinInputFiles: 2,
|
||||
MaxCommitRetries: 3,
|
||||
ApplyDeletes: true,
|
||||
RewriteStrategy: "sort",
|
||||
}
|
||||
|
||||
result, _, err := handler.compactDataFiles(context.Background(), client, setup.BucketName, setup.tablePath(), config, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("compactDataFiles: %v", err)
|
||||
}
|
||||
if !strings.Contains(result, "using sort") {
|
||||
t.Fatalf("expected sorted compaction result, got %q", result)
|
||||
}
|
||||
|
||||
rows := readCompactedRows(t, fs, setup)
|
||||
if len(rows) != 4 {
|
||||
t.Fatalf("expected 4 compacted rows, got %d", len(rows))
|
||||
}
|
||||
for i := 1; i < len(rows); i++ {
|
||||
if rows[i-1].ID > rows[i].ID {
|
||||
t.Fatalf("rows are not sorted by id: %+v", rows)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestCompactDataFilesSortStrategyUsesTableSortOrder(t *testing.T) {
|
||||
fs, client := startFakeFiler(t)
|
||||
|
||||
sortOrder, err := table.NewSortOrder(1, []table.SortField{{
|
||||
SourceID: 2,
|
||||
Transform: iceberg.IdentityTransform{},
|
||||
Direction: table.SortDESC,
|
||||
NullOrder: table.NullsLast,
|
||||
}})
|
||||
if err != nil {
|
||||
t.Fatalf("new sort order: %v", err)
|
||||
}
|
||||
|
||||
setup := tableSetup{BucketName: "tb", Namespace: "ns", TableName: "tbl"}
|
||||
populateTableWithDeleteFilesAndSortOrder(t, fs, setup,
|
||||
[]struct {
|
||||
Name string
|
||||
Rows []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}
|
||||
}{
|
||||
{"d1.parquet", []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}{{1, "alice"}, {2, "charlie"}}},
|
||||
{"d2.parquet", []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}{{3, "bravo"}, {4, "delta"}}},
|
||||
},
|
||||
nil,
|
||||
nil,
|
||||
sortOrder,
|
||||
)
|
||||
|
||||
handler := NewHandler(nil)
|
||||
config := Config{
|
||||
TargetFileSizeBytes: 256 * 1024 * 1024,
|
||||
MinInputFiles: 2,
|
||||
MaxCommitRetries: 3,
|
||||
ApplyDeletes: true,
|
||||
RewriteStrategy: "sort",
|
||||
}
|
||||
|
||||
result, _, err := handler.compactDataFiles(context.Background(), client, setup.BucketName, setup.tablePath(), config, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("compactDataFiles: %v", err)
|
||||
}
|
||||
if !strings.Contains(result, "name desc") {
|
||||
t.Fatalf("expected table sort order in result, got %q", result)
|
||||
}
|
||||
|
||||
rows := readCompactedRows(t, fs, setup)
|
||||
gotNames := make([]string, 0, len(rows))
|
||||
for _, row := range rows {
|
||||
gotNames = append(gotNames, row.Name)
|
||||
}
|
||||
expectedNames := []string{"delta", "charlie", "bravo", "alice"}
|
||||
if len(gotNames) != len(expectedNames) {
|
||||
t.Fatalf("got %d rows, want %d", len(gotNames), len(expectedNames))
|
||||
}
|
||||
for i := range expectedNames {
|
||||
if gotNames[i] != expectedNames[i] {
|
||||
t.Fatalf("names[%d] = %q, want %q (all names: %v)", i, gotNames[i], expectedNames[i], gotNames)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDetectSkipsSortCompactionBinsAboveCap(t *testing.T) {
|
||||
fs, client := startFakeFiler(t)
|
||||
|
||||
sortOrder, err := table.NewSortOrder(1, []table.SortField{{
|
||||
SourceID: 1,
|
||||
Transform: iceberg.IdentityTransform{},
|
||||
Direction: table.SortASC,
|
||||
NullOrder: table.NullsFirst,
|
||||
}})
|
||||
if err != nil {
|
||||
t.Fatalf("new sort order: %v", err)
|
||||
}
|
||||
|
||||
setup := tableSetup{BucketName: "test-bucket", Namespace: "analytics", TableName: "events"}
|
||||
populateTableWithDeleteFilesAndSortOrder(t, fs, setup,
|
||||
[]struct {
|
||||
Name string
|
||||
Rows []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}
|
||||
}{
|
||||
{"small-1.parquet", []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}{{1, "alpha"}}},
|
||||
{"small-2.parquet", []struct {
|
||||
ID int64
|
||||
Name string
|
||||
}{{2, "bravo"}}},
|
||||
},
|
||||
nil,
|
||||
nil,
|
||||
sortOrder,
|
||||
)
|
||||
|
||||
handler := NewHandler(nil)
|
||||
config := Config{
|
||||
TargetFileSizeBytes: 4096,
|
||||
MinInputFiles: 2,
|
||||
Operations: "compact",
|
||||
RewriteStrategy: "sort",
|
||||
SortMaxInputBytes: 1,
|
||||
}
|
||||
|
||||
tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
|
||||
if err != nil {
|
||||
t.Fatalf("scanTablesForMaintenance failed: %v", err)
|
||||
}
|
||||
if len(tables) != 0 {
|
||||
t.Fatalf("expected no sort compaction candidates above the input cap, got %d", len(tables))
|
||||
}
|
||||
}
|
||||
|
||||
// TestDetectSplitsSortCompactionBinsByCap verifies that when the sort input
// cap (SortMaxInputBytes) is smaller than the total size of the small files,
// detection still produces a compaction candidate: the planner repartitions
// bins under the cap rather than discarding the table outright.
func TestDetectSplitsSortCompactionBinsByCap(t *testing.T) {
	fs, client := startFakeFiler(t)

	// Table sort order on column 1 (identity transform) so the "sort"
	// rewrite strategy is applicable.
	sortOrder, err := table.NewSortOrder(1, []table.SortField{{
		SourceID:  1,
		Transform: iceberg.IdentityTransform{},
		Direction: table.SortASC,
		NullOrder: table.NullsFirst,
	}})
	if err != nil {
		t.Fatalf("new sort order: %v", err)
	}

	// Four tiny data files — all below any reasonable target size, so all
	// are compaction candidates.
	setup := tableSetup{BucketName: "test-bucket", Namespace: "analytics", TableName: "events"}
	populateTableWithDeleteFilesAndSortOrder(t, fs, setup,
		[]struct {
			Name string
			Rows []struct {
				ID   int64
				Name string
			}
		}{
			{"small-1.parquet", []struct {
				ID   int64
				Name string
			}{{1, "alpha"}}},
			{"small-2.parquet", []struct {
				ID   int64
				Name string
			}{{2, "bravo"}}},
			{"small-3.parquet", []struct {
				ID   int64
				Name string
			}{{3, "charlie"}}},
			{"small-4.parquet", []struct {
				ID   int64
				Name string
			}{{4, "delta"}}},
		},
		nil,
		nil,
		sortOrder,
	)

	meta, _, err := loadCurrentMetadata(context.Background(), client, setup.BucketName, setup.tablePath())
	if err != nil {
		t.Fatalf("loadCurrentMetadata: %v", err)
	}
	manifests, err := loadCurrentManifests(context.Background(), client, setup.BucketName, setup.tablePath(), meta)
	if err != nil {
		t.Fatalf("loadCurrentManifests: %v", err)
	}

	// Walk the data manifests to measure the actual on-disk file sizes; the
	// cap below is derived from these so the test is independent of the
	// parquet encoding overhead.
	var totalSize int64
	var maxFileSize int64
	for _, mf := range manifests {
		if mf.ManifestContent() != iceberg.ManifestContentData {
			continue
		}
		manifestData, err := loadFileByIcebergPath(context.Background(), client, setup.BucketName, setup.tablePath(), mf.FilePath())
		if err != nil {
			t.Fatalf("load data manifest: %v", err)
		}
		entries, err := iceberg.ReadManifest(mf, bytes.NewReader(manifestData), true)
		if err != nil {
			t.Fatalf("read data manifest: %v", err)
		}
		for _, entry := range entries {
			size := entry.DataFile().FileSizeBytes()
			totalSize += size
			if size > maxFileSize {
				maxFileSize = size
			}
		}
	}
	if totalSize == 0 || maxFileSize == 0 {
		t.Fatal("expected data file sizes to be populated")
	}

	// Cap at half the total so one bin cannot hold everything, but keep it
	// above the largest single file so every file still fits in some bin.
	capBytes := totalSize / 2
	if capBytes <= maxFileSize {
		capBytes = maxFileSize + 1
	}

	handler := NewHandler(nil)
	config := Config{
		// Target larger than the total forces all files into one initial
		// bin, which the sort cap must then split.
		TargetFileSizeBytes: totalSize + 1,
		MinInputFiles:       2,
		Operations:          "compact",
		RewriteStrategy:     "sort",
		SortMaxInputBytes:   capBytes,
	}

	tables, err := handler.scanTablesForMaintenance(context.Background(), client, config, "", "", "", 0)
	if err != nil {
		t.Fatalf("scanTablesForMaintenance failed: %v", err)
	}
	if len(tables) != 1 {
		t.Fatalf("expected sort compaction candidate to survive cap-based repartitioning, got %d", len(tables))
	}
}
|
||||
|
||||
// TestCompactDataFilesSortStrategyRequiresTableSortOrder verifies that
// requesting rewrite_strategy=sort on a table with no sort order fails
// with an explicit "table sort order" error instead of silently compacting.
func TestCompactDataFilesSortStrategyRequiresTableSortOrder(t *testing.T) {
	fs, client := startFakeFiler(t)

	// Populate a table WITHOUT a sort order (populateTableWithDeleteFiles,
	// unlike the ...AndSortOrder variant, leaves the table unsorted).
	setup := tableSetup{BucketName: "tb", Namespace: "ns", TableName: "tbl"}
	populateTableWithDeleteFiles(t, fs, setup,
		[]struct {
			Name string
			Rows []struct {
				ID   int64
				Name string
			}
		}{
			{"d1.parquet", []struct {
				ID   int64
				Name string
			}{{3, "charlie"}, {1, "alice"}}},
			{"d2.parquet", []struct {
				ID   int64
				Name string
			}{{4, "delta"}, {2, "bravo"}}},
		},
		nil,
		nil,
	)

	handler := NewHandler(nil)
	config := Config{
		TargetFileSizeBytes: 256 * 1024 * 1024,
		MinInputFiles:       2,
		MaxCommitRetries:    3,
		ApplyDeletes:        true,
		RewriteStrategy:     "sort",
	}

	// The sort plan resolution should fail before any rewrite happens.
	_, _, err := handler.compactDataFiles(context.Background(), client, setup.BucketName, setup.tablePath(), config, nil)
	if err == nil || !strings.Contains(err.Error(), "table sort order") {
		t.Fatalf("expected missing table sort order error, got %v", err)
	}
}
|
||||
|
||||
func TestRewritePositionDeleteFilesExecution(t *testing.T) {
|
||||
fs, client := startFakeFiler(t)
|
||||
|
||||
@@ -3197,3 +3616,28 @@ func TestRewritePositionDeleteFilesRebuildsMixedDeleteManifests(t *testing.T) {
|
||||
t.Fatalf("expected equality delete file to be preserved, got %v", eqPaths)
|
||||
}
|
||||
}
|
||||
|
||||
// TestResolveCompactionRewritePlanFallsBackForUnsupportedSortTransform
// verifies that a table sort order using a non-identity transform (here,
// bucket[16]) does not error out: the planner degrades gracefully to the
// default (binpack) strategy.
func TestResolveCompactionRewritePlanFallsBackForUnsupportedSortTransform(t *testing.T) {
	// Bucket transforms cannot be expressed as a parquet column sort, so
	// this sort order is "unsupported" for the sort rewrite.
	sortOrder, err := table.NewSortOrder(1, []table.SortField{{
		SourceID:  1,
		Transform: iceberg.BucketTransform{NumBuckets: 16},
		Direction: table.SortASC,
		NullOrder: table.NullsFirst,
	}})
	if err != nil {
		t.Fatalf("new sort order: %v", err)
	}

	meta, err := table.NewMetadata(newTestSchema(), iceberg.UnpartitionedSpec, sortOrder, "s3://test-bucket/test-table", nil)
	if err != nil {
		t.Fatalf("new metadata: %v", err)
	}

	plan, err := resolveCompactionRewritePlan(Config{RewriteStrategy: "sort"}, meta)
	if err != nil {
		t.Fatalf("resolveCompactionRewritePlan: %v", err)
	}
	// Fallback must yield a usable (non-nil) binpack plan, not an error.
	if plan == nil || plan.strategy != defaultRewriteStrategy {
		t.Fatalf("expected fallback to %q for unsupported transform, got %+v", defaultRewriteStrategy, plan)
	}
}
|
||||
|
||||
@@ -187,19 +187,11 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
||||
{
|
||||
Name: "rewrite_strategy",
|
||||
Label: "Rewrite Strategy",
|
||||
Description: "binpack keeps the current row order; sort rewrites each compaction bin using sort_fields or the table sort order.",
|
||||
Description: "binpack keeps the existing row order; sort rewrites each compaction bin using the Iceberg table sort order.",
|
||||
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
|
||||
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
|
||||
Placeholder: "binpack or sort",
|
||||
},
|
||||
{
|
||||
Name: "sort_fields",
|
||||
Label: "Sort Fields",
|
||||
Description: "Comma-separated field names for rewrite_strategy=sort. Blank uses the table sort order when present.",
|
||||
FieldType: plugin_pb.ConfigFieldType_CONFIG_FIELD_TYPE_STRING,
|
||||
Widget: plugin_pb.ConfigWidget_CONFIG_WIDGET_TEXT,
|
||||
Placeholder: "id, created_at",
|
||||
},
|
||||
{
|
||||
Name: "sort_max_input_mb",
|
||||
Label: "Sort Max Input (MB)",
|
||||
@@ -325,8 +317,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
||||
"max_commit_retries": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxCommitRetries}},
|
||||
"operations": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: defaultOperations}},
|
||||
"apply_deletes": {Kind: &plugin_pb.ConfigValue_BoolValue{BoolValue: true}},
|
||||
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "binpack"}},
|
||||
"sort_fields": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
|
||||
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: defaultRewriteStrategy}},
|
||||
"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 0}},
|
||||
"where": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
|
||||
},
|
||||
@@ -355,8 +346,7 @@ func (h *Handler) Descriptor() *plugin_pb.JobTypeDescriptor {
|
||||
"max_commit_retries": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: defaultMaxCommitRetries}},
|
||||
"operations": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: defaultOperations}},
|
||||
"apply_deletes": {Kind: &plugin_pb.ConfigValue_BoolValue{BoolValue: true}},
|
||||
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "binpack"}},
|
||||
"sort_fields": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
|
||||
"rewrite_strategy": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: defaultRewriteStrategy}},
|
||||
"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 0}},
|
||||
"where": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: ""}},
|
||||
},
|
||||
|
||||
@@ -34,6 +34,12 @@ func TestParseConfig(t *testing.T) {
|
||||
if config.Operations != defaultOperations {
|
||||
t.Errorf("expected Operations=%q, got %q", defaultOperations, config.Operations)
|
||||
}
|
||||
if config.RewriteStrategy != defaultRewriteStrategy {
|
||||
t.Errorf("expected RewriteStrategy=%q, got %q", defaultRewriteStrategy, config.RewriteStrategy)
|
||||
}
|
||||
if config.SortMaxInputBytes != 0 {
|
||||
t.Errorf("expected SortMaxInputBytes=0, got %d", config.SortMaxInputBytes)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseOperations(t *testing.T) {
|
||||
@@ -879,6 +885,38 @@ func TestNormalizeDetectionConfigUsesSharedDefaults(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestParseConfigRewriteStrategy verifies parsing and clamping of the sort
// rewrite settings: "sort" is accepted with the MB cap converted to bytes,
// an unknown strategy falls back to the default, a negative cap clamps to
// zero, and an oversized cap clamps to the largest representable byte count
// instead of overflowing int64.
func TestParseConfigRewriteStrategy(t *testing.T) {
	// Valid sort strategy with a 64 MB input cap.
	config := ParseConfig(map[string]*plugin_pb.ConfigValue{
		"rewrite_strategy":  {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "sort"}},
		"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: 64}},
	})
	if config.RewriteStrategy != "sort" {
		t.Fatalf("expected sort rewrite strategy, got %q", config.RewriteStrategy)
	}
	if config.SortMaxInputBytes != 64*1024*1024 {
		t.Fatalf("expected SortMaxInputBytes=64MB, got %d", config.SortMaxInputBytes)
	}

	// Invalid strategy and negative cap both fall back to safe defaults.
	config = ParseConfig(map[string]*plugin_pb.ConfigValue{
		"rewrite_strategy":  {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: "invalid"}},
		"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_Int64Value{Int64Value: -1}},
	})
	if config.RewriteStrategy != defaultRewriteStrategy {
		t.Fatalf("expected invalid rewrite strategy to fall back to %q, got %q", defaultRewriteStrategy, config.RewriteStrategy)
	}
	if config.SortMaxInputBytes != 0 {
		t.Fatalf("expected negative sort cap to clamp to 0, got %d", config.SortMaxInputBytes)
	}

	// maxMB is the largest MB count whose byte equivalent fits in int64;
	// one more MB must clamp rather than wrap negative.
	maxMB := int64(^uint64(0)>>1) / bytesPerMB
	config = ParseConfig(map[string]*plugin_pb.ConfigValue{
		"sort_max_input_mb": {Kind: &plugin_pb.ConfigValue_StringValue{StringValue: fmt.Sprintf("%d", maxMB+1)}},
	})
	if config.SortMaxInputBytes != maxMB*bytesPerMB {
		t.Fatalf("expected oversized sort cap to clamp to %d bytes, got %d", maxMB*bytesPerMB, config.SortMaxInputBytes)
	}
}
|
||||
|
||||
func TestCollectPositionDeletes(t *testing.T) {
|
||||
fs, client := startFakeFiler(t)
|
||||
|
||||
|
||||
@@ -105,7 +105,9 @@ func (idx *planningIndex) rewriteManifestsEligible(config Config) (bool, bool) {
|
||||
}
|
||||
|
||||
func compactionPlanningConfigHash(config Config) string {
|
||||
return fmt.Sprintf("target=%d|min=%d", config.TargetFileSizeBytes, config.MinInputFiles)
|
||||
return fmt.Sprintf("target=%d|min=%d|strategy=%s|sortcap=%d",
|
||||
config.TargetFileSizeBytes, config.MinInputFiles,
|
||||
config.RewriteStrategy, config.SortMaxInputBytes)
|
||||
}
|
||||
|
||||
func operationRequested(ops []string, wanted string) bool {
|
||||
|
||||
181
weed/plugin/worker/iceberg/sort_strategy.go
Normal file
181
weed/plugin/worker/iceberg/sort_strategy.go
Normal file
@@ -0,0 +1,181 @@
|
||||
package iceberg
|
||||
|
||||
import (
	"errors"
	"fmt"
	"strings"

	"github.com/apache/iceberg-go"
	"github.com/apache/iceberg-go/table"
	"github.com/parquet-go/parquet-go"
	"github.com/seaweedfs/seaweedfs/weed/glog"
)
|
||||
|
||||
// compactionRewritePlan captures the resolved rewrite behavior for one
// compaction run: either plain binpack, or a sort rewrite together with the
// concrete sort fields derived from the table sort order.
type compactionRewritePlan struct {
	strategy   string
	sortFields []compactionSortField
}
|
||||
|
||||
// compactionSortField describes one column of the sort rewrite: the dotted
// column path plus its sort direction and null ordering.
type compactionSortField struct {
	path       string
	descending bool
	nullsFirst bool
}
|
||||
|
||||
func resolveCompactionRewritePlan(config Config, meta table.Metadata) (*compactionRewritePlan, error) {
|
||||
strategy := strings.TrimSpace(strings.ToLower(config.RewriteStrategy))
|
||||
if strategy == "" {
|
||||
strategy = defaultRewriteStrategy
|
||||
}
|
||||
if strategy == defaultRewriteStrategy {
|
||||
return &compactionRewritePlan{strategy: defaultRewriteStrategy}, nil
|
||||
}
|
||||
if strategy != "sort" {
|
||||
return nil, fmt.Errorf("unsupported rewrite strategy %q", config.RewriteStrategy)
|
||||
}
|
||||
|
||||
sortFields, err := resolveCompactionSortFields(meta)
|
||||
if err != nil {
|
||||
if err == errUnsupportedTableSortOrder {
|
||||
glog.V(2).Infof("iceberg compact: falling back to binpack because the table sort order is not supported")
|
||||
return &compactionRewritePlan{strategy: defaultRewriteStrategy}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &compactionRewritePlan{
|
||||
strategy: strategy,
|
||||
sortFields: sortFields,
|
||||
}, nil
|
||||
}
|
||||
|
||||
var errUnsupportedTableSortOrder = fmt.Errorf("unsupported table sort order")
|
||||
|
||||
// resolveCompactionSortFields translates the table's sort order into the
// column paths, directions, and null orderings used by the sort rewrite.
//
// It returns errUnsupportedTableSortOrder when any sort field uses a
// non-identity transform (callers fall back to binpack), and a hard error
// when metadata/schema is missing, the table is unsorted, a sort field
// references an unknown or non-primitive column, or fields are duplicated.
func resolveCompactionSortFields(meta table.Metadata) ([]compactionSortField, error) {
	if meta == nil || meta.CurrentSchema() == nil {
		return nil, fmt.Errorf("sort rewrite requires table schema metadata")
	}

	sortOrder := meta.SortOrder()
	if sortOrder.IsUnsorted() {
		return nil, fmt.Errorf("sort rewrite requires a table sort order")
	}

	schema := meta.CurrentSchema()
	seen := make(map[string]struct{})
	fields := make([]compactionSortField, 0, sortOrder.Len())
	for sortField := range sortOrder.Fields() {
		// Only identity transforms map directly onto a parquet column sort;
		// anything else (bucket, truncate, ...) triggers binpack fallback.
		if _, ok := sortField.Transform.(iceberg.IdentityTransform); !ok {
			return nil, errUnsupportedTableSortOrder
		}

		field, ok := schema.FindFieldByID(sortField.SourceID)
		if !ok {
			return nil, fmt.Errorf("table sort field %d not found in schema", sortField.SourceID)
		}
		// Nested/complex columns cannot be used as sort keys.
		if _, ok := field.Type.(iceberg.PrimitiveType); !ok {
			return nil, fmt.Errorf("table sort field %q is not a primitive column", field.Name)
		}

		// Prefer the fully-qualified (dotted) column path; fall back to the
		// bare field name when the schema cannot resolve one.
		columnPath, ok := schema.FindColumnName(sortField.SourceID)
		if !ok {
			columnPath = field.Name
		}
		// Duplicate detection is case-insensitive on the column path.
		normalizedPath := strings.ToLower(columnPath)
		if _, exists := seen[normalizedPath]; exists {
			return nil, fmt.Errorf("duplicate sort field %q", columnPath)
		}
		seen[normalizedPath] = struct{}{}

		fields = append(fields, compactionSortField{
			path:       columnPath,
			descending: sortField.Direction == table.SortDESC,
			nullsFirst: sortField.NullOrder == table.NullsFirst,
		})
	}

	if len(fields) == 0 {
		return nil, fmt.Errorf("sort rewrite requires at least one sort field")
	}
	return fields, nil
}
|
||||
|
||||
func compactionTargetSizeForPlan(config Config, plan *compactionRewritePlan) int64 {
|
||||
targetSize := config.TargetFileSizeBytes
|
||||
if plan != nil && plan.strategy == "sort" && config.SortMaxInputBytes > 0 && config.SortMaxInputBytes < targetSize {
|
||||
return config.SortMaxInputBytes
|
||||
}
|
||||
return targetSize
|
||||
}
|
||||
|
||||
func filterCompactionBinsByPlan(bins []compactionBin, config Config, plan *compactionRewritePlan) []compactionBin {
|
||||
if plan == nil || plan.strategy != "sort" || config.SortMaxInputBytes <= 0 {
|
||||
return bins
|
||||
}
|
||||
|
||||
filtered := make([]compactionBin, 0, len(bins))
|
||||
for _, bin := range bins {
|
||||
if bin.TotalSize <= config.SortMaxInputBytes {
|
||||
filtered = append(filtered, bin)
|
||||
}
|
||||
}
|
||||
return filtered
|
||||
}
|
||||
|
||||
func compactionNoEligibleMessage(config Config, plan *compactionRewritePlan, initialBinCount int) string {
|
||||
if plan != nil && plan.strategy == "sort" && config.SortMaxInputBytes > 0 && initialBinCount > 0 {
|
||||
return fmt.Sprintf("no files eligible for sorted compaction within %d MB sort input cap", config.SortMaxInputBytes/bytesPerMB)
|
||||
}
|
||||
return "no files eligible for compaction"
|
||||
}
|
||||
|
||||
func (p *compactionRewritePlan) summaryLabel() string {
|
||||
if p == nil || p.strategy != "sort" {
|
||||
return defaultRewriteStrategy
|
||||
}
|
||||
|
||||
paths := make([]string, 0, len(p.sortFields))
|
||||
for _, field := range p.sortFields {
|
||||
label := field.path
|
||||
if field.descending {
|
||||
label += " desc"
|
||||
} else {
|
||||
label += " asc"
|
||||
}
|
||||
if field.nullsFirst {
|
||||
label += " nulls-first"
|
||||
} else {
|
||||
label += " nulls-last"
|
||||
}
|
||||
paths = append(paths, label)
|
||||
}
|
||||
return fmt.Sprintf("sort (%s)", strings.Join(paths, ", "))
|
||||
}
|
||||
|
||||
func (p *compactionRewritePlan) parquetSortingColumns(pqSchema *parquet.Schema) ([]parquet.SortingColumn, error) {
|
||||
if p == nil || p.strategy != "sort" {
|
||||
return nil, nil
|
||||
}
|
||||
if pqSchema == nil {
|
||||
return nil, fmt.Errorf("parquet schema is required for sort rewrite")
|
||||
}
|
||||
|
||||
columns := make([]parquet.SortingColumn, 0, len(p.sortFields))
|
||||
for _, field := range p.sortFields {
|
||||
path := strings.Split(field.path, ".")
|
||||
if _, ok := pqSchema.Lookup(path...); !ok {
|
||||
return nil, fmt.Errorf("parquet schema missing sort field %q", field.path)
|
||||
}
|
||||
|
||||
var column parquet.SortingColumn
|
||||
if field.descending {
|
||||
column = parquet.Descending(path...)
|
||||
} else {
|
||||
column = parquet.Ascending(path...)
|
||||
}
|
||||
if field.nullsFirst {
|
||||
column = parquet.NullsFirst(column)
|
||||
}
|
||||
columns = append(columns, column)
|
||||
}
|
||||
return columns, nil
|
||||
}
|
||||
Reference in New Issue
Block a user