iceberg: cache detection planning results (#8667)
* iceberg: cache detection planning results * iceberg: tighten planning index cache handling * iceberg: remove dead planning-index metadata fallback * iceberg: preserve partial planning index caches * iceberg: scope planning index caching per op * iceberg: rename copy vars to avoid shadowing builtin
This commit is contained in:
@@ -3,7 +3,6 @@ package iceberg
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"path"
|
||||
"strings"
|
||||
@@ -121,30 +120,14 @@ func (h *Handler) scanTablesForMaintenance(
|
||||
continue
|
||||
}
|
||||
|
||||
// Parse the internal metadata to get FullMetadata
|
||||
var internalMeta struct {
|
||||
MetadataLocation string `json:"metadataLocation,omitempty"`
|
||||
Metadata *struct {
|
||||
FullMetadata json.RawMessage `json:"fullMetadata,omitempty"`
|
||||
} `json:"metadata,omitempty"`
|
||||
}
|
||||
if err := json.Unmarshal(metadataBytes, &internalMeta); err != nil {
|
||||
glog.V(2).Infof("iceberg maintenance: skipping %s/%s/%s: cannot parse metadata: %v", bucketName, nsName, tblName, err)
|
||||
continue
|
||||
}
|
||||
if internalMeta.Metadata == nil || len(internalMeta.Metadata.FullMetadata) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
icebergMeta, err := table.ParseMetadataBytes(internalMeta.Metadata.FullMetadata)
|
||||
icebergMeta, metadataFileName, planningIndex, err := parseTableMetadataEnvelope(metadataBytes)
|
||||
if err != nil {
|
||||
glog.V(2).Infof("iceberg maintenance: skipping %s/%s/%s: cannot parse iceberg metadata: %v", bucketName, nsName, tblName, err)
|
||||
continue
|
||||
}
|
||||
|
||||
tablePath := path.Join(nsName, tblName)
|
||||
metadataFileName := metadataFileNameFromLocation(internalMeta.MetadataLocation, bucketName, tablePath)
|
||||
needsWork, err := h.tableNeedsMaintenance(ctx, filerClient, bucketName, tablePath, icebergMeta, metadataFileName, config, ops)
|
||||
needsWork, err := h.tableNeedsMaintenance(ctx, filerClient, bucketName, tablePath, icebergMeta, metadataFileName, planningIndex, config, ops)
|
||||
if err != nil {
|
||||
glog.V(2).Infof("iceberg maintenance: skipping %s/%s/%s: cannot evaluate maintenance need: %v", bucketName, nsName, tblName, err)
|
||||
continue
|
||||
@@ -192,6 +175,7 @@ func (h *Handler) tableNeedsMaintenance(
|
||||
bucketName, tablePath string,
|
||||
meta table.Metadata,
|
||||
metadataFileName string,
|
||||
cachedPlanningIndex *planningIndex,
|
||||
config Config,
|
||||
ops []string,
|
||||
) (bool, error) {
|
||||
@@ -205,22 +189,66 @@ func (h *Handler) tableNeedsMaintenance(
|
||||
}
|
||||
}
|
||||
|
||||
loadManifests := func() ([]iceberg.ManifestFile, error) {
|
||||
return loadCurrentManifests(ctx, filerClient, bucketName, tablePath, meta)
|
||||
}
|
||||
|
||||
var currentManifests []iceberg.ManifestFile
|
||||
var manifestsErr error
|
||||
manifestsLoaded := false
|
||||
var manifestsLoaded bool
|
||||
getCurrentManifests := func() ([]iceberg.ManifestFile, error) {
|
||||
if manifestsLoaded {
|
||||
return currentManifests, manifestsErr
|
||||
}
|
||||
currentManifests, manifestsErr = loadManifests()
|
||||
currentManifests, manifestsErr = loadCurrentManifests(ctx, filerClient, bucketName, tablePath, meta)
|
||||
manifestsLoaded = true
|
||||
return currentManifests, manifestsErr
|
||||
}
|
||||
|
||||
computedPlanningIndexes := make(map[string]*planningIndex)
|
||||
planningIndexLoaded := make(map[string]bool)
|
||||
planningIndexErrs := make(map[string]error)
|
||||
getPlanningIndex := func(op string) (*planningIndex, error) {
|
||||
if planningIndexLoaded[op] {
|
||||
return computedPlanningIndexes[op], planningIndexErrs[op]
|
||||
}
|
||||
planningIndexLoaded[op] = true
|
||||
|
||||
manifests, err := getCurrentManifests()
|
||||
if err != nil {
|
||||
planningIndexErrs[op] = err
|
||||
return nil, err
|
||||
}
|
||||
index, err := buildPlanningIndexFromManifests(ctx, filerClient, bucketName, tablePath, meta, config, []string{op}, manifests)
|
||||
if err != nil {
|
||||
planningIndexErrs[op] = err
|
||||
return nil, err
|
||||
}
|
||||
computedPlanningIndexes[op] = index
|
||||
if index != nil {
|
||||
if err := persistPlanningIndex(ctx, filerClient, bucketName, tablePath, index); err != nil {
|
||||
glog.V(2).Infof("iceberg maintenance: unable to persist planning index for %s/%s: %v", bucketName, tablePath, err)
|
||||
}
|
||||
}
|
||||
return index, nil
|
||||
}
|
||||
checkPlanningIndex := func(op string, eligibleFn func(*planningIndex, Config) (bool, bool)) (bool, error) {
|
||||
if cachedPlanningIndex != nil && cachedPlanningIndex.matchesSnapshot(meta) {
|
||||
if eligible, ok := eligibleFn(cachedPlanningIndex, config); ok {
|
||||
return eligible, nil
|
||||
}
|
||||
}
|
||||
|
||||
index, err := getPlanningIndex(op)
|
||||
if err != nil {
|
||||
return false, err
|
||||
}
|
||||
if index == nil {
|
||||
return false, nil
|
||||
}
|
||||
|
||||
eligible, _ := eligibleFn(index, config)
|
||||
return eligible, nil
|
||||
}
|
||||
|
||||
var opEvalErrors []string
|
||||
planningIndexErrorReported := false
|
||||
|
||||
for _, op := range ops {
|
||||
switch op {
|
||||
@@ -228,26 +256,27 @@ func (h *Handler) tableNeedsMaintenance(
|
||||
// Handled by the metadata-only check above.
|
||||
continue
|
||||
case "compact":
|
||||
manifests, err := getCurrentManifests()
|
||||
eligible, err := checkPlanningIndex(op, (*planningIndex).compactionEligible)
|
||||
if err != nil {
|
||||
opEvalErrors = append(opEvalErrors, fmt.Sprintf("%s: %v", op, err))
|
||||
continue
|
||||
}
|
||||
eligible, err := hasEligibleCompaction(ctx, filerClient, bucketName, tablePath, manifests, config)
|
||||
if err != nil {
|
||||
opEvalErrors = append(opEvalErrors, fmt.Sprintf("%s: %v", op, err))
|
||||
if !planningIndexErrorReported {
|
||||
opEvalErrors = append(opEvalErrors, fmt.Sprintf("%s: %v", op, err))
|
||||
planningIndexErrorReported = true
|
||||
}
|
||||
continue
|
||||
}
|
||||
if eligible {
|
||||
return true, nil
|
||||
}
|
||||
case "rewrite_manifests":
|
||||
manifests, err := getCurrentManifests()
|
||||
eligible, err := checkPlanningIndex(op, (*planningIndex).rewriteManifestsEligible)
|
||||
if err != nil {
|
||||
opEvalErrors = append(opEvalErrors, fmt.Sprintf("%s: %v", op, err))
|
||||
if !planningIndexErrorReported {
|
||||
opEvalErrors = append(opEvalErrors, fmt.Sprintf("%s: %v", op, err))
|
||||
planningIndexErrorReported = true
|
||||
}
|
||||
continue
|
||||
}
|
||||
if countDataManifests(manifests) >= config.MinManifestsToRewrite {
|
||||
if eligible {
|
||||
return true, nil
|
||||
}
|
||||
case "remove_orphans":
|
||||
|
||||
Reference in New Issue
Block a user