// Package maintenance contains integration tests for the iceberg table
// maintenance plugin worker. Tests start a real weed mini cluster, create
// tables via the S3 Tables API, populate Iceberg metadata via the filer
// gRPC API, and then exercise the iceberg.Handler operations against the
// live filer.
package maintenance

import (
	"bytes"
	"context"
	"crypto/rand"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"errors"
	"flag"
	"fmt"
	"io"
	"net"
	"net/http"
	"os"
	"path"
	"path/filepath"
	"strconv"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/apache/iceberg-go"
	"github.com/apache/iceberg-go/table"
	"github.com/aws/aws-sdk-go-v2/aws"
	v4 "github.com/aws/aws-sdk-go-v2/aws/signer/v4"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/credentials"
	"github.com/aws/aws-sdk-go-v2/service/s3"
	"github.com/parquet-go/parquet-go"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"

	"github.com/seaweedfs/seaweedfs/weed/command"
	"github.com/seaweedfs/seaweedfs/weed/filer"
	"github.com/seaweedfs/seaweedfs/weed/glog"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	icebergHandler "github.com/seaweedfs/seaweedfs/weed/plugin/worker/iceberg"
	"github.com/seaweedfs/seaweedfs/weed/s3api/s3tables"
)

// ---------------------------------------------------------------------------
// Cluster lifecycle (mirrors test/s3tables/table-buckets pattern)
// ---------------------------------------------------------------------------

type testCluster struct {
	dataDir       string
	ctx           context.Context
	cancel        context.CancelFunc
	wg            sync.WaitGroup
	filerGrpcPort int
	s3Port        int
	s3Endpoint    string
	isRunning     bool
}

var shared *testCluster

func TestMain(m *testing.M) {
	flag.Parse()
	if testing.Short() {
		os.Exit(m.Run())
	}

	testDir, err := os.MkdirTemp("", "seaweed-iceberg-maint-*")
	if err != nil {
		fmt.Fprintf(os.Stderr, "SKIP: failed to create temp dir: %v\n", err)
		os.Exit(0)
	}

	cluster, err := startCluster(testDir, nil)
	if err != nil {
		fmt.Fprintf(os.Stderr, "SKIP: failed to start cluster: %v\n", err)
		os.RemoveAll(testDir)
		os.Exit(0)
	}
	shared = cluster

	code := m.Run()
	shared.stop()
	os.RemoveAll(testDir)
	os.Exit(code)
}

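// startCluster launches a weed mini cluster in a background goroutine on
// freshly allocated local ports and waits until the S3 endpoint answers HTTP
// requests before returning.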
func startCluster(testDir string, extraArgs []string) (*testCluster, error) {
	ports, err := findPorts(10)
	if err != nil {
		return nil, err
	}
	masterPort, masterGrpc := ports[0], ports[1]
	volumePort, volumeGrpc := ports[2], ports[3]
	filerPort, filerGrpc := ports[4], ports[5]
	s3Port, s3Grpc := ports[6], ports[7]
	adminPort, adminGrpc := ports[8], ports[9]

	_ = os.Remove(filepath.Join(testDir, "mini.options"))

	// Empty security.toml disables JWT auth.
	if err := os.WriteFile(filepath.Join(testDir, "security.toml"), []byte("# test\n"), 0644); err != nil {
		return nil, err
	}
	if os.Getenv("AWS_ACCESS_KEY_ID") == "" {
		os.Setenv("AWS_ACCESS_KEY_ID", "admin")
	}
	if os.Getenv("AWS_SECRET_ACCESS_KEY") == "" {
		os.Setenv("AWS_SECRET_ACCESS_KEY", "admin")
	}

	ctx, cancel := context.WithCancel(context.Background())
	c := &testCluster{
		dataDir:       testDir,
		ctx:           ctx,
		cancel:        cancel,
		filerGrpcPort: filerGrpc,
		s3Port:        s3Port,
		s3Endpoint:    fmt.Sprintf("http://127.0.0.1:%d", s3Port),
	}

	c.wg.Add(1)
	go func() {
		defer c.wg.Done()
		oldDir, _ := os.Getwd()
		oldArgs := os.Args
		defer func() { os.Chdir(oldDir); os.Args = oldArgs }()
		os.Chdir(testDir)

		args := []string{
			"-dir=" + testDir,
			"-master.dir=" + testDir,
			"-master.port=" + strconv.Itoa(masterPort),
			"-master.port.grpc=" + strconv.Itoa(masterGrpc),
			"-volume.port=" + strconv.Itoa(volumePort),
			"-volume.port.grpc=" + strconv.Itoa(volumeGrpc),
			"-volume.port.public=" + strconv.Itoa(volumePort),
			"-volume.publicUrl=127.0.0.1:" + strconv.Itoa(volumePort),
			"-filer.port=" + strconv.Itoa(filerPort),
			"-filer.port.grpc=" + strconv.Itoa(filerGrpc),
			"-s3.port=" + strconv.Itoa(s3Port),
			"-s3.port.grpc=" + strconv.Itoa(s3Grpc),
			"-admin.port=" + strconv.Itoa(adminPort),
			"-admin.port.grpc=" + strconv.Itoa(adminGrpc),
			"-webdav.port=0",
			"-admin.ui=false",
			"-master.volumeSizeLimitMB=32",
			"-ip=127.0.0.1",
			"-master.peers=none",
			"-s3.iam.readOnly=false",
		}
		args = append(args, extraArgs...)
		os.Args = append([]string{"weed"}, args...)
		glog.MaxSize = 1024 * 1024
		for _, cmd := range command.Commands {
			if cmd.Name() == "mini" && cmd.Run != nil {
				cmd.Flag.Parse(os.Args[1:])
				command.MiniClusterCtx = ctx
				cmd.Run(cmd, cmd.Flag.Args())
				command.MiniClusterCtx = nil
				return
			}
		}
	}()

	if err := waitReady(c.s3Endpoint, 30*time.Second); err != nil {
		cancel()
		return nil, err
	}
	c.isRunning = true
	return c, nil
}

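// stop cancels the cluster context and waits briefly for the mini cluster
// goroutine to exit.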
func (c *testCluster) stop() {
	if c.cancel != nil {
		c.cancel()
	}
	if c.isRunning {
		time.Sleep(500 * time.Millisecond)
	}
	done := make(chan struct{})
	go func() { c.wg.Wait(); close(done) }()
	select {
	case <-done:
	case <-time.After(2 * time.Second):
	}
}

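// filerConn opens a gRPC connection to the cluster's filer and returns the
// connection plus a SeaweedFiler client; the connection is closed via t.Cleanup.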
func (c *testCluster) filerConn(t *testing.T) (*grpc.ClientConn, filer_pb.SeaweedFilerClient) {
	t.Helper()
	addr := fmt.Sprintf("127.0.0.1:%d", c.filerGrpcPort)
	conn, err := grpc.NewClient(addr, grpc.WithTransportCredentials(insecure.NewCredentials()))
	require.NoError(t, err)
	t.Cleanup(func() { conn.Close() })
	return conn, filer_pb.NewSeaweedFilerClient(conn)
}

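// findPorts reserves n free TCP ports on 127.0.0.1 by opening listeners,
// recording their ports, and closing them again.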
func findPorts(n int) ([]int, error) {
	ls := make([]*net.TCPListener, n)
	ps := make([]int, n)
	for i := 0; i < n; i++ {
		l, err := net.Listen("tcp", "127.0.0.1:0")
		if err != nil {
			for j := 0; j < i; j++ {
				ls[j].Close()
			}
			return nil, err
		}
		ls[i] = l.(*net.TCPListener)
		ps[i] = ls[i].Addr().(*net.TCPAddr).Port
	}
	for _, l := range ls {
		l.Close()
	}
	return ps, nil
}

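// waitReady polls the endpoint until it responds to an HTTP GET or the timeout
// expires.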
func waitReady(endpoint string, timeout time.Duration) error {
	client := &http.Client{Timeout: 1 * time.Second}
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		resp, err := client.Get(endpoint)
		if err == nil {
			resp.Body.Close()
			time.Sleep(500 * time.Millisecond)
			return nil
		}
		time.Sleep(200 * time.Millisecond)
	}
	return fmt.Errorf("timeout waiting for %s", endpoint)
}

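// randomSuffix returns an 8-character hex string used to make bucket names unique.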
func randomSuffix() string {
	b := make([]byte, 4)
	rand.Read(b)
	return fmt.Sprintf("%x", b)
}

// ---------------------------------------------------------------------------
// Helpers for populating Iceberg table state via filer gRPC
// ---------------------------------------------------------------------------

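// newTestSchema returns the three-column Iceberg schema (id, name, payload)
// shared by all tests in this file.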
func newTestSchema() *iceberg.Schema {
	return iceberg.NewSchema(0,
		iceberg.NestedField{ID: 1, Type: iceberg.PrimitiveTypes.Int64, Name: "id", Required: true},
		iceberg.NestedField{ID: 2, Type: iceberg.PrimitiveTypes.String, Name: "name", Required: false},
		iceberg.NestedField{ID: 3, Type: iceberg.PrimitiveTypes.String, Name: "payload", Required: false},
	)
}

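// compactRow is the row shape written into the synthetic parquet files used as
// compaction inputs.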
type compactRow struct {
	ID      int64  `parquet:"id"`
	Name    string `parquet:"name"`
	Payload string `parquet:"payload"`
}

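// buildParquetData writes rowCount compactRow records into an in-memory parquet
// file and returns its bytes.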
func buildParquetData(t *testing.T, prefix string, rowCount int) []byte {
	t.Helper()

	rows := make([]compactRow, rowCount)
	for i := range rows {
		rows[i] = compactRow{
			ID:      int64(i + 1),
			Name:    fmt.Sprintf("%s-%06d", prefix, i),
			Payload: fmt.Sprintf("%s-%06d-%s", prefix, i, strings.Repeat(fmt.Sprintf("%04d", i%10000), 128)),
		}
	}

	var buf bytes.Buffer
	writer := parquet.NewGenericWriter[compactRow](&buf)
	_, err := writer.Write(rows)
	require.NoError(t, err)
	require.NoError(t, writer.Close())
	return buf.Bytes()
}

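// buildMetadata creates table metadata for the test schema and, when snapshots
// are given, registers them and points the main branch at the last one.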
func buildMetadata(t *testing.T, snapshots []table.Snapshot) table.Metadata {
	t.Helper()
	schema := newTestSchema()
	meta, err := table.NewMetadata(schema, iceberg.UnpartitionedSpec, table.UnsortedSortOrder, "s3://test/table", nil)
	require.NoError(t, err)
	if len(snapshots) == 0 {
		return meta
	}

	// Iceberg validates that snapshot timestamps >= metadata lastUpdatedMs.
	// Offset all timestamps so they're safely after the metadata was created.
	baseMs := time.Now().UnixMilli() + 100
	for i := range snapshots {
		snapshots[i].TimestampMs = baseMs + int64(i)
	}

	builder, err := table.MetadataBuilderFromBase(meta, "s3://test/table")
	require.NoError(t, err)
	var lastID int64
	for i := range snapshots {
		s := snapshots[i]
		require.NoError(t, builder.AddSnapshot(&s))
		lastID = s.SnapshotID
	}
	require.NoError(t, builder.SetSnapshotRef(table.MainBranch, lastID, table.BranchRef))
	result, err := builder.Build()
	require.NoError(t, err)
	return result
}

// s3put performs an S3-signed PUT request. When key is empty it creates a
// bucket; otherwise it uploads an object.
func s3put(t *testing.T, s3Endpoint, bucket, key string, body []byte) {
	t.Helper()
	urlPath := "/" + bucket
	if key != "" {
		urlPath += "/" + key
	}
	var reader *bytes.Reader
	if body != nil {
		reader = bytes.NewReader(body)
	} else {
		reader = bytes.NewReader(nil)
	}
	req, err := http.NewRequest(http.MethodPut, s3Endpoint+urlPath, reader)
	require.NoError(t, err)
	req.Header.Set("Host", req.URL.Host)
	hash := sha256.Sum256(body)
	err = v4.NewSigner().SignHTTP(context.Background(),
		aws.Credentials{AccessKeyID: "admin", SecretAccessKey: "admin"},
		req, hex.EncodeToString(hash[:]), "s3", "us-east-1", time.Now())
	require.NoError(t, err)
	resp, err := (&http.Client{Timeout: 10 * time.Second}).Do(req)
	require.NoError(t, err)
	defer resp.Body.Close()
	if resp.StatusCode >= 400 {
		b, _ := io.ReadAll(resp.Body)
		require.FailNowf(t, "s3put failed", "s3put(%s/%s) → %d: %s", bucket, key, resp.StatusCode, string(b))
	}
}

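// newS3Client builds an AWS SDK S3 client pointed at the local endpoint, using
// path-style addressing and static test credentials.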
func newS3Client(t *testing.T, endpoint string) *s3.Client {
	t.Helper()

	cfg, err := config.LoadDefaultConfig(context.Background(),
		config.WithRegion("us-east-1"),
		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider("admin", "admin", "")),
	)
	require.NoError(t, err)

	return s3.NewFromConfig(cfg, func(o *s3.Options) {
		o.BaseEndpoint = aws.String(endpoint)
		o.UsePathStyle = true
	})
}

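// s3putObject uploads an object through the AWS SDK client and fails the test
// on any error.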
func s3putObject(t *testing.T, client *s3.Client, bucket, key string, body []byte) {
	t.Helper()

	ctx, cancel := context.WithTimeout(context.Background(), time.Minute)
	defer cancel()

	_, err := client.PutObject(ctx, &s3.PutObjectInput{
		Bucket: aws.String(bucket),
		Key:    aws.String(key),
		Body:   bytes.NewReader(body),
	})
	require.NoError(t, err)
}

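// testFilerClient wraps a raw SeaweedFilerClient so it satisfies the filer
// client interface that filer.NewFileReader expects.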
type testFilerClient struct {
	client filer_pb.SeaweedFilerClient
}

func (c testFilerClient) WithFilerClient(_ bool, fn func(filer_pb.SeaweedFilerClient) error) error {
	return fn(c.client)
}

func (c testFilerClient) AdjustedUrl(location *filer_pb.Location) string {
	if location == nil {
		return ""
	}
	if location.PublicUrl != "" {
		return location.PublicUrl
	}
	return location.Url
}

func (c testFilerClient) GetDataCenter() string {
	return ""
}

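// readFile returns the content of dir/name, using the inline entry content when
// present and falling back to a chunked read via filer.NewFileReader otherwise.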
func readFile(t *testing.T, client filer_pb.SeaweedFilerClient, dir, name string) []byte {
	t.Helper()

	entry := lookupEntry(t, client, dir, name)
	require.NotNil(t, entry, "readFile(%s, %s): entry missing", dir, name)
	if len(entry.Content) > 0 || len(entry.Chunks) == 0 {
		return entry.Content
	}

	reader := filer.NewFileReader(testFilerClient{client: client}, entry)
	if closer, ok := reader.(io.Closer); ok {
		defer closer.Close()
	}
	content, err := io.ReadAll(reader)
	require.NoError(t, err, "readFile(%s, %s): read chunked content", dir, name)
	return content
}

// populateTableViaFiler creates the Iceberg directory structure, metadata xattr,
// manifests, and data files for a table directly in the filer. It creates the
// backing S3 bucket via the S3 PUT Bucket API and marks it as a table bucket,
// so the table does not need to be created through the S3 Tables API first.
func populateTableViaFiler(
	t *testing.T,
	client filer_pb.SeaweedFilerClient,
	s3Endpoint, bucketName, namespace, tableName string,
	snapshots []table.Snapshot,
) table.Metadata {
	t.Helper()
	ctx := context.Background()
	meta := buildMetadata(t, snapshots)
	fullJSON, err := json.Marshal(meta)
	require.NoError(t, err)

	tablePath := path.Join(namespace, tableName)
	bucketsPath := s3tables.TablesPath
	tableFilerPath := path.Join(bucketsPath, bucketName, tablePath)
	metaDir := path.Join(tableFilerPath, "metadata")
	dataDir := path.Join(tableFilerPath, "data")

	// Build the table metadata xattr
	internalMeta := map[string]interface{}{
		"metadataVersion":  1,
		"metadataLocation": "metadata/v1.metadata.json",
		"metadata": map[string]interface{}{
			"fullMetadata": json.RawMessage(fullJSON),
		},
	}
	xattr, err := json.Marshal(internalMeta)
	require.NoError(t, err)

	// Create the S3 bucket via PUT, then create the directory tree via S3 PUT
	// object with zero-byte content (the filer creates intermediate dirs).
	s3put(t, s3Endpoint, bucketName, "", nil)                              // create bucket
	s3put(t, s3Endpoint, bucketName, namespace+"/.dir", []byte{})          // create ns dir
	s3put(t, s3Endpoint, bucketName, tablePath+"/.dir", []byte{})          // create table dir
	s3put(t, s3Endpoint, bucketName, tablePath+"/metadata/.dir", []byte{}) // create metadata dir
	s3put(t, s3Endpoint, bucketName, tablePath+"/data/.dir", []byte{})     // create data dir

	// Now set the table bucket and table xattrs via filer gRPC.
	// Mark bucket as table bucket.
	bucketEntry := lookupEntry(t, client, bucketsPath, bucketName)
	require.NotNil(t, bucketEntry, "bucket should exist after S3 PUT")
	if bucketEntry.Extended == nil {
		bucketEntry.Extended = make(map[string][]byte)
	}
	bucketEntry.Extended[s3tables.ExtendedKeyTableBucket] = []byte("true")
	_, err = client.UpdateEntry(ctx, &filer_pb.UpdateEntryRequest{
		Directory: bucketsPath, Entry: bucketEntry,
	})
	require.NoError(t, err)

	// Set table metadata xattr.
	nsDir := path.Join(bucketsPath, bucketName, namespace)
	tableEntry := lookupEntry(t, client, nsDir, tableName)
	require.NotNil(t, tableEntry, "table dir should exist after S3 PUT")
	if tableEntry.Extended == nil {
		tableEntry.Extended = make(map[string][]byte)
	}
	tableEntry.Extended[s3tables.ExtendedKeyMetadata] = xattr
	_, err = client.UpdateEntry(ctx, &filer_pb.UpdateEntryRequest{
		Directory: nsDir, Entry: tableEntry,
	})
	require.NoError(t, err)

	// Write manifest + manifest list + data file for each snapshot
	schema := meta.CurrentSchema()
	spec := meta.PartitionSpec()
	version := meta.Version()

	for _, snap := range snapshots {
		if snap.ManifestList == "" {
			continue
		}
		snapID := snap.SnapshotID
		dataFileName := fmt.Sprintf("snap-%d-data.parquet", snapID)
		dfBuilder, err := iceberg.NewDataFileBuilder(
			spec, iceberg.EntryContentData,
			path.Join("data", dataFileName), iceberg.ParquetFile,
			map[int]any{}, nil, nil, 10, 4096,
		)
		require.NoError(t, err)

		entry := iceberg.NewManifestEntry(
			iceberg.EntryStatusADDED, &snapID, nil, nil, dfBuilder.Build(),
		)

		// Manifest
		manifestName := fmt.Sprintf("manifest-%d.avro", snapID)
		var manifestBuf bytes.Buffer
		mf, err := iceberg.WriteManifest(
			path.Join("metadata", manifestName), &manifestBuf,
			version, spec, schema, snapID, []iceberg.ManifestEntry{entry},
		)
		require.NoError(t, err)
		writeFile(t, ctx, client, metaDir, manifestName, manifestBuf.Bytes())

		// Manifest list
		mlName := path.Base(snap.ManifestList)
		var mlBuf bytes.Buffer
		parent := snap.ParentSnapshotID
		seqNum := snap.SequenceNumber
		require.NoError(t, iceberg.WriteManifestList(
			version, &mlBuf, snapID, parent, &seqNum, 0,
			[]iceberg.ManifestFile{mf},
		))
		writeFile(t, ctx, client, metaDir, mlName, mlBuf.Bytes())

		// Data file (dummy content)
		writeFile(t, ctx, client, dataDir, dataFileName, []byte("fake-parquet"))
	}

	// Wait for snapshot timestamps (set slightly in the future by
	// buildMetadata) to move into the past so expiration checks work.
	time.Sleep(200 * time.Millisecond)

	return meta
}

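// writeFile creates dir/name with inline content via the filer CreateEntry RPC.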
func writeFile(t *testing.T, ctx context.Context, client filer_pb.SeaweedFilerClient, dir, name string, content []byte) {
	t.Helper()
	resp, err := client.CreateEntry(ctx, &filer_pb.CreateEntryRequest{
		Directory: dir,
		Entry: &filer_pb.Entry{
			Name: name,
			Attributes: &filer_pb.FuseAttributes{
				Mtime: time.Now().Unix(), Crtime: time.Now().Unix(),
				FileMode: uint32(0644), FileSize: uint64(len(content)),
			},
			Content: content,
		},
	})
	require.NoError(t, err, "writeFile(%s, %s): rpc error", dir, name)
	require.Empty(t, resp.Error, "writeFile(%s, %s): resp error", dir, name)
}

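// upsertFile writes dir/name with inline content, creating the entry when it is
// missing and otherwise updating it in place while clearing any stale chunks.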
func upsertFile(t *testing.T, ctx context.Context, client filer_pb.SeaweedFilerClient, dir, name string, content []byte) {
	t.Helper()

	entry := lookupEntry(t, client, dir, name)
	if entry == nil {
		writeFile(t, ctx, client, dir, name, content)
		return
	}
	if entry.Attributes == nil {
		entry.Attributes = &filer_pb.FuseAttributes{}
	}
	entry.Content = content
	entry.Chunks = nil
	entry.Attributes.Mtime = time.Now().Unix()
	entry.Attributes.FileMode = uint32(0644)
	entry.Attributes.FileSize = uint64(len(content))
	resp, err := client.UpdateEntry(ctx, &filer_pb.UpdateEntryRequest{
		Directory: dir,
		Entry:     entry,
	})
	require.NoError(t, err, "upsertFile(%s, %s): rpc error", dir, name)
	require.NotNil(t, resp, "upsertFile(%s, %s): nil response", dir, name)
}

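// lookupEntry fetches the filer entry for dir/name, returning nil when it does
// not exist.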
func lookupEntry(t *testing.T, client filer_pb.SeaweedFilerClient, dir, name string) *filer_pb.Entry {
	t.Helper()
	resp, err := filer_pb.LookupEntry(context.Background(), client, &filer_pb.LookupDirectoryEntryRequest{
		Directory: dir, Name: name,
	})
	if err != nil {
		if errors.Is(err, filer_pb.ErrNotFound) {
			return nil
		}
		require.NoError(t, err, "lookupEntry(%s, %s): unexpected error", dir, name)
	}
	return resp.Entry
}

// ---------------------------------------------------------------------------
// Integration tests
// ---------------------------------------------------------------------------

func TestIcebergMaintenanceIntegration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	t.Run("ExpireSnapshots", testExpireSnapshots)
	t.Run("CompactDataFiles", testCompactDataFiles)
	t.Run("RemoveOrphans", testRemoveOrphans)
	t.Run("RewriteManifests", testRewriteManifests)
	t.Run("FullMaintenanceCycle", testFullMaintenanceCycle)
}

func testExpireSnapshots(t *testing.T) {
	_, client := shared.filerConn(t)
	suffix := randomSuffix()
	bucket := "maint-expire-" + suffix
	ns := "ns"
	tbl := "tbl"

	now := time.Now().UnixMilli()
	snapshots := []table.Snapshot{
		{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro"},
		{SnapshotID: 2, TimestampMs: now + 1, ManifestList: "metadata/snap-2.avro"},
		{SnapshotID: 3, TimestampMs: now + 2, ManifestList: "metadata/snap-3.avro"},
	}
	populateTableViaFiler(t, client, shared.s3Endpoint, bucket, ns, tbl, snapshots)

	handler := icebergHandler.NewHandler(nil)
	config := icebergHandler.Config{
		SnapshotRetentionHours: 0, // instant expiry — everything eligible
		MaxSnapshotsToKeep:     1, // keep only the current snapshot
		MaxCommitRetries:       3,
	}

	result, err := handler.ExpireSnapshots(context.Background(), client, bucket, path.Join(ns, tbl), config)
	require.NoError(t, err)
	assert.Contains(t, result, "expired")
	t.Logf("ExpireSnapshots result: %s", result)

	// Verify metadata was updated
	entry := lookupEntry(t, client, path.Join(s3tables.TablesPath, bucket, ns), tbl)
	require.NotNil(t, entry)
	xattr := entry.Extended[s3tables.ExtendedKeyMetadata]
	require.NotEmpty(t, xattr)

	// Parse updated metadata to verify snapshot count
	var internalMeta struct {
		MetadataVersion int `json:"metadataVersion"`
	}
	require.NoError(t, json.Unmarshal(xattr, &internalMeta))
	assert.Greater(t, internalMeta.MetadataVersion, 1, "metadata version should have been incremented")
}

func testCompactDataFiles(t *testing.T) {
	_, client := shared.filerConn(t)
	suffix := randomSuffix()
	bucket := "maint-compact-" + suffix
	ns := "ns"
	tbl := "tbl"

	now := time.Now().UnixMilli()
	snapshots := []table.Snapshot{
		{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro", SequenceNumber: 1},
	}
	meta := populateTableViaFiler(t, client, shared.s3Endpoint, bucket, ns, tbl, snapshots)

	ctx := context.Background()
	tablePath := path.Join(ns, tbl)
	dataDir := path.Join(s3tables.TablesPath, bucket, tablePath, "data")
	metaDir := path.Join(s3tables.TablesPath, bucket, tablePath, "metadata")

	type parquetFile struct {
		name        string
		recordCount int64
		fileSize    int64
	}

	s3Client := newS3Client(t, shared.s3Endpoint)
	var files []parquetFile
	for i := 1; i <= 3; i++ {
		name := fmt.Sprintf("compact-input-%d.parquet", i)
		content := buildParquetData(t, fmt.Sprintf("compact-%d", i), 2000)
		s3putObject(t, s3Client, bucket, path.Join(tablePath, "data", name), content)

		entry := lookupEntry(t, client, dataDir, name)
		require.NotNil(t, entry, "uploaded parquet file should exist")
		require.NotEmpty(t, entry.Chunks, "uploaded parquet file should be chunk-backed")

		files = append(files, parquetFile{
			name:        name,
			recordCount: 2000,
			fileSize:    int64(len(content)),
		})
	}

	schema := meta.CurrentSchema()
	spec := meta.PartitionSpec()
	version := meta.Version()
	snapID := snapshots[0].SnapshotID

	entries := make([]iceberg.ManifestEntry, 0, len(files))
	for _, file := range files {
		dfBuilder, err := iceberg.NewDataFileBuilder(
			spec,
			iceberg.EntryContentData,
			path.Join("data", file.name),
			iceberg.ParquetFile,
			map[int]any{},
			nil, nil,
			file.recordCount,
			file.fileSize,
		)
		require.NoError(t, err)
		entries = append(entries, iceberg.NewManifestEntry(
			iceberg.EntryStatusADDED, &snapID, nil, nil, dfBuilder.Build(),
		))
	}

	manifestName := "manifest-compact.avro"
	var manifestBuf bytes.Buffer
	manifest, err := iceberg.WriteManifest(
		path.Join("metadata", manifestName), &manifestBuf,
		version, spec, schema, snapID, entries,
	)
	require.NoError(t, err)
	writeFile(t, ctx, client, metaDir, manifestName, manifestBuf.Bytes())

	var manifestListBuf bytes.Buffer
	seqNum := snapshots[0].SequenceNumber
	require.NoError(t, iceberg.WriteManifestList(
		version, &manifestListBuf, snapID, snapshots[0].ParentSnapshotID, &seqNum, 0,
		[]iceberg.ManifestFile{manifest},
	))
	upsertFile(t, ctx, client, metaDir, path.Base(snapshots[0].ManifestList), manifestListBuf.Bytes())

	handler := icebergHandler.NewHandler(nil)
	config := icebergHandler.Config{
		TargetFileSizeBytes: 16 * 1024 * 1024,
		MinInputFiles:       2,
		MaxCommitRetries:    3,
	}

	result, err := handler.CompactDataFiles(ctx, client, bucket, tablePath, config)
	require.NoError(t, err)
	assert.Contains(t, result, "compacted")
	t.Logf("CompactDataFiles result: %s", result)

	var compacted *filer_pb.Entry
	listErr := filer_pb.SeaweedList(ctx, client, dataDir, "", func(entry *filer_pb.Entry, isLast bool) error {
		// Compacted output uses compact-{snapID}-{newSnapID}-{binIdx}.parquet.
		// The test inputs use compact-input-{n}.parquet and must not satisfy this check.
		if strings.HasPrefix(entry.Name, "compact-") &&
			!strings.HasPrefix(entry.Name, "compact-input-") &&
			strings.HasSuffix(entry.Name, ".parquet") {
			compacted = entry
		}
		return nil
	}, "", false, 1000)
	require.NoError(t, listErr)
	require.NotNil(t, compacted, "compaction should create a merged parquet file")

	tableEntry := lookupEntry(t, client, path.Join(s3tables.TablesPath, bucket, ns), tbl)
	require.NotNil(t, tableEntry)
	xattr := tableEntry.Extended[s3tables.ExtendedKeyMetadata]
	require.NotEmpty(t, xattr)

	var internalMeta struct {
		MetadataVersion  int    `json:"metadataVersion"`
		MetadataLocation string `json:"metadataLocation,omitempty"`
	}
	require.NoError(t, json.Unmarshal(xattr, &internalMeta))
	assert.Greater(t, internalMeta.MetadataVersion, 1, "metadata version should advance after compaction")

	metadataName := path.Base(internalMeta.MetadataLocation)
	require.NotEmpty(t, metadataName, "compaction should point table metadata at a committed metadata file")
	metadataBytes := readFile(t, client, metaDir, metadataName)
	committedMeta, err := table.ParseMetadataBytes(metadataBytes)
	require.NoError(t, err)

	currentSnap := committedMeta.CurrentSnapshot()
	require.NotNil(t, currentSnap, "committed metadata should include a current snapshot")
	assert.Equal(t, snapshots[0].SequenceNumber+1, currentSnap.SequenceNumber, "compaction should advance the snapshot sequence number")

	manifestListName := path.Base(currentSnap.ManifestList)
	require.NotEmpty(t, manifestListName, "committed snapshot should reference a manifest list")
	manifestListBytes := readFile(t, client, metaDir, manifestListName)
	manifestFiles, err := iceberg.ReadManifestList(bytes.NewReader(manifestListBytes))
	require.NoError(t, err)
	require.Len(t, manifestFiles, 1, "compaction should commit a single replacement manifest")
	assert.Equal(t, snapshots[0].SequenceNumber+1, manifestFiles[0].SequenceNum(), "new manifest should use the committed snapshot sequence number")
	assert.Equal(t, snapshots[0].SequenceNumber+1, manifestFiles[0].MinSequenceNum(), "compaction manifest should inherit the committed sequence number for added data")

	manifestName = path.Base(manifestFiles[0].FilePath())
	manifestBytes := readFile(t, client, metaDir, manifestName)
	manifestEntries, err := iceberg.ReadManifest(manifestFiles[0], bytes.NewReader(manifestBytes), false)
	require.NoError(t, err)
	require.Len(t, manifestEntries, len(files)+1, "compaction should replace the input files with one merged output")

	deletedPaths := make(map[string]struct{}, len(files))
	addedPaths := make(map[string]struct{})
	for _, entry := range manifestEntries {
		switch entry.Status() {
		case iceberg.EntryStatusADDED:
			addedPaths[entry.DataFile().FilePath()] = struct{}{}
			assert.Equal(t, snapshots[0].SequenceNumber+1, entry.SequenceNum(), "added entries should inherit the new snapshot sequence number")
			fileSeqNum := entry.FileSequenceNum()
			require.NotNil(t, fileSeqNum, "added entries should carry a file sequence number")
			assert.Equal(t, snapshots[0].SequenceNumber+1, *fileSeqNum)
		case iceberg.EntryStatusDELETED:
			deletedPaths[entry.DataFile().FilePath()] = struct{}{}
			assert.Equal(t, snapshots[0].SequenceNumber, entry.SequenceNum(), "deleted entries should preserve the original data sequence number")
			fileSeqNum := entry.FileSequenceNum()
			require.NotNil(t, fileSeqNum, "deleted entries should preserve file sequence numbers")
			assert.Equal(t, snapshots[0].SequenceNumber, *fileSeqNum)
		default:
			t.Fatalf("unexpected manifest entry status %v for %s", entry.Status(), entry.DataFile().FilePath())
		}
	}

	require.Len(t, addedPaths, 1, "compaction should add exactly one merged parquet file")
	assert.Contains(t, addedPaths, path.Join("data", compacted.Name))
	require.Len(t, deletedPaths, len(files), "compaction should delete every original small input file")
	for _, file := range files {
		assert.Contains(t, deletedPaths, path.Join("data", file.name))
	}
}

func testRemoveOrphans(t *testing.T) {
	_, client := shared.filerConn(t)
	suffix := randomSuffix()
	bucket := "maint-orphan-" + suffix
	ns := "ns"
	tbl := "tbl"

	now := time.Now().UnixMilli()
	snapshots := []table.Snapshot{
		{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro"},
	}
	populateTableViaFiler(t, client, shared.s3Endpoint, bucket, ns, tbl, snapshots)

	ctx := context.Background()
	tablePath := path.Join(ns, tbl)
	tableFilerPath := path.Join(s3tables.TablesPath, bucket, tablePath)
	dataDir := path.Join(tableFilerPath, "data")
	metaDir := path.Join(tableFilerPath, "metadata")

	// Create orphan files (old enough to be removed)
	oldTime := time.Now().Add(-200 * time.Hour).Unix()
	writeOrphan := func(dir, name string) {
		resp, err := client.CreateEntry(ctx, &filer_pb.CreateEntryRequest{
			Directory: dir,
			Entry: &filer_pb.Entry{
				Name: name,
				Attributes: &filer_pb.FuseAttributes{
					Mtime: oldTime, Crtime: oldTime,
					FileMode: uint32(0644), FileSize: 100,
				},
				Content: []byte("orphan"),
			},
		})
		require.NoError(t, err, "writeOrphan(%s, %s): rpc error", dir, name)
		require.Empty(t, resp.Error, "writeOrphan(%s, %s): resp error", dir, name)
	}
	writeOrphan(dataDir, "orphan-data.parquet")
	writeOrphan(metaDir, "orphan-meta.avro")

	// Create a recent orphan (should NOT be removed)
	writeFile(t, ctx, client, dataDir, "recent-orphan.parquet", []byte("new"))

	handler := icebergHandler.NewHandler(nil)
	config := icebergHandler.Config{
		OrphanOlderThanHours: 72,
		MaxCommitRetries:     3,
	}

	result, err := handler.RemoveOrphans(ctx, client, bucket, tablePath, config)
	require.NoError(t, err)
	assert.Contains(t, result, "removed")
	t.Logf("RemoveOrphans result: %s", result)

	// Verify orphans were removed
	assert.Nil(t, lookupEntry(t, client, dataDir, "orphan-data.parquet"), "old orphan data file should be deleted")
	assert.Nil(t, lookupEntry(t, client, metaDir, "orphan-meta.avro"), "old orphan meta file should be deleted")

	// Verify recent orphan was kept
	assert.NotNil(t, lookupEntry(t, client, dataDir, "recent-orphan.parquet"), "recent orphan should be kept")
}

func testRewriteManifests(t *testing.T) {
	_, client := shared.filerConn(t)
	suffix := randomSuffix()
	bucket := "maint-rewrite-" + suffix
	ns := "ns"
	tbl := "tbl"

	// Each snapshot gets its own manifest list with one manifest. The current
	// snapshot (latest) therefore has only 1 manifest — below the rewrite
	// threshold. This tests the threshold check against a real filer.
	now := time.Now().UnixMilli()
	snapshots := []table.Snapshot{
		{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro"},
	}
	populateTableViaFiler(t, client, shared.s3Endpoint, bucket, ns, tbl, snapshots)

	handler := icebergHandler.NewHandler(nil)
	config := icebergHandler.Config{
		MinManifestsToRewrite: 5, // threshold higher than 1 manifest
		MaxCommitRetries:      3,
	}

	tablePath := path.Join(ns, tbl)
	result, err := handler.RewriteManifests(context.Background(), client, bucket, tablePath, config)
	require.NoError(t, err)
	assert.Contains(t, result, "below threshold")
	t.Logf("RewriteManifests result: %s", result)
}

func testFullMaintenanceCycle(t *testing.T) {
	_, client := shared.filerConn(t)
	suffix := randomSuffix()
	bucket := "maint-full-" + suffix
	ns := "ns"
	tbl := "tbl"

	now := time.Now().UnixMilli()
	snapshots := []table.Snapshot{
		{SnapshotID: 1, TimestampMs: now, ManifestList: "metadata/snap-1.avro"},
		{SnapshotID: 2, TimestampMs: now + 1, ManifestList: "metadata/snap-2.avro"},
		{SnapshotID: 3, TimestampMs: now + 2, ManifestList: "metadata/snap-3.avro"},
	}
	populateTableViaFiler(t, client, shared.s3Endpoint, bucket, ns, tbl, snapshots)

	ctx := context.Background()
	tablePath := path.Join(ns, tbl)

	// Add an orphan
	dataDir := path.Join(s3tables.TablesPath, bucket, tablePath, "data")
	oldTime := time.Now().Add(-200 * time.Hour).Unix()
	orphanResp, err := client.CreateEntry(ctx, &filer_pb.CreateEntryRequest{
		Directory: dataDir,
		Entry: &filer_pb.Entry{
			Name: "orphan.parquet",
			Attributes: &filer_pb.FuseAttributes{
				Mtime: oldTime, Crtime: oldTime,
				FileMode: uint32(0644), FileSize: 100,
			},
			Content: []byte("orphan"),
		},
	})
	require.NoError(t, err)
	require.Empty(t, orphanResp.Error)

	handler := icebergHandler.NewHandler(nil)

	// Step 1: Expire snapshots
	expireConfig := icebergHandler.Config{
		SnapshotRetentionHours: 0, // instant expiry
		MaxSnapshotsToKeep:     1,
		MaxCommitRetries:       3,
	}
	result, err := handler.ExpireSnapshots(ctx, client, bucket, tablePath, expireConfig)
	require.NoError(t, err)
	assert.Contains(t, result, "expired")
	t.Logf("Step 1 (expire): %s", result)

	// Step 2: Remove orphans
	orphanConfig := icebergHandler.Config{
		OrphanOlderThanHours: 72,
		MaxCommitRetries:     3,
	}
	result, err = handler.RemoveOrphans(ctx, client, bucket, tablePath, orphanConfig)
	require.NoError(t, err)
	t.Logf("Step 2 (orphans): %s", result)
	// The orphan and the unreferenced files from expired snapshots should be gone
	assert.Contains(t, result, "removed")

	// Step 3: Verify metadata is consistent
	entry := lookupEntry(t, client, path.Join(s3tables.TablesPath, bucket, ns), tbl)
	require.NotNil(t, entry)
	xattr := entry.Extended[s3tables.ExtendedKeyMetadata]
	require.NotEmpty(t, xattr)

	var internalMeta struct {
		MetadataVersion int `json:"metadataVersion"`
	}
	require.NoError(t, json.Unmarshal(xattr, &internalMeta))
	assert.Greater(t, internalMeta.MetadataVersion, 1, "metadata version should have advanced through the cycle")
	t.Logf("Final metadata version: %d", internalMeta.MetadataVersion)
}