Files
seaweedFS/test/s3tables/catalog/iceberg_catalog_test.go
Chris Lu 0d8588e3ae S3: Implement IAM defaults and STS signing key fallback (#8348)
* S3: Implement IAM defaults and STS signing key fallback logic

* S3: Refactor startup order to init SSE-S3 key manager before IAM

* S3: Derive STS signing key from KEK using HKDF for security isolation

* S3: Document STS signing key fallback in security.toml

* fix(s3api): refine anonymous access logic and secure-by-default behavior

- Initialize anonymous identity by default in `NewIdentityAccessManagement` to prevent nil pointer exceptions.
- Ensure `ReplaceS3ApiConfiguration` preserves the anonymous identity if not present in the new configuration.
- Update `NewIdentityAccessManagement` signature to accept `filerClient`.
- In legacy mode (no policy engine), anonymous defaults to Deny (no actions), preserving secure-by-default behavior.
- Use specific `LookupAnonymous` method instead of generic map lookup.
- Update tests to accommodate signature changes and verify improved anonymous handling.

* feat(s3api): make IAM configuration optional

- Start S3 API server without a configuration file if `EnableIam` option is set.
- Default to `Allow` effect for policy engine when no configuration is provided (Zero-Config mode).
- Handle empty configuration path gracefully in `loadIAMManagerFromConfig`.
- Add integration test `iam_optional_test.go` to verify empty config behavior.

* fix(iamapi): fix signature mismatch in NewIdentityAccessManagementWithStore

* fix(iamapi): properly initialize FilerClient instead of passing nil

* fix(iamapi): properly initialize filer client for IAM management

- Instead of passing `nil`, construct a `wdclient.FilerClient` using the provided `Filers` addresses.
- Ensure `NewIdentityAccessManagementWithStore` receives a valid `filerClient` to avoid potential nil pointer dereferences or limited functionality.

* clean: remove dead code in s3api_server.go

* refactor(s3api): improve IAM initialization, safety and anonymous access security

* fix(s3api): ensure IAM config loads from filer after client init

* fix(s3): resolve test failures in integration, CORS, and tagging tests

- Fix CORS tests by providing explicit anonymous permissions config
- Fix S3 integration tests by setting admin credentials in init
- Align tagging test credentials in CI with IAM defaults
- Added goroutine to retry IAM config load in iamapi server

* fix(s3): allow anonymous access to health targets and S3 Tables when identities are present

* fix(ci): use /healthz for Caddy health check in awscli tests

* iam, s3api: expose DefaultAllow from IAM and Policy Engine

This allows checking the global "Open by Default" configuration from
other components like S3 Tables.

* s3api/s3tables: support DefaultAllow in permission logic and handler

Updated CheckPermissionWithContext to respect the DefaultAllow flag
in PolicyContext. This enables "Open by Default" behavior for
unauthenticated access in zero-config environments. Added a targeted
unit test to verify the logic.

* s3api/s3tables: propagate DefaultAllow through handlers

Propagated the DefaultAllow flag to individual handlers for
namespaces, buckets, tables, policies, and tagging. This ensures
consistent "Open by Default" behavior across all S3 Tables API
endpoints.

* s3api: wire up DefaultAllow for S3 Tables API initialization

Updated registerS3TablesRoutes to query the global IAM configuration
and set the DefaultAllow flag on the S3 Tables API server. This
completes the end-to-end propagation required for anonymous access in
zero-config environments. Added a SetDefaultAllow method to
S3TablesApiServer to facilitate this.

* s3api: fix tests by adding DefaultAllow to mock IAM integrations

The IAMIntegration interface was updated to include DefaultAllow(),
breaking several mock implementations in tests. This commit fixes
the build errors by adding the missing method to the mocks.

* env

* ensure ports

* env

* env

* fix default allow

* add one more test using non-anonymous user

* debug

* add more debug

* less logs
2026-02-16 13:59:13 -08:00

586 lines
17 KiB
Go

// Package catalog provides integration tests for the Iceberg REST Catalog API.
// These tests use DuckDB running in Docker to verify catalog operations.
package catalog
import (
"bytes"
"context"
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"os"
"os/exec"
"path/filepath"
"strings"
"testing"
"time"
)
// TestEnvironment contains the test environment configuration
type TestEnvironment struct {
seaweedDir string
weedBinary string
dataDir string
s3Port int
s3GrpcPort int
icebergPort int
masterPort int
masterGrpcPort int
filerPort int
filerGrpcPort int
volumePort int
volumeGrpcPort int
weedProcess *exec.Cmd
weedCancel context.CancelFunc
dockerAvailable bool
}
// hasDocker checks if Docker is available
func hasDocker() bool {
cmd := exec.Command("docker", "version")
return cmd.Run() == nil
}
// getFreePort returns an available ephemeral port and its listener
func getFreePort() (int, net.Listener, error) {
listener, err := net.Listen("tcp", "127.0.0.1:0")
if err != nil {
return 0, nil, err
}
addr := listener.Addr().(*net.TCPAddr)
return addr.Port, listener, nil
}
// NewTestEnvironment creates a new test environment
func NewTestEnvironment(t *testing.T) *TestEnvironment {
t.Helper()
// Find the SeaweedFS root directory
wd, err := os.Getwd()
if err != nil {
t.Fatalf("Failed to get working directory: %v", err)
}
// Navigate up to find the SeaweedFS root (contains go.mod)
seaweedDir := wd
for i := 0; i < 5; i++ {
if _, err := os.Stat(filepath.Join(seaweedDir, "go.mod")); err == nil {
break
}
seaweedDir = filepath.Dir(seaweedDir)
}
// Check for weed binary
weedBinary := filepath.Join(seaweedDir, "weed", "weed")
if _, err := os.Stat(weedBinary); os.IsNotExist(err) {
// Try system PATH
weedBinary = "weed"
if _, err := exec.LookPath(weedBinary); err != nil {
t.Skip("weed binary not found, skipping integration test")
}
}
// Create temporary data directory
dataDir, err := os.MkdirTemp("", "seaweed-iceberg-test-*")
if err != nil {
t.Fatalf("Failed to create temp dir: %v", err)
}
// Allocate free ephemeral ports for each service
var listeners []net.Listener
defer func() {
for _, l := range listeners {
l.Close()
}
}()
var l net.Listener
s3Port, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for S3: %v", err)
}
listeners = append(listeners, l)
icebergPort, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for Iceberg: %v", err)
}
listeners = append(listeners, l)
s3GrpcPort, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for S3 gRPC: %v", err)
}
listeners = append(listeners, l)
masterPort, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for Master: %v", err)
}
listeners = append(listeners, l)
masterGrpcPort, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for Master gRPC: %v", err)
}
listeners = append(listeners, l)
filerPort, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for Filer: %v", err)
}
listeners = append(listeners, l)
filerGrpcPort, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for Filer gRPC: %v", err)
}
listeners = append(listeners, l)
volumePort, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for Volume: %v", err)
}
listeners = append(listeners, l)
volumeGrpcPort, l, err := getFreePort()
if err != nil {
t.Fatalf("Failed to get free port for Volume gRPC: %v", err)
}
listeners = append(listeners, l)
return &TestEnvironment{
seaweedDir: seaweedDir,
weedBinary: weedBinary,
dataDir: dataDir,
s3Port: s3Port,
s3GrpcPort: s3GrpcPort,
icebergPort: icebergPort,
masterPort: masterPort,
masterGrpcPort: masterGrpcPort,
filerPort: filerPort,
filerGrpcPort: filerGrpcPort,
volumePort: volumePort,
volumeGrpcPort: volumeGrpcPort,
dockerAvailable: hasDocker(),
}
}
// StartSeaweedFS starts a SeaweedFS mini cluster
func (env *TestEnvironment) StartSeaweedFS(t *testing.T) {
t.Helper()
ctx, cancel := context.WithCancel(context.Background())
env.weedCancel = cancel
masterDir := filepath.Join(env.dataDir, "master")
filerDir := filepath.Join(env.dataDir, "filer")
volumeDir := filepath.Join(env.dataDir, "volume")
for _, dir := range []string{masterDir, filerDir, volumeDir} {
if err := os.MkdirAll(dir, 0755); err != nil {
t.Fatalf("Failed to create directory %s: %v", dir, err)
}
}
cmd := exec.CommandContext(ctx, env.weedBinary, "mini",
"-master.port", fmt.Sprintf("%d", env.masterPort),
"-master.port.grpc", fmt.Sprintf("%d", env.masterGrpcPort),
"-volume.port", fmt.Sprintf("%d", env.volumePort),
"-volume.port.grpc", fmt.Sprintf("%d", env.volumeGrpcPort),
"-filer.port", fmt.Sprintf("%d", env.filerPort),
"-filer.port.grpc", fmt.Sprintf("%d", env.filerGrpcPort),
"-s3.port", fmt.Sprintf("%d", env.s3Port),
"-s3.port.grpc", fmt.Sprintf("%d", env.s3GrpcPort),
"-s3.port.iceberg", fmt.Sprintf("%d", env.icebergPort),
"-ip.bind", "0.0.0.0",
"-dir", env.dataDir,
)
cmd.Stdout = os.Stdout
cmd.Stderr = os.Stderr
if err := cmd.Start(); err != nil {
t.Fatalf("Failed to start SeaweedFS: %v", err)
}
env.weedProcess = cmd
// Wait for services to be ready
if !env.waitForService(fmt.Sprintf("http://127.0.0.1:%d/v1/config", env.icebergPort), 30*time.Second) {
t.Fatalf("Iceberg REST API did not become ready")
}
}
// waitForService waits for a service to become available
func (env *TestEnvironment) waitForService(url string, timeout time.Duration) bool {
client := &http.Client{Timeout: 2 * time.Second}
deadline := time.Now().Add(timeout)
for time.Now().Before(deadline) {
resp, err := client.Get(url)
if err == nil {
resp.Body.Close()
if resp.StatusCode == http.StatusOK {
return true
}
}
time.Sleep(500 * time.Millisecond)
}
return false
}
// Cleanup stops SeaweedFS and cleans up resources
func (env *TestEnvironment) Cleanup(t *testing.T) {
t.Helper()
if env.weedCancel != nil {
env.weedCancel()
}
if env.weedProcess != nil {
// Give process time to shut down gracefully
time.Sleep(2 * time.Second)
env.weedProcess.Wait()
}
if env.dataDir != "" {
os.RemoveAll(env.dataDir)
}
}
// IcebergURL returns the Iceberg REST Catalog URL
func (env *TestEnvironment) IcebergURL() string {
return fmt.Sprintf("http://127.0.0.1:%d", env.icebergPort)
}
// TestIcebergConfig tests the /v1/config endpoint
func TestIcebergConfig(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
env := NewTestEnvironment(t)
defer env.Cleanup(t)
env.StartSeaweedFS(t)
// Test GET /v1/config
resp, err := http.Get(env.IcebergURL() + "/v1/config")
if err != nil {
t.Fatalf("Failed to get config: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
t.Errorf("Expected status 200, got %d", resp.StatusCode)
}
body, err := io.ReadAll(resp.Body)
if err != nil {
t.Fatalf("Failed to read response body: %v", err)
}
// Verify response contains required fields
bodyStr := string(body)
if !strings.Contains(bodyStr, "defaults") || !strings.Contains(bodyStr, "overrides") {
t.Errorf("Config response missing required fields: %s", bodyStr)
}
}
// TestIcebergNamespaces tests namespace operations
func TestIcebergNamespaces(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
env := NewTestEnvironment(t)
defer env.Cleanup(t)
env.StartSeaweedFS(t)
// Create the default table bucket first via S3
createTableBucket(t, env, "warehouse")
// Test GET /v1/namespaces (should return empty list initially)
resp, err := http.Get(env.IcebergURL() + "/v1/namespaces")
if err != nil {
t.Fatalf("Failed to list namespaces: %v", err)
}
defer resp.Body.Close()
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
t.Fatalf("Expected status 200, got %d: %s", resp.StatusCode, body)
}
}
// TestStageCreateAndFinalizeFlow verifies staged create remains invisible until assert-create commit finalizes table creation.
func TestStageCreateAndFinalizeFlow(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
env := NewTestEnvironment(t)
defer env.Cleanup(t)
env.StartSeaweedFS(t)
createTableBucket(t, env, "warehouse")
namespace := "stage_ns"
tableName := "orders"
status, _, err := doIcebergJSONRequest(env, http.MethodPost, "/v1/namespaces", map[string]any{
"namespace": []string{namespace},
})
if err != nil {
t.Fatalf("Create namespace request failed: %v", err)
}
if status != http.StatusOK && status != http.StatusConflict {
t.Fatalf("Create namespace status = %d, want 200 or 409", status)
}
status, badReqResp, err := doIcebergJSONRequest(env, http.MethodPost, fmt.Sprintf("/v1/namespaces/%s/tables", namespace), map[string]any{
"stage-create": true,
})
if err != nil {
t.Fatalf("Stage create missing-name request failed: %v", err)
}
if status != http.StatusBadRequest {
t.Fatalf("Stage create missing-name status = %d, want 400", status)
}
errorObj, _ := badReqResp["error"].(map[string]any)
if got := errorObj["type"]; got != "BadRequestException" {
t.Fatalf("error.type = %v, want BadRequestException", got)
}
msg, _ := errorObj["message"].(string)
if !strings.Contains(strings.ToLower(msg), "table name is required") {
t.Fatalf("error.message = %v, want it to include %q", errorObj["message"], "table name is required")
}
status, stageResp, err := doIcebergJSONRequest(env, http.MethodPost, fmt.Sprintf("/v1/namespaces/%s/tables", namespace), map[string]any{
"name": tableName,
"stage-create": true,
})
if err != nil {
t.Fatalf("Stage create request failed: %v", err)
}
if status != http.StatusOK {
t.Fatalf("Stage create status = %d, want 200", status)
}
stageLocation, _ := stageResp["metadata-location"].(string)
if !strings.HasSuffix(stageLocation, "/metadata/v1.metadata.json") {
t.Fatalf("stage metadata-location = %q, want suffix /metadata/v1.metadata.json", stageLocation)
}
status, _, err = doIcebergJSONRequest(env, http.MethodGet, fmt.Sprintf("/v1/namespaces/%s/tables/%s", namespace, tableName), nil)
if err != nil {
t.Fatalf("Load staged table request failed: %v", err)
}
if status != http.StatusNotFound {
t.Fatalf("Load staged table status = %d, want 404", status)
}
status, commitResp, err := doIcebergJSONRequest(env, http.MethodPost, fmt.Sprintf("/v1/namespaces/%s/tables/%s", namespace, tableName), map[string]any{
"requirements": []map[string]any{
{"type": "assert-create"},
},
"updates": []any{},
})
if err != nil {
t.Fatalf("Finalize commit request failed: %v", err)
}
if status != http.StatusOK {
t.Fatalf("Finalize commit status = %d, want 200", status)
}
commitLocation, _ := commitResp["metadata-location"].(string)
if !strings.HasSuffix(commitLocation, "/metadata/v1.metadata.json") {
t.Fatalf("final metadata-location = %q, want suffix /metadata/v1.metadata.json", commitLocation)
}
status, loadResp, err := doIcebergJSONRequest(env, http.MethodGet, fmt.Sprintf("/v1/namespaces/%s/tables/%s", namespace, tableName), nil)
if err != nil {
t.Fatalf("Load finalized table request failed: %v", err)
}
if status != http.StatusOK {
t.Fatalf("Load finalized table status = %d, want 200", status)
}
loadLocation, _ := loadResp["metadata-location"].(string)
if loadLocation != commitLocation {
t.Fatalf("loaded metadata-location = %q, want %q", loadLocation, commitLocation)
}
}
// TestCommitMissingTableWithoutAssertCreate ensures missing-table commits still require assert-create for creation.
func TestCommitMissingTableWithoutAssertCreate(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
env := NewTestEnvironment(t)
defer env.Cleanup(t)
env.StartSeaweedFS(t)
createTableBucket(t, env, "warehouse")
namespace := "stage_missing_assert_ns"
tableName := "missing_table"
status, _, err := doIcebergJSONRequest(env, http.MethodPost, "/v1/namespaces", map[string]any{
"namespace": []string{namespace},
})
if err != nil {
t.Fatalf("Create namespace request failed: %v", err)
}
if status != http.StatusOK && status != http.StatusConflict {
t.Fatalf("Create namespace status = %d, want 200 or 409", status)
}
status, _, err = doIcebergJSONRequest(env, http.MethodPost, fmt.Sprintf("/v1/namespaces/%s/tables/%s", namespace, tableName), map[string]any{
"requirements": []any{},
"updates": []any{},
})
if err != nil {
t.Fatalf("Commit missing table request failed: %v", err)
}
if status != http.StatusNotFound {
t.Fatalf("Commit missing table status = %d, want 404", status)
}
}
// doIcebergJSONRequest decodes JSON object responses used by catalog tests.
func doIcebergJSONRequest(env *TestEnvironment, method, path string, payload any) (int, map[string]any, error) {
url := env.IcebergURL() + path
var bodyReader io.Reader
if payload != nil {
data, err := json.Marshal(payload)
if err != nil {
return 0, nil, err
}
bodyReader = bytes.NewReader(data)
}
req, err := http.NewRequest(method, url, bodyReader)
if err != nil {
return 0, nil, err
}
if payload != nil {
req.Header.Set("Content-Type", "application/json")
}
resp, err := http.DefaultClient.Do(req)
if err != nil {
return 0, nil, err
}
defer resp.Body.Close()
data, err := io.ReadAll(resp.Body)
if err != nil {
return resp.StatusCode, nil, err
}
if len(data) == 0 {
return resp.StatusCode, nil, nil
}
var decoded map[string]any
if err := json.Unmarshal(data, &decoded); err != nil {
return resp.StatusCode, nil, fmt.Errorf("failed to decode %s %s response: %w body=%s", method, path, err, string(data))
}
return resp.StatusCode, decoded, nil
}
// createTableBucket creates a table bucket via the S3Tables REST API
func createTableBucket(t *testing.T, env *TestEnvironment, bucketName string) {
t.Helper()
// Use S3Tables REST API to create the bucket
endpoint := fmt.Sprintf("http://localhost:%d/buckets", env.s3Port)
reqBody := fmt.Sprintf(`{"name":"%s"}`, bucketName)
req, err := http.NewRequest(http.MethodPut, endpoint, strings.NewReader(reqBody))
if err != nil {
t.Fatalf("Failed to create request: %v", err)
}
req.Header.Set("Content-Type", "application/x-amz-json-1.1")
resp, err := http.DefaultClient.Do(req)
if err != nil {
t.Fatalf("Failed to create table bucket %s: %v", bucketName, err)
}
defer resp.Body.Close()
body, _ := io.ReadAll(resp.Body)
t.Logf("Create table bucket %s response: status=%d, body=%s", bucketName, resp.StatusCode, string(body))
if resp.StatusCode != http.StatusOK && resp.StatusCode != http.StatusConflict {
t.Fatalf("Failed to create table bucket %s, status %d: %s", bucketName, resp.StatusCode, body)
}
t.Logf("Created table bucket %s", bucketName)
}
// TestDuckDBIntegration tests Iceberg catalog operations using DuckDB
// This test requires Docker to be available
func TestDuckDBIntegration(t *testing.T) {
if testing.Short() {
t.Skip("Skipping integration test in short mode")
}
env := NewTestEnvironment(t)
defer env.Cleanup(t)
if !env.dockerAvailable {
t.Skip("Docker not available, skipping DuckDB integration test")
}
env.StartSeaweedFS(t)
// Create a temporary SQL file for DuckDB to execute
sqlFile := filepath.Join(env.dataDir, "test.sql")
sqlContent := fmt.Sprintf(`
-- Install and load Iceberg extension
INSTALL iceberg;
LOAD iceberg;
-- List namespaces via Iceberg REST catalog (basic connectivity test)
-- Note: Full operations require setting up S3 credentials which is beyond this test
SELECT 'Iceberg extension loaded successfully' as result;
`)
if err := os.WriteFile(sqlFile, []byte(sqlContent), 0644); err != nil {
t.Fatalf("Failed to write SQL file: %v", err)
}
// Run DuckDB in Docker to test Iceberg connectivity
// Use host.docker.internal to connect to the host's Iceberg port
cmd := exec.Command("docker", "run", "--rm",
"-v", fmt.Sprintf("%s:/test", env.dataDir),
"--add-host", "host.docker.internal:host-gateway",
"--entrypoint", "duckdb",
"duckdb/duckdb:latest",
"-init", "/test/test.sql",
"-c", "SELECT 1",
)
output, err := cmd.CombinedOutput()
if err != nil {
t.Logf("DuckDB output: %s", output)
// Check for expected errors in certain CI environments
outputStr := string(output)
if strings.Contains(outputStr, "iceberg extension is not available") ||
strings.Contains(outputStr, "Failed to load") {
t.Skip("Skipping DuckDB test: Iceberg extension not available in Docker image")
}
// Any other error is unexpected
t.Fatalf("DuckDB command failed unexpectedly. Output: %s\nError: %v", output, err)
}
// Verify the test completed successfully
outputStr := string(output)
t.Logf("DuckDB output: %s", outputStr)
if !strings.Contains(outputStr, "Iceberg extension loaded successfully") {
t.Errorf("Expected success message in output, got: %s", outputStr)
}
}