* full integration with iceberg-go * Table Commit Operations (handleUpdateTable) * s3tables: fix Iceberg v2 compliance and namespace properties This commit ensures SeaweedFS Iceberg REST Catalog is compliant with Iceberg Format Version 2 by: - Using iceberg-go's table.NewMetadataWithUUID for strict v2 compliance. - Explicitly initializing namespace properties to empty maps. - Removing omitempty from required Iceberg response fields. - Fixing CommitTableRequest unmarshaling using table.Requirements and table.Updates. * s3tables: automate Iceberg integration tests - Added Makefile for local test execution and cluster management. - Added docker-compose for PyIceberg compatibility kit. - Added Go integration test harness for PyIceberg. - Updated GitHub CI to run Iceberg catalog tests automatically. * s3tables: update PyIceberg test suite for compatibility - Updated test_rest_catalog.py to use latest PyIceberg transaction APIs. - Updated Dockerfile to include pyarrow and pandas dependencies. - Improved namespace and table handling in integration tests. * s3tables: address review feedback on Iceberg Catalog - Implemented robust metadata version parsing and incrementing. - Ensured table metadata changes are persisted during commit (handleUpdateTable). - Standardized namespace property initialization for consistency. - Fixed unused variable and incorrect struct field build errors. * s3tables: finalize Iceberg REST Catalog and optimize tests - Implemented robust metadata versioning and persistence. - Standardized namespace property initialization. - Optimized integration tests using pre-built Docker image. - Added strict property persistence validation to test suite. - Fixed build errors from previous partial updates. * Address PR review: fix Table UUID stability, implement S3Tables UpdateTable, and support full metadata persistence individually * fix: Iceberg catalog stable UUIDs, metadata persistence, and file writing - Ensure table UUIDs are stable (do not regenerate on load). - Persist full table metadata (Iceberg JSON) in s3tables extended attributes. - Add `MetadataVersion` to explicitly track version numbers, replacing regex parsing. - Implement `saveMetadataFile` to persist metadata JSON files to the Filer on commit. - Update `CreateTable` and `UpdateTable` handlers to use the new logic. * test: bind weed mini to 0.0.0.0 in integration tests to fix Docker connectivity * Iceberg: fix metadata handling in REST catalog - Add nil guard in createTable - Fix updateTable to correctly load existing metadata from storage - Ensure full metadata persistence on updates - Populate loadTable result with parsed metadata * S3Tables: add auth checks and fix response fields in UpdateTable - Add CheckPermissionWithContext to UpdateTable handler - Include TableARN and MetadataLocation in UpdateTable response - Use ErrCodeConflict (409) for version token mismatches * Tests: improve Iceberg catalog test infrastructure and cleanup - Makefile: use PID file for precise process killing - test_rest_catalog.py: remove unused variables and fix f-strings * Iceberg: fix variable shadowing in UpdateTable - Rename inner loop variable `req` to `requirement` to avoid shadowing outer request variable * S3Tables: simplify MetadataVersion initialization - Use `max(req.MetadataVersion, 1)` instead of anonymous function * Tests: remove unicode characters from S3 tables integration test logs - Remove unicode checkmarks from test output for cleaner logs * Iceberg: improve metadata persistence robustness - Fix MetadataLocation in LoadTableResult to fallback to generated location - Improve saveMetadataFile to ensure directory hierarchy existence and robust error handling
81 lines
2.4 KiB
Go
81 lines
2.4 KiB
Go
// Package catalog provides integration tests for the Iceberg REST Catalog API.
|
|
// This file adds PyIceberg-based compatibility tests using Docker.
|
|
package catalog
|
|
|
|
import (
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"path/filepath"
|
|
"testing"
|
|
)
|
|
|
|
// TestPyIcebergRestCatalog tests the Iceberg REST Catalog using PyIceberg client in Docker.
|
|
// This provides a more comprehensive test than DuckDB as PyIceberg fully exercises the REST API.
|
|
//
|
|
// Prerequisites:
|
|
// - Docker must be available
|
|
// - SeaweedFS must be running with Iceberg REST enabled
|
|
//
|
|
// To run manually:
|
|
//
|
|
// cd test/s3tables/catalog
|
|
// docker compose -f docker-compose.test.yaml up --build
|
|
func TestPyIcebergRestCatalog(t *testing.T) {
|
|
if testing.Short() {
|
|
t.Skip("Skipping integration test in short mode")
|
|
}
|
|
|
|
env := NewTestEnvironment(t)
|
|
defer env.Cleanup(t)
|
|
|
|
if !env.dockerAvailable {
|
|
t.Skip("Docker not available, skipping PyIceberg integration test")
|
|
}
|
|
|
|
env.StartSeaweedFS(t)
|
|
|
|
// Create the test bucket first
|
|
bucketName := "pyiceberg-compat-test"
|
|
createTableBucket(t, env, bucketName)
|
|
|
|
// Build the test working directory path
|
|
testDir := filepath.Join(env.seaweedDir, "test", "s3tables", "catalog")
|
|
|
|
// Run PyIceberg test using Docker
|
|
catalogURL := fmt.Sprintf("http://host.docker.internal:%d", env.icebergPort)
|
|
s3Endpoint := fmt.Sprintf("http://host.docker.internal:%d", env.s3Port)
|
|
warehouse := fmt.Sprintf("s3://%s/", bucketName)
|
|
|
|
// Build the test image first for faster repeated runs
|
|
buildCmd := exec.Command("docker", "build", "-t", "iceberg-rest-test", "-f", "Dockerfile.pyiceberg", ".")
|
|
buildCmd.Dir = testDir
|
|
if out, err := buildCmd.CombinedOutput(); err != nil {
|
|
t.Fatalf("Failed to build test image: %v\n%s", err, string(out))
|
|
}
|
|
|
|
cmd := exec.Command("docker", "run", "--rm",
|
|
"--add-host", "host.docker.internal:host-gateway",
|
|
"-e", fmt.Sprintf("AWS_ACCESS_KEY_ID=%s", "test"),
|
|
"-e", fmt.Sprintf("AWS_SECRET_ACCESS_KEY=%s", "test"),
|
|
"-e", fmt.Sprintf("AWS_ENDPOINT_URL=%s", s3Endpoint),
|
|
"-v", fmt.Sprintf("%s:/app:ro", testDir),
|
|
"iceberg-rest-test",
|
|
"python3", "/app/test_rest_catalog.py",
|
|
"--catalog-url", catalogURL,
|
|
"--warehouse", warehouse,
|
|
"--prefix", bucketName,
|
|
)
|
|
cmd.Dir = testDir
|
|
cmd.Stdout = os.Stdout
|
|
cmd.Stderr = os.Stderr
|
|
|
|
t.Logf("Running PyIceberg REST catalog test...")
|
|
t.Logf(" Catalog URL: %s", catalogURL)
|
|
t.Logf(" Warehouse: %s", warehouse)
|
|
|
|
if err := cmd.Run(); err != nil {
|
|
t.Errorf("PyIceberg test failed: %v", err)
|
|
}
|
|
}
|