s3tables: redesign Iceberg REST Catalog using iceberg-go and automate integration tests (#8197)

* full integration with iceberg-go

* Table Commit Operations (handleUpdateTable)

* s3tables: fix Iceberg v2 compliance and namespace properties

This commit ensures SeaweedFS Iceberg REST Catalog is compliant with
Iceberg Format Version 2 by:
- Using iceberg-go's table.NewMetadataWithUUID for strict v2 compliance.
- Explicitly initializing namespace properties to empty maps.
- Removing omitempty from required Iceberg response fields.
- Fixing CommitTableRequest unmarshaling using table.Requirements and table.Updates.

* s3tables: automate Iceberg integration tests

- Added Makefile for local test execution and cluster management.
- Added docker-compose for PyIceberg compatibility kit.
- Added Go integration test harness for PyIceberg.
- Updated GitHub CI to run Iceberg catalog tests automatically.

* s3tables: update PyIceberg test suite for compatibility

- Updated test_rest_catalog.py to use latest PyIceberg transaction APIs.
- Updated Dockerfile to include pyarrow and pandas dependencies.
- Improved namespace and table handling in integration tests.

* s3tables: address review feedback on Iceberg Catalog

- Implemented robust metadata version parsing and incrementing.
- Ensured table metadata changes are persisted during commit (handleUpdateTable).
- Standardized namespace property initialization for consistency.
- Fixed unused variable and incorrect struct field build errors.

* s3tables: finalize Iceberg REST Catalog and optimize tests

- Implemented robust metadata versioning and persistence.
- Standardized namespace property initialization.
- Optimized integration tests using pre-built Docker image.
- Added strict property persistence validation to test suite.
- Fixed build errors from previous partial updates.

* Address PR review: fix Table UUID stability, implement S3Tables UpdateTable, and support full metadata persistence individually

* fix: Iceberg catalog stable UUIDs, metadata persistence, and file writing

- Ensure table UUIDs are stable (do not regenerate on load).
- Persist full table metadata (Iceberg JSON) in s3tables extended attributes.
- Add `MetadataVersion` to explicitly track version numbers, replacing regex parsing.
- Implement `saveMetadataFile` to persist metadata JSON files to the Filer on commit.
- Update `CreateTable` and `UpdateTable` handlers to use the new logic.

* test: bind weed mini to 0.0.0.0 in integration tests to fix Docker connectivity

* Iceberg: fix metadata handling in REST catalog

- Add nil guard in createTable
- Fix updateTable to correctly load existing metadata from storage
- Ensure full metadata persistence on updates
- Populate loadTable result with parsed metadata

* S3Tables: add auth checks and fix response fields in UpdateTable

- Add CheckPermissionWithContext to UpdateTable handler
- Include TableARN and MetadataLocation in UpdateTable response
- Use ErrCodeConflict (409) for version token mismatches

* Tests: improve Iceberg catalog test infrastructure and cleanup

- Makefile: use PID file for precise process killing
- test_rest_catalog.py: remove unused variables and fix f-strings

* Iceberg: fix variable shadowing in UpdateTable

- Rename inner loop variable `req` to `requirement` to avoid shadowing outer request variable

* S3Tables: simplify MetadataVersion initialization

- Use `max(req.MetadataVersion, 1)` instead of anonymous function

* Tests: remove Unicode characters from S3 tables integration test logs

- Remove Unicode checkmarks from test output for cleaner logs

* Iceberg: improve metadata persistence robustness

- Fix MetadataLocation in LoadTableResult to fall back to the generated location
- Improve saveMetadataFile to ensure the directory hierarchy exists and to handle errors robustly
This commit is contained in:
Chris Lu
2026-02-03 15:30:04 -08:00
committed by GitHub
parent 47fc9e771f
commit b244bb58aa
21 changed files with 1485 additions and 232 deletions

View File

@@ -77,13 +77,13 @@ func testTableBucketLifecycle(t *testing.T, client *S3TablesClient) {
createResp, err := client.CreateTableBucket(bucketName, nil)
require.NoError(t, err, "Failed to create table bucket")
assert.Contains(t, createResp.ARN, bucketName)
t.Logf("Created table bucket: %s", createResp.ARN)
t.Logf("Created table bucket: %s", createResp.ARN)
// Get table bucket
getResp, err := client.GetTableBucket(createResp.ARN)
require.NoError(t, err, "Failed to get table bucket")
assert.Equal(t, bucketName, getResp.Name)
t.Logf("Got table bucket: %s", getResp.Name)
t.Logf("Got table bucket: %s", getResp.Name)
// List table buckets
listResp, err := client.ListTableBuckets("", "", 0)
@@ -96,12 +96,12 @@ func testTableBucketLifecycle(t *testing.T, client *S3TablesClient) {
}
}
assert.True(t, found, "Created bucket should appear in list")
t.Logf("Listed table buckets, found %d buckets", len(listResp.TableBuckets))
t.Logf("Listed table buckets, found %d buckets", len(listResp.TableBuckets))
// Delete table bucket
err = client.DeleteTableBucket(createResp.ARN)
require.NoError(t, err, "Failed to delete table bucket")
t.Logf("Deleted table bucket: %s", bucketName)
t.Logf("Deleted table bucket: %s", bucketName)
// Verify bucket is deleted
_, err = client.GetTableBucket(createResp.ARN)
@@ -123,13 +123,13 @@ func testNamespaceLifecycle(t *testing.T, client *S3TablesClient) {
createNsResp, err := client.CreateNamespace(bucketARN, []string{namespaceName})
require.NoError(t, err, "Failed to create namespace")
assert.Equal(t, []string{namespaceName}, createNsResp.Namespace)
t.Logf("Created namespace: %s", namespaceName)
t.Logf("Created namespace: %s", namespaceName)
// Get namespace
getNsResp, err := client.GetNamespace(bucketARN, []string{namespaceName})
require.NoError(t, err, "Failed to get namespace")
assert.Equal(t, []string{namespaceName}, getNsResp.Namespace)
t.Logf("Got namespace: %v", getNsResp.Namespace)
t.Logf("Got namespace: %v", getNsResp.Namespace)
// List namespaces
listNsResp, err := client.ListNamespaces(bucketARN, "", "", 0)
@@ -142,12 +142,12 @@ func testNamespaceLifecycle(t *testing.T, client *S3TablesClient) {
}
}
assert.True(t, found, "Created namespace should appear in list")
t.Logf("Listed namespaces, found %d namespaces", len(listNsResp.Namespaces))
t.Logf("Listed namespaces, found %d namespaces", len(listNsResp.Namespaces))
// Delete namespace
err = client.DeleteNamespace(bucketARN, []string{namespaceName})
require.NoError(t, err, "Failed to delete namespace")
t.Logf("Deleted namespace: %s", namespaceName)
t.Logf("Deleted namespace: %s", namespaceName)
// Verify namespace is deleted
_, err = client.GetNamespace(bucketARN, []string{namespaceName})
@@ -188,14 +188,14 @@ func testTableLifecycle(t *testing.T, client *S3TablesClient) {
require.NoError(t, err, "Failed to create table")
assert.NotEmpty(t, createTableResp.TableARN)
assert.NotEmpty(t, createTableResp.VersionToken)
t.Logf("Created table: %s (version: %s)", createTableResp.TableARN, createTableResp.VersionToken)
t.Logf("Created table: %s (version: %s)", createTableResp.TableARN, createTableResp.VersionToken)
// Get table
getTableResp, err := client.GetTable(bucketARN, []string{namespaceName}, tableName)
require.NoError(t, err, "Failed to get table")
assert.Equal(t, tableName, getTableResp.Name)
assert.Equal(t, "ICEBERG", getTableResp.Format)
t.Logf("Got table: %s (format: %s)", getTableResp.Name, getTableResp.Format)
t.Logf("Got table: %s (format: %s)", getTableResp.Name, getTableResp.Format)
// List tables
listTablesResp, err := client.ListTables(bucketARN, []string{namespaceName}, "", "", 0)
@@ -208,12 +208,12 @@ func testTableLifecycle(t *testing.T, client *S3TablesClient) {
}
}
assert.True(t, found, "Created table should appear in list")
t.Logf("Listed tables, found %d tables", len(listTablesResp.Tables))
t.Logf("Listed tables, found %d tables", len(listTablesResp.Tables))
// Delete table
err = client.DeleteTable(bucketARN, []string{namespaceName}, tableName)
require.NoError(t, err, "Failed to delete table")
t.Logf("Deleted table: %s", tableName)
t.Logf("Deleted table: %s", tableName)
// Verify table is deleted
_, err = client.GetTable(bucketARN, []string{namespaceName}, tableName)
@@ -234,18 +234,18 @@ func testTableBucketPolicy(t *testing.T, client *S3TablesClient) {
policy := `{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":"*","Action":"s3tables:*","Resource":"*"}]}`
err = client.PutTableBucketPolicy(bucketARN, policy)
require.NoError(t, err, "Failed to put table bucket policy")
t.Logf("Put table bucket policy")
t.Logf("Put table bucket policy")
// Get bucket policy
getPolicyResp, err := client.GetTableBucketPolicy(bucketARN)
require.NoError(t, err, "Failed to get table bucket policy")
assert.Equal(t, policy, getPolicyResp.ResourcePolicy)
t.Logf("Got table bucket policy")
t.Logf("Got table bucket policy")
// Delete bucket policy
err = client.DeleteTableBucketPolicy(bucketARN)
require.NoError(t, err, "Failed to delete table bucket policy")
t.Logf("Deleted table bucket policy")
t.Logf("Deleted table bucket policy")
// Verify policy is deleted
_, err = client.GetTableBucketPolicy(bucketARN)
@@ -285,34 +285,34 @@ func testTablePolicy(t *testing.T, client *S3TablesClient) {
require.NoError(t, err, "Failed to create table")
defer client.DeleteTable(bucketARN, []string{namespaceName}, tableName)
t.Logf("Created table: %s", createTableResp.TableARN)
t.Logf("Created table: %s", createTableResp.TableARN)
// Verify no policy exists initially
_, err = client.GetTablePolicy(bucketARN, []string{namespaceName}, tableName)
assert.Error(t, err, "Policy should not exist initially")
t.Logf("Verified no policy exists initially")
t.Logf("Verified no policy exists initially")
// Put table policy
policy := `{"Version":"2012-10-17","Statement":[{"Effect":"Allow","Principal":"*","Action":"s3tables:*","Resource":"*"}]}`
err = client.PutTablePolicy(bucketARN, []string{namespaceName}, tableName, policy)
require.NoError(t, err, "Failed to put table policy")
t.Logf("Put table policy")
t.Logf("Put table policy")
// Get table policy
getPolicyResp, err := client.GetTablePolicy(bucketARN, []string{namespaceName}, tableName)
require.NoError(t, err, "Failed to get table policy")
assert.Equal(t, policy, getPolicyResp.ResourcePolicy)
t.Logf("Got table policy")
t.Logf("Got table policy")
// Delete table policy
err = client.DeleteTablePolicy(bucketARN, []string{namespaceName}, tableName)
require.NoError(t, err, "Failed to delete table policy")
t.Logf("Deleted table policy")
t.Logf("Deleted table policy")
// Verify policy is deleted
_, err = client.GetTablePolicy(bucketARN, []string{namespaceName}, tableName)
assert.Error(t, err, "Policy should not exist after deletion")
t.Logf("Verified policy deletion")
t.Logf("Verified policy deletion")
}
func testTagging(t *testing.T, client *S3TablesClient) {
@@ -330,25 +330,25 @@ func testTagging(t *testing.T, client *S3TablesClient) {
listTagsResp, err := client.ListTagsForResource(bucketARN)
require.NoError(t, err, "Failed to list tags")
assert.Equal(t, "test", listTagsResp.Tags["Environment"])
t.Logf("Listed tags: %v", listTagsResp.Tags)
t.Logf("Listed tags: %v", listTagsResp.Tags)
// Add more tags
newTags := map[string]string{"Department": "Engineering"}
err = client.TagResource(bucketARN, newTags)
require.NoError(t, err, "Failed to tag resource")
t.Logf("Added tags")
t.Logf("Added tags")
// Verify tags
listTagsResp, err = client.ListTagsForResource(bucketARN)
require.NoError(t, err, "Failed to list tags")
assert.Equal(t, "test", listTagsResp.Tags["Environment"])
assert.Equal(t, "Engineering", listTagsResp.Tags["Department"])
t.Logf("Verified tags: %v", listTagsResp.Tags)
t.Logf("Verified tags: %v", listTagsResp.Tags)
// Remove a tag
err = client.UntagResource(bucketARN, []string{"Environment"})
require.NoError(t, err, "Failed to untag resource")
t.Logf("Removed tag")
t.Logf("Removed tag")
// Verify tag is removed
listTagsResp, err = client.ListTagsForResource(bucketARN)
@@ -356,7 +356,7 @@ func testTagging(t *testing.T, client *S3TablesClient) {
_, hasEnvironment := listTagsResp.Tags["Environment"]
assert.False(t, hasEnvironment, "Environment tag should be removed")
assert.Equal(t, "Engineering", listTagsResp.Tags["Department"])
t.Logf("Verified tag removal")
t.Logf("Verified tag removal")
}
func testTargetOperations(t *testing.T, client *S3TablesClient) {