s3tables: redesign Iceberg REST Catalog using iceberg-go and automate integration tests (#8197)

* full integration with iceberg-go

* Table Commit Operations (handleUpdateTable)

* s3tables: fix Iceberg v2 compliance and namespace properties

This commit ensures SeaweedFS Iceberg REST Catalog is compliant with
Iceberg Format Version 2 by:
- Using iceberg-go's table.NewMetadataWithUUID for strict v2 compliance.
- Explicitly initializing namespace properties to empty maps.
- Removing omitempty from required Iceberg response fields.
- Fixing CommitTableRequest unmarshaling using table.Requirements and table.Updates.

* s3tables: automate Iceberg integration tests

- Added Makefile for local test execution and cluster management.
- Added docker-compose for PyIceberg compatibility kit.
- Added Go integration test harness for PyIceberg.
- Updated GitHub CI to run Iceberg catalog tests automatically.

* s3tables: update PyIceberg test suite for compatibility

- Updated test_rest_catalog.py to use latest PyIceberg transaction APIs.
- Updated Dockerfile to include pyarrow and pandas dependencies.
- Improved namespace and table handling in integration tests.

* s3tables: address review feedback on Iceberg Catalog

- Implemented robust metadata version parsing and incrementing.
- Ensured table metadata changes are persisted during commit (handleUpdateTable).
- Standardized namespace property initialization for consistency.
- Fixed unused variable and incorrect struct field build errors.

* s3tables: finalize Iceberg REST Catalog and optimize tests

- Implemented robust metadata versioning and persistence.
- Standardized namespace property initialization.
- Optimized integration tests using pre-built Docker image.
- Added strict property persistence validation to test suite.
- Fixed build errors from previous partial updates.

* Address PR review: fix Table UUID stability, implement S3Tables UpdateTable, and support full metadata persistence individually

* fix: Iceberg catalog stable UUIDs, metadata persistence, and file writing

- Ensure table UUIDs are stable (do not regenerate on load).
- Persist full table metadata (Iceberg JSON) in s3tables extended attributes.
- Add `MetadataVersion` to explicitly track version numbers, replacing regex parsing.
- Implement `saveMetadataFile` to persist metadata JSON files to the Filer on commit.
- Update `CreateTable` and `UpdateTable` handlers to use the new logic.

* test: bind weed mini to 0.0.0.0 in integration tests to fix Docker connectivity

* Iceberg: fix metadata handling in REST catalog

- Add nil guard in createTable
- Fix updateTable to correctly load existing metadata from storage
- Ensure full metadata persistence on updates
- Populate loadTable result with parsed metadata

* S3Tables: add auth checks and fix response fields in UpdateTable

- Add CheckPermissionWithContext to UpdateTable handler
- Include TableARN and MetadataLocation in UpdateTable response
- Use ErrCodeConflict (409) for version token mismatches

* Tests: improve Iceberg catalog test infrastructure and cleanup

- Makefile: use PID file for precise process killing
- test_rest_catalog.py: remove unused variables and fix f-strings

* Iceberg: fix variable shadowing in UpdateTable

- Rename inner loop variable `req` to `requirement` to avoid shadowing outer request variable

* S3Tables: simplify MetadataVersion initialization

- Use `max(req.MetadataVersion, 1)` instead of anonymous function

* Tests: remove unicode characters from S3 tables integration test logs

- Remove unicode checkmarks from test output for cleaner logs

* Iceberg: improve metadata persistence robustness

- Fix MetadataLocation in LoadTableResult to fallback to generated location
- Improve saveMetadataFile to ensure directory hierarchy existence and robust error handling
This commit is contained in:
Chris Lu
2026-02-03 15:30:04 -08:00
committed by GitHub
parent 47fc9e771f
commit b244bb58aa
21 changed files with 1485 additions and 232 deletions

View File

@@ -66,7 +66,7 @@ func (h *S3TablesHandler) handlePutTableBucketPolicy(w http.ResponseWriter, r *h
}
// Check if bucket exists and get metadata for ownership check
bucketPath := getTableBucketPath(bucketName)
bucketPath := GetTableBucketPath(bucketName)
var bucketMetadata tableBucketMetadata
err = filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
data, err := h.getExtendedAttribute(r.Context(), client, bucketPath, ExtendedKeyMetadata)
@@ -133,7 +133,7 @@ func (h *S3TablesHandler) handleGetTableBucketPolicy(w http.ResponseWriter, r *h
return err
}
bucketPath := getTableBucketPath(bucketName)
bucketPath := GetTableBucketPath(bucketName)
var policy []byte
var bucketMetadata tableBucketMetadata
@@ -204,7 +204,7 @@ func (h *S3TablesHandler) handleDeleteTableBucketPolicy(w http.ResponseWriter, r
return err
}
bucketPath := getTableBucketPath(bucketName)
bucketPath := GetTableBucketPath(bucketName)
// Check if bucket exists and get metadata for ownership check
var bucketMetadata tableBucketMetadata
@@ -301,8 +301,8 @@ func (h *S3TablesHandler) handlePutTablePolicy(w http.ResponseWriter, r *http.Re
h.writeError(w, http.StatusBadRequest, ErrCodeInvalidRequest, err.Error())
return err
}
tablePath := getTablePath(bucketName, namespaceName, tableName)
bucketPath := getTableBucketPath(bucketName)
tablePath := GetTablePath(bucketName, namespaceName, tableName)
bucketPath := GetTableBucketPath(bucketName)
var metadata tableMetadataInternal
var bucketPolicy string
@@ -396,8 +396,8 @@ func (h *S3TablesHandler) handleGetTablePolicy(w http.ResponseWriter, r *http.Re
h.writeError(w, http.StatusBadRequest, ErrCodeInvalidRequest, err.Error())
return err
}
tablePath := getTablePath(bucketName, namespaceName, tableName)
bucketPath := getTableBucketPath(bucketName)
tablePath := GetTablePath(bucketName, namespaceName, tableName)
bucketPath := GetTableBucketPath(bucketName)
var policy []byte
var metadata tableMetadataInternal
var bucketPolicy string
@@ -497,8 +497,8 @@ func (h *S3TablesHandler) handleDeleteTablePolicy(w http.ResponseWriter, r *http
h.writeError(w, http.StatusBadRequest, ErrCodeInvalidRequest, err.Error())
return err
}
tablePath := getTablePath(bucketName, namespaceName, tableName)
bucketPath := getTableBucketPath(bucketName)
tablePath := GetTablePath(bucketName, namespaceName, tableName)
bucketPath := GetTableBucketPath(bucketName)
// Check if table exists
var metadata tableMetadataInternal
@@ -604,7 +604,7 @@ func (h *S3TablesHandler) handleTagResource(w http.ResponseWriter, r *http.Reque
// Fetch bucket policy if we have a bucket name
if bucketName != "" {
bucketPath := getTableBucketPath(bucketName)
bucketPath := GetTableBucketPath(bucketName)
policyData, err := h.getExtendedAttribute(r.Context(), client, bucketPath, ExtendedKeyPolicy)
if err != nil {
if !errors.Is(err, ErrAttributeNotFound) {
@@ -722,7 +722,7 @@ func (h *S3TablesHandler) handleListTagsForResource(w http.ResponseWriter, r *ht
// Fetch bucket policy if we have a bucket name
if bucketName != "" {
bucketPath := getTableBucketPath(bucketName)
bucketPath := GetTableBucketPath(bucketName)
policyData, err := h.getExtendedAttribute(r.Context(), client, bucketPath, ExtendedKeyPolicy)
if err != nil {
if !errors.Is(err, ErrAttributeNotFound) {
@@ -828,7 +828,7 @@ func (h *S3TablesHandler) handleUntagResource(w http.ResponseWriter, r *http.Req
// Fetch bucket policy if we have a bucket name
if bucketName != "" {
bucketPath := getTableBucketPath(bucketName)
bucketPath := GetTableBucketPath(bucketName)
policyData, err := h.getExtendedAttribute(r.Context(), client, bucketPath, ExtendedKeyPolicy)
if err != nil {
if !errors.Is(err, ErrAttributeNotFound) {
@@ -914,13 +914,13 @@ func (h *S3TablesHandler) resolveResourcePath(resourceARN string) (path string,
// Try parsing as table ARN first
bucketName, namespace, tableName, err := parseTableFromARN(resourceARN)
if err == nil {
return getTablePath(bucketName, namespace, tableName), ExtendedKeyTags, ResourceTypeTable, nil
return GetTablePath(bucketName, namespace, tableName), ExtendedKeyTags, ResourceTypeTable, nil
}
// Try parsing as bucket ARN
bucketName, err = parseBucketNameFromARN(resourceARN)
if err == nil {
return getTableBucketPath(bucketName), ExtendedKeyTags, ResourceTypeBucket, nil
return GetTableBucketPath(bucketName), ExtendedKeyTags, ResourceTypeBucket, nil
}
return "", "", "", fmt.Errorf("invalid resource ARN: %s", resourceARN)