Files
seaweedFS/weed/s3api/s3tables/handler_bucket_create.go
Chris Lu b244bb58aa s3tables: redesign Iceberg REST Catalog using iceberg-go and automate integration tests (#8197)
* full integration with iceberg-go

* Table Commit Operations (handleUpdateTable)

* s3tables: fix Iceberg v2 compliance and namespace properties

This commit ensures SeaweedFS Iceberg REST Catalog is compliant with
Iceberg Format Version 2 by:
- Using iceberg-go's table.NewMetadataWithUUID for strict v2 compliance.
- Explicitly initializing namespace properties to empty maps.
- Removing omitempty from required Iceberg response fields.
- Fixing CommitTableRequest unmarshaling using table.Requirements and table.Updates.

* s3tables: automate Iceberg integration tests

- Added Makefile for local test execution and cluster management.
- Added docker-compose for PyIceberg compatibility kit.
- Added Go integration test harness for PyIceberg.
- Updated GitHub CI to run Iceberg catalog tests automatically.

* s3tables: update PyIceberg test suite for compatibility

- Updated test_rest_catalog.py to use latest PyIceberg transaction APIs.
- Updated Dockerfile to include pyarrow and pandas dependencies.
- Improved namespace and table handling in integration tests.

* s3tables: address review feedback on Iceberg Catalog

- Implemented robust metadata version parsing and incrementing.
- Ensured table metadata changes are persisted during commit (handleUpdateTable).
- Standardized namespace property initialization for consistency.
- Fixed unused variable and incorrect struct field build errors.

* s3tables: finalize Iceberg REST Catalog and optimize tests

- Implemented robust metadata versioning and persistence.
- Standardized namespace property initialization.
- Optimized integration tests using pre-built Docker image.
- Added strict property persistence validation to test suite.
- Fixed build errors from previous partial updates.

* Address PR review: fix Table UUID stability, implement S3Tables UpdateTable, and support full metadata persistence individually

* fix: Iceberg catalog stable UUIDs, metadata persistence, and file writing

- Ensure table UUIDs are stable (do not regenerate on load).
- Persist full table metadata (Iceberg JSON) in s3tables extended attributes.
- Add `MetadataVersion` to explicitly track version numbers, replacing regex parsing.
- Implement `saveMetadataFile` to persist metadata JSON files to the Filer on commit.
- Update `CreateTable` and `UpdateTable` handlers to use the new logic.

* test: bind weed mini to 0.0.0.0 in integration tests to fix Docker connectivity

* Iceberg: fix metadata handling in REST catalog

- Add nil guard in createTable
- Fix updateTable to correctly load existing metadata from storage
- Ensure full metadata persistence on updates
- Populate loadTable result with parsed metadata

* S3Tables: add auth checks and fix response fields in UpdateTable

- Add CheckPermissionWithContext to UpdateTable handler
- Include TableARN and MetadataLocation in UpdateTable response
- Use ErrCodeConflict (409) for version token mismatches

* Tests: improve Iceberg catalog test infrastructure and cleanup

- Makefile: use PID file for precise process killing
- test_rest_catalog.py: remove unused variables and fix f-strings

* Iceberg: fix variable shadowing in UpdateTable

- Rename inner loop variable `req` to `requirement` to avoid shadowing outer request variable

* S3Tables: simplify MetadataVersion initialization

- Use `max(req.MetadataVersion, 1)` instead of anonymous function

* Tests: remove unicode characters from S3 tables integration test logs

- Remove unicode checkmarks from test output for cleaner logs

* Iceberg: improve metadata persistence robustness

- Fix MetadataLocation in LoadTableResult to fallback to generated location
- Improve saveMetadataFile to ensure directory hierarchy existence and robust error handling
2026-02-03 15:30:04 -08:00

151 lines
4.5 KiB
Go

package s3tables
import (
"encoding/json"
"errors"
"fmt"
"net/http"
"time"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
)
// handleCreateTableBucket creates a new table bucket
func (h *S3TablesHandler) handleCreateTableBucket(w http.ResponseWriter, r *http.Request, filerClient FilerClient) error {
// Check permission
principal := h.getAccountID(r)
if !CanCreateTableBucket(principal, principal, "") {
h.writeError(w, http.StatusForbidden, ErrCodeAccessDenied, "not authorized to create table buckets")
return NewAuthError("CreateTableBucket", principal, "not authorized to create table buckets")
}
var req CreateTableBucketRequest
if err := h.readRequestBody(r, &req); err != nil {
h.writeError(w, http.StatusBadRequest, ErrCodeInvalidRequest, err.Error())
return err
}
// Validate bucket name
if err := validateBucketName(req.Name); err != nil {
h.writeError(w, http.StatusBadRequest, ErrCodeInvalidRequest, err.Error())
return err
}
bucketPath := GetTableBucketPath(req.Name)
// Check if bucket already exists and ensure no conflict with object store buckets
tableBucketExists := false
s3BucketExists := false
err := filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
resp, err := client.GetFilerConfiguration(r.Context(), &filer_pb.GetFilerConfigurationRequest{})
if err != nil {
return err
}
bucketsPath := resp.DirBuckets
if bucketsPath == "" {
bucketsPath = s3_constants.DefaultBucketsPath
}
_, err = filer_pb.LookupEntry(r.Context(), client, &filer_pb.LookupDirectoryEntryRequest{
Directory: bucketsPath,
Name: req.Name,
})
if err != nil {
if !errors.Is(err, filer_pb.ErrNotFound) {
return err
}
} else {
s3BucketExists = true
}
_, err = filer_pb.LookupEntry(r.Context(), client, &filer_pb.LookupDirectoryEntryRequest{
Directory: TablesPath,
Name: req.Name,
})
if err != nil {
if errors.Is(err, filer_pb.ErrNotFound) {
return nil
}
return err
}
tableBucketExists = true
return nil
})
if err != nil {
glog.Errorf("S3Tables: failed to check bucket existence: %v", err)
h.writeError(w, http.StatusInternalServerError, ErrCodeInternalError, "failed to check bucket existence")
return err
}
if s3BucketExists {
h.writeError(w, http.StatusConflict, ErrCodeBucketAlreadyExists, fmt.Sprintf("bucket name %s is already used by an object store bucket", req.Name))
return fmt.Errorf("bucket name conflicts with object store bucket")
}
if tableBucketExists {
h.writeError(w, http.StatusConflict, ErrCodeBucketAlreadyExists, fmt.Sprintf("table bucket %s already exists", req.Name))
return fmt.Errorf("bucket already exists")
}
// Create the bucket directory and set metadata as extended attributes
now := time.Now()
metadata := &tableBucketMetadata{
Name: req.Name,
CreatedAt: now,
OwnerAccountID: h.getAccountID(r),
}
metadataBytes, err := json.Marshal(metadata)
if err != nil {
glog.Errorf("S3Tables: failed to marshal metadata: %v", err)
h.writeError(w, http.StatusInternalServerError, ErrCodeInternalError, "failed to marshal metadata")
return err
}
err = filerClient.WithFilerClient(false, func(client filer_pb.SeaweedFilerClient) error {
// Ensure root tables directory exists
if !h.entryExists(r.Context(), client, TablesPath) {
if err := h.createDirectory(r.Context(), client, TablesPath); err != nil {
return fmt.Errorf("failed to create root tables directory: %w", err)
}
}
// Create bucket directory
if err := h.createDirectory(r.Context(), client, bucketPath); err != nil {
return err
}
// Set metadata as extended attribute
if err := h.setExtendedAttribute(r.Context(), client, bucketPath, ExtendedKeyMetadata, metadataBytes); err != nil {
return err
}
// Set tags if provided
if len(req.Tags) > 0 {
tagsBytes, err := json.Marshal(req.Tags)
if err != nil {
return fmt.Errorf("failed to marshal tags: %w", err)
}
if err := h.setExtendedAttribute(r.Context(), client, bucketPath, ExtendedKeyTags, tagsBytes); err != nil {
return err
}
}
return nil
})
if err != nil {
glog.Errorf("S3Tables: failed to create table bucket %s: %v", req.Name, err)
h.writeError(w, http.StatusInternalServerError, ErrCodeInternalError, "failed to create table bucket")
return err
}
resp := &CreateTableBucketResponse{
ARN: h.generateTableBucketARN(metadata.OwnerAccountID, req.Name),
}
h.writeJSON(w, http.StatusOK, resp)
return nil
}