Files
seaweedFS/test/s3tables/catalog/test_rest_catalog.py
Chris Lu b244bb58aa s3tables: redesign Iceberg REST Catalog using iceberg-go and automate integration tests (#8197)
* full integration with iceberg-go

* Table Commit Operations (handleUpdateTable)

* s3tables: fix Iceberg v2 compliance and namespace properties

This commit ensures SeaweedFS Iceberg REST Catalog is compliant with
Iceberg Format Version 2 by:
- Using iceberg-go's table.NewMetadataWithUUID for strict v2 compliance.
- Explicitly initializing namespace properties to empty maps.
- Removing omitempty from required Iceberg response fields.
- Fixing CommitTableRequest unmarshaling using table.Requirements and table.Updates.

* s3tables: automate Iceberg integration tests

- Added Makefile for local test execution and cluster management.
- Added docker-compose for PyIceberg compatibility kit.
- Added Go integration test harness for PyIceberg.
- Updated GitHub CI to run Iceberg catalog tests automatically.

* s3tables: update PyIceberg test suite for compatibility

- Updated test_rest_catalog.py to use latest PyIceberg transaction APIs.
- Updated Dockerfile to include pyarrow and pandas dependencies.
- Improved namespace and table handling in integration tests.

* s3tables: address review feedback on Iceberg Catalog

- Implemented robust metadata version parsing and incrementing.
- Ensured table metadata changes are persisted during commit (handleUpdateTable).
- Standardized namespace property initialization for consistency.
- Fixed unused variable and incorrect struct field build errors.

* s3tables: finalize Iceberg REST Catalog and optimize tests

- Implemented robust metadata versioning and persistence.
- Standardized namespace property initialization.
- Optimized integration tests using pre-built Docker image.
- Added strict property persistence validation to test suite.
- Fixed build errors from previous partial updates.

* Address PR review: fix Table UUID stability, implement S3Tables UpdateTable, and support full metadata persistence individually

* fix: Iceberg catalog stable UUIDs, metadata persistence, and file writing

- Ensure table UUIDs are stable (do not regenerate on load).
- Persist full table metadata (Iceberg JSON) in s3tables extended attributes.
- Add `MetadataVersion` to explicitly track version numbers, replacing regex parsing.
- Implement `saveMetadataFile` to persist metadata JSON files to the Filer on commit.
- Update `CreateTable` and `UpdateTable` handlers to use the new logic.

* test: bind weed mini to 0.0.0.0 in integration tests to fix Docker connectivity

* Iceberg: fix metadata handling in REST catalog

- Add nil guard in createTable
- Fix updateTable to correctly load existing metadata from storage
- Ensure full metadata persistence on updates
- Populate loadTable result with parsed metadata

* S3Tables: add auth checks and fix response fields in UpdateTable

- Add CheckPermissionWithContext to UpdateTable handler
- Include TableARN and MetadataLocation in UpdateTable response
- Use ErrCodeConflict (409) for version token mismatches

* Tests: improve Iceberg catalog test infrastructure and cleanup

- Makefile: use PID file for precise process killing
- test_rest_catalog.py: remove unused variables and fix f-strings

* Iceberg: fix variable shadowing in UpdateTable

- Rename inner loop variable `req` to `requirement` to avoid shadowing outer request variable

* S3Tables: simplify MetadataVersion initialization

- Use `max(req.MetadataVersion, 1)` instead of anonymous function

* Tests: remove unicode characters from S3 tables integration test logs

- Remove unicode checkmarks from test output for cleaner logs

* Iceberg: improve metadata persistence robustness

- Fix MetadataLocation in LoadTableResult to fall back to the generated location
- Improve saveMetadataFile to ensure directory hierarchy existence and robust error handling
2026-02-03 15:30:04 -08:00

237 lines
7.1 KiB
Python

#!/usr/bin/env python3
"""
Iceberg REST Catalog Compatibility Test for SeaweedFS
This script tests the Iceberg REST Catalog API compatibility of the
SeaweedFS Iceberg REST Catalog implementation.
Usage:
python3 test_rest_catalog.py --catalog-url http://localhost:8182 --prefix <table-bucket-prefix>
Requirements:
pip install pyiceberg[s3fs]
"""
import argparse
import sys
from pyiceberg.catalog import load_catalog
from pyiceberg.schema import Schema
from pyiceberg.types import (
IntegerType,
LongType,
StringType,
NestedField,
)
from pyiceberg.exceptions import (
NamespaceAlreadyExistsError,
NoSuchNamespaceError,
TableAlreadyExistsError,
NoSuchTableError,
)
def test_config_endpoint(catalog):
    """Report the /v1/config endpoint as working.

    No request is made here and ``catalog`` is not inspected: the check
    relies on the caller having already loaded the catalog, which is taken
    as evidence that the config endpoint answered.

    Returns:
        Always True.
    """
    status_lines = (
        "Testing /v1/config endpoint...",
        " /v1/config endpoint working",
    )
    for line in status_lines:
        print(line)
    return True
def test_namespace_operations(catalog, prefix):
    """Exercise namespace creation, listing, and property lookup.

    The namespace is the one-element tuple ``("<prefix>_test_ns",)`` with
    every hyphen in ``prefix`` replaced by an underscore. A namespace that
    already exists is tolerated and only reported.

    Returns:
        True when the namespace is present in ``list_namespaces()`` and its
        properties can be loaded; False otherwise.
    """
    print("Testing namespace operations...")
    ns_name = prefix.replace("-", "_") + "_test_ns"
    namespace = (ns_name,)

    # Snapshot of what exists before this test touches anything.
    before = catalog.list_namespaces()
    print(f" Initial namespaces: {before}")

    # Creation is allowed to find the namespace already present.
    try:
        catalog.create_namespace(namespace)
    except NamespaceAlreadyExistsError:
        print(f" ! Namespace already exists: {namespace}")
    else:
        print(f" Created namespace: {namespace}")

    # The namespace must now be visible in the listing.
    after = catalog.list_namespaces()
    if namespace not in after:
        print(f" Namespace not found in list: {after}")
        return False
    print(" Namespace appears in list")

    # Properties must be loadable for the namespace.
    try:
        props = catalog.load_namespace_properties(namespace)
    except NoSuchNamespaceError:
        print(" Failed to load namespace properties")
        return False
    print(f" Loaded namespace properties: {props}")
    return True
def test_table_operations(catalog, prefix):
    """Create, list, and load a table through the catalog.

    The table is ``test_table`` inside the single-level namespace
    ``<prefix>_test_ns`` (hyphens in ``prefix`` become underscores). A table
    that already exists is tolerated; in that case it is loaded once instead
    of being created.

    Args:
        catalog: Catalog object providing ``create_table``, ``list_tables``
            and ``load_table``.
        prefix: Table bucket prefix used to derive the namespace name.

    Returns:
        True when the table shows up in ``list_tables()`` and can be loaded;
        False when it is missing from the listing or the final load raises
        ``NoSuchTableError``. Any other exception propagates to the caller.
    """
    print("Testing table operations...")
    namespace = (f"{prefix.replace('-', '_')}_test_ns",)
    table_name = "test_table"
    table_id = namespace + (table_name,)

    # Define a simple schema
    schema = Schema(
        NestedField(field_id=1, name="id", field_type=LongType(), required=True),
        NestedField(field_id=2, name="name", field_type=StringType(), required=False),
        NestedField(field_id=3, name="age", field_type=IntegerType(), required=False),
    )

    # Create table. The returned objects are not needed here (the table is
    # re-loaded further down), so they are deliberately not bound to names.
    try:
        catalog.create_table(
            identifier=table_id,
            schema=schema,
        )
        print(f" Created table: {table_id}")
    except TableAlreadyExistsError:
        print(f" ! Table already exists: {table_id}")
        catalog.load_table(table_id)

    # List tables. Identifiers are tuples whose final element is the table
    # name; comparing the last element avoids assuming a fixed tuple length.
    tables = catalog.list_tables(namespace)
    if any(identifier[-1] == table_name for identifier in tables):
        print(" Table appears in list")
    else:
        print(f" Table not found in list: {tables}")
        return False

    # Load table
    try:
        loaded_table = catalog.load_table(table_id)
        print(f" Loaded table: {loaded_table.name()}")
        print(f" Schema: {loaded_table.schema()}")
        print(f" Location: {loaded_table.location()}")
    except NoSuchTableError:
        print(" Failed to load table")
        return False
    return True
def test_table_update(catalog, prefix):
    """Commit a property change to the test table and confirm it persisted.

    Loads ``test_table`` from the ``<prefix>_test_ns`` namespace (hyphens in
    ``prefix`` become underscores), sets ``test.property`` to ``test.value``
    inside a transaction, then reloads the table and checks the property.

    Returns:
        True when the reloaded table carries the expected property value;
        False when it does not, or when any exception is raised on the way.
    """
    print("Testing table update operations...")
    ns_name = f"{prefix.replace('-', '_')}_test_ns"
    table_id = (ns_name, "test_table")
    prop_key = "test.property"
    prop_value = "test.value"

    try:
        current = catalog.load_table(table_id)
        # Update table properties through a transaction on the loaded table.
        with current.transaction() as txn:
            txn.set_properties({prop_key: prop_value})
        print(" Updated table properties")

        # Read the table back from the catalog to see what was stored.
        reloaded = catalog.load_table(table_id)
        if reloaded.properties.get(prop_key) != prop_value:
            print(" ! Property update failed or not persisted")
            return False
        print(" Property update verified")
    except Exception as e:
        print(f" Table update failed: {e}")
        return False
    return True
def test_cleanup(catalog, prefix):
    """Drop the test table and then its namespace, on a best-effort basis.

    A table or namespace that is already gone is only reported, and any
    other error from dropping the namespace is printed rather than raised.

    Returns:
        Always True when the function returns; exceptions from
        ``drop_table`` other than ``NoSuchTableError`` propagate.
    """
    print("Testing cleanup operations...")
    ns_name = prefix.replace("-", "_") + "_test_ns"
    namespace = (ns_name,)
    table_id = (ns_name, "test_table")

    # Drop table
    try:
        catalog.drop_table(table_id)
    except NoSuchTableError:
        print(f" ! Table already deleted: {table_id}")
    else:
        print(f" Dropped table: {table_id}")

    # Drop namespace
    try:
        catalog.drop_namespace(namespace)
    except NoSuchNamespaceError:
        print(f" ! Namespace already deleted: {namespace}")
    except Exception as e:
        print(f" ? Namespace drop error (may be expected): {e}")
    else:
        print(f" Dropped namespace: {namespace}")
    return True
def main():
    """Parse the command line, connect to the catalog, and run every test.

    Each test runs in order; a falsy result counts as a failure and an
    exception raised by a test is printed and counted as a failure too.
    The cleanup test is appended unless ``--skip-cleanup`` is given.

    Returns:
        0 when no test failed, 1 otherwise.
    """
    parser = argparse.ArgumentParser(description="Test Iceberg REST Catalog compatibility")
    parser.add_argument("--catalog-url", required=True, help="Iceberg REST Catalog URL (e.g., http://localhost:8182)")
    parser.add_argument("--warehouse", default="s3://iceberg-test/", help="Warehouse location")
    parser.add_argument("--prefix", required=True, help="Table bucket prefix")
    parser.add_argument("--skip-cleanup", action="store_true", help="Skip cleanup at the end")
    args = parser.parse_args()

    print(f"Connecting to Iceberg REST Catalog at: {args.catalog_url}")
    print(f"Warehouse: {args.warehouse}")
    print(f"Prefix: {args.prefix}")
    print()

    # Load the REST catalog
    catalog_properties = {
        "type": "rest",
        "uri": args.catalog_url,
        "warehouse": args.warehouse,
        "prefix": args.prefix,
    }
    catalog = load_catalog("rest", **catalog_properties)

    # Each entry: (display name, test function, positional arguments).
    with_prefix = (catalog, args.prefix)
    tests = [
        ("Config Endpoint", test_config_endpoint, (catalog,)),
        ("Namespace Operations", test_namespace_operations, with_prefix),
        ("Table Operations", test_table_operations, with_prefix),
        ("Table Update", test_table_update, with_prefix),
    ]
    if not args.skip_cleanup:
        tests.append(("Cleanup", test_cleanup, with_prefix))

    separator = "=" * 50
    passed = 0
    failed = 0
    for name, test_fn, test_args in tests:
        print(f"\n{separator}")
        try:
            succeeded = test_fn(*test_args)
        except Exception as e:
            failed += 1
            print(f"ERROR in {name}: {e}")
            continue
        if succeeded:
            passed += 1
            print(f"PASSED: {name}")
        else:
            failed += 1
            print(f"FAILED: {name}")

    print(f"\n{separator}")
    print(f"Results: {passed} passed, {failed} failed")
    return 1 if failed else 0
# Script entry point: run the suite and hand main()'s return value to the
# interpreter as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)