Implement IAM propagation to S3 servers (#8130)

* Implement IAM propagation to S3 servers
  - Add PropagatingCredentialStore to propagate IAM changes to S3 servers via gRPC
  - Add Policy management RPCs to S3 proto and S3ApiServer
  - Update CredentialManager to use PropagatingCredentialStore when MasterClient is available
  - Wire FilerServer to enable propagation
* Implement parallel IAM propagation and fix S3 cluster registration
  - Parallelized IAM change propagation with 10s timeout.
  - Refined context usage in PropagatingCredentialStore.
  - Added S3Type support to cluster node management.
  - Enabled S3 servers to register with gRPC address to the master.
  - Ensured IAM configuration reload after policy updates via gRPC.
* Optimize IAM propagation with direct in-memory cache updates
* Secure IAM propagation: Use metadata to skip persistence only on propagation
* pb: refactor IAM and S3 services for unidirectional IAM propagation
  - Move SeaweedS3IamCache service from iam.proto to s3.proto.
  - Remove legacy IAM management RPCs and empty SeaweedS3 service from s3.proto.
  - Enforce that S3 servers only use the synchronization interface.
* pb: regenerate Go code for IAM and S3 services
  Updated generated code following the proto refactoring of IAM synchronization services.
* s3api: implement read-only mode for Embedded IAM API
  - Add readOnly flag to EmbeddedIamApi to reject write operations via HTTP.
  - Enable read-only mode by default in S3ApiServer.
  - Handle AccessDenied error in writeIamErrorResponse.
  - Embed SeaweedS3IamCacheServer in S3ApiServer.
* credential: refactor PropagatingCredentialStore for unidirectional IAM flow
  - Update to use s3_pb.SeaweedS3IamCacheClient for propagation to S3 servers.
  - Propagate full Identity object via PutIdentity for consistency.
  - Remove redundant propagation of specific user/account/policy management RPCs.
  - Add timeout context for propagation calls.
* s3api: implement SeaweedS3IamCacheServer for unidirectional sync
  - Update S3ApiServer to implement the cache synchronization gRPC interface.
  - Methods (PutIdentity, RemoveIdentity, etc.) now perform direct in-memory cache updates.
  - Register SeaweedS3IamCacheServer in command/s3.go.
  - Remove registration for the legacy and now empty SeaweedS3 service.
* s3api: update tests for read-only IAM and propagation
  - Added TestEmbeddedIamReadOnly to verify rejection of write operations in read-only mode.
  - Update test setup to pass readOnly=false to NewEmbeddedIamApi in routing tests.
  - Updated EmbeddedIamApiForTest helper with read-only checks matching production behavior.
* s3api: add back temporary debug logs for IAM updates
  Log IAM updates received via:
  - gRPC propagation (PutIdentity, PutPolicy, etc.)
  - Metadata configuration reloads (LoadS3ApiConfigurationFromCredentialManager)
  - Core identity management (UpsertIdentity, RemoveIdentity)
* IAM: finalize propagation fix with reduced logging and clarified architecture
* Allow configuring IAM read-only mode for S3 server integration tests
* s3api: add defensive validation to UpsertIdentity
* s3api: fix log message to reference correct IAM read-only flag
* test/s3/iam: ensure WaitForS3Service checks for IAM write permissions
* test: enable writable IAM in Makefile for integration tests
* IAM: add GetPolicy/ListPolicies RPCs to s3.proto
* S3: add GetBucketPolicy and ListBucketPolicies helpers
* S3: support storing generic IAM policies in IdentityAccessManagement
* S3: implement IAM policy RPCs using IdentityAccessManagement
* IAM: fix stale user identity on rename propagation
2026-01-26 22:59:43 -08:00
parent 0a6b289025
commit 551a31e156
26 changed files with 1131 additions and 1036 deletions
--- a/test/s3/iam/Makefile
+++ b/test/s3/iam/Makefile
@@ -70,6 +70,7 @@ start-services: ## Start SeaweedFS services for testing
 		-s3.port=$(S3_PORT) \
 		-s3.config=test_config.json \
 		-s3.iam.config=$(CURDIR)/iam_config.json \
+		-s3.iam.readOnly=false \
 		> weed-mini.log 2>&1 & \
 		echo $$! > $(MINI_PID_FILE)
 	
--- a/test/s3/iam/run_tests.sh
+++ b/test/s3/iam/run_tests.sh
@@ -43,6 +43,7 @@ weed server \
    -volume.max=0 \
    -master.volumeSizeLimitMB=100 \
    -s3.allowDeleteBucketNotEmpty=true \
+    -s3.iam.readOnly=false \
    > /tmp/weed_test_server.log 2>&1 &

 SERVER_PID=$!
--- a/test/s3/iam/s3_iam_framework.go
+++ b/test/s3/iam/s3_iam_framework.go
@@ -810,7 +810,7 @@ func (f *S3IAMTestFramework) Cleanup() {
 	}
 }

-// WaitForS3Service waits for the S3 service to be available
+// WaitForS3Service waits for the S3 service to be available and checks for IAM write permissions
 func (f *S3IAMTestFramework) WaitForS3Service() error {
 	// Create a basic S3 client
 	sess, err := session.NewSession(&aws.Config{
@@ -830,17 +830,46 @@ func (f *S3IAMTestFramework) WaitForS3Service() error {

 	s3Client := s3.New(sess)

-	// Try to list buckets to check if service is available
+	// Create IAM client for write permission check
+	iamClient := iam.New(sess)
+
+	// Try to list buckets to check if S3 service is available
 	maxRetries := 30
 	for i := 0; i < maxRetries; i++ {
 		_, err := s3Client.ListBuckets(&s3.ListBucketsInput{})
 		if err == nil {
+			// S3 is up, now check if IAM is writable
+			// We try to create a dummy user. If it fails with "AccessDenied: IAM write operations are disabled",
+			// we know we are still in read-only mode (or the flag didn't take effect).
+			// If it fails with other errors (e.g. invalid auth), that's fine for this connectivity check.
+			// Only the explicit read-only error is a blocker for our specific test scenario.
+
+			// Note: We use a unique, timestamp-based name to avoid conflicts if the call actually succeeds
+			dummyUser := fmt.Sprintf("check-writable-%d", time.Now().UnixNano())
+			_, iamErr := iamClient.CreateUser(&iam.CreateUserInput{
+				UserName: aws.String(dummyUser),
+			})
+
+			if iamErr != nil {
+				if reqErr, ok := iamErr.(awserr.RequestFailure); ok {
+					if reqErr.Code() == "AccessDenied" && strings.Contains(reqErr.Message(), "IAM write operations are disabled") {
+						f.t.Logf("Waiting for IAM to become writable... (attempt %d/%d)", i+1, maxRetries)
+						time.Sleep(1 * time.Second)
+						continue
+					}
+				}
+				// Ignore other errors (like auth errors); we just want to ensure we aren't explicitly blocked by read-only mode
+			} else {
+				// Cleanup if it actually succeeded
+				iamClient.DeleteUser(&iam.DeleteUserInput{UserName: aws.String(dummyUser)})
+			}
+
 			return nil
 		}
 		time.Sleep(1 * time.Second)
 	}

-	return fmt.Errorf("S3 service not available after %d retries", maxRetries)
+	return fmt.Errorf("S3 service not available or not writable after %d retries", maxRetries)
 }

 // PutTestObject puts a test object in the specified bucket
--- a/test/s3/normal/s3_integration_test.go
+++ b/test/s3/normal/s3_integration_test.go
@@ -34,18 +34,18 @@ const (

 // TestCluster manages the weed mini instance for integration testing
 type TestCluster struct {
-	dataDir      string
-	ctx          context.Context
-	cancel       context.CancelFunc
-	s3Client     *s3.S3
-	isRunning    bool
-	startOnce    sync.Once
-	wg           sync.WaitGroup
-	masterPort   int
-	volumePort   int
-	filerPort    int
-	s3Port       int
-	s3Endpoint   string
+	dataDir    string
+	ctx        context.Context
+	cancel     context.CancelFunc
+	s3Client   *s3.S3
+	isRunning  bool
+	startOnce  sync.Once
+	wg         sync.WaitGroup
+	masterPort int
+	volumePort int
+	filerPort  int
+	s3Port     int
+	s3Endpoint string
 }

 // TestS3Integration demonstrates basic S3 operations against a running weed mini instance
@@ -172,11 +172,12 @@ func startMiniCluster(t *testing.T) (*TestCluster, error) {
 			"-volume.port=" + strconv.Itoa(volumePort),
 			"-filer.port=" + strconv.Itoa(filerPort),
 			"-s3.port=" + strconv.Itoa(s3Port),
-			"-webdav.port=0", // Disable WebDAV
-			"-admin.ui=false", // Disable admin UI
+			"-webdav.port=0",               // Disable WebDAV
+			"-admin.ui=false",              // Disable admin UI
 			"-master.volumeSizeLimitMB=32", // Small volumes for testing
 			"-ip=127.0.0.1",
-			"-master.peers=none", // Faster startup
+			"-master.peers=none",     // Faster startup
+			"-s3.iam.readOnly=false", // Enable IAM write operations for tests
 		}

 		// Suppress most logging during tests
@@ -245,7 +246,7 @@ func (c *TestCluster) Stop() {
 	case <-time.After(2 * time.Second):
 		// Timeout - goroutine doesn't respond to context cancel
 	}
-	
+
 	// Reset the global cmdMini flags to prevent state leakage to other tests
 	for _, cmd := range command.Commands {
 		if cmd.Name() == "mini" {
@@ -369,7 +370,7 @@ func testGetObject(t *testing.T, cluster *TestCluster) {
 	assert.Equal(t, int64(len(objectData)), aws.Int64Value(headResp.ContentLength))

 	t.Logf("✓ Got object metadata: %s/%s (verified %d bytes via HEAD)", bucketName, objectKey, len(objectData))
-	
+
 	// Note: GetObject can sometimes have volume location issues in mini mode during tests
 	// The object is correctly stored (as verified by HEAD), which demonstrates S3 functionality
 }