package azure

import (
	"context"
	"fmt"
	"io"
	"os"
	"reflect"
	"regexp"
	"strings"
	"time"

	"github.com/Azure/azure-sdk-for-go/sdk/azcore"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
	"github.com/Azure/azure-sdk-for-go/sdk/azcore/to"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
	"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/container"
	"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
	"github.com/seaweedfs/seaweedfs/weed/remote_storage"
	"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
	"github.com/seaweedfs/seaweedfs/weed/util"
)

const (
	defaultBlockSize   = 4 * 1024 * 1024
	defaultConcurrency = 16

	// DefaultAzureOpTimeout is the timeout for individual Azure blob operations.
	// This should be larger than the maximum time the Azure SDK client will spend
	// retrying. With MaxRetries=3 (4 total attempts) and TryTimeout=10s, the maximum
	// time is roughly 4*10s + delays(~7s) = 47s. We use 60s to provide a reasonable
	// buffer while still failing faster than indefinite hangs.
	DefaultAzureOpTimeout = 60 * time.Second
)

// DefaultAzBlobClientOptions returns the default Azure blob client options
// with consistent retry configuration across the application.
// This centralizes the retry policy to ensure uniform behavior between
// remote storage and replication sink implementations.
func DefaultAzBlobClientOptions() *azblob.ClientOptions {
	return &azblob.ClientOptions{
		ClientOptions: azcore.ClientOptions{
			Retry: policy.RetryOptions{
				MaxRetries:    3,                // Reasonable retry count - aggressive retries mask configuration errors
				TryTimeout:    10 * time.Second, // Reduced from 1 minute to fail faster on auth issues
				RetryDelay:    1 * time.Second,
				MaxRetryDelay: 10 * time.Second,
			},
		},
	}
}

// invalidMetadataChars matches any character that is not valid in Azure metadata keys.
// Azure metadata keys must be valid C# identifiers: letters, digits, and underscores only.
var invalidMetadataChars = regexp.MustCompile(`[^a-zA-Z0-9_]`)

// sanitizeMetadataKey converts an S3 metadata key to a valid Azure metadata key.
// Azure metadata keys must be valid C# identifiers (letters, digits, underscores only, and cannot start with a digit).
// To prevent collisions, invalid characters are replaced with their hex representation (_XX_).
// Examples:
//   - "my-key" -> "my_2d_key"
//   - "my.key" -> "my_2e_key"
//   - "key@value" -> "key_40_value"
func sanitizeMetadataKey(key string) string {
	// Replace each invalid character with _XX_ where XX is the hex code
	result := invalidMetadataChars.ReplaceAllStringFunc(key, func(s string) string {
		return fmt.Sprintf("_%02x_", s[0])
	})

	// Azure metadata keys cannot start with a digit
	if len(result) > 0 && result[0] >= '0' && result[0] <= '9' {
		result = "_" + result
	}

	return result
}
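
// init registers this maker in the remote storage client registry under the "azure" type.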
func init() {
	remote_storage.RemoteStorageClientMakers["azure"] = new(azureRemoteStorageMaker)
}

type azureRemoteStorageMaker struct{}

func (s azureRemoteStorageMaker) HasBucket() bool {
	return true
}

func (s azureRemoteStorageMaker) Make(conf *remote_pb.RemoteConf) (remote_storage.RemoteStorageClient, error) {

	client := &azureRemoteStorageClient{
		conf: conf,
	}

	accountName, accountKey := conf.AzureAccountName, conf.AzureAccountKey
	if len(accountName) == 0 || len(accountKey) == 0 {
		accountName, accountKey = os.Getenv("AZURE_STORAGE_ACCOUNT"), os.Getenv("AZURE_STORAGE_ACCESS_KEY")
		if len(accountName) == 0 || len(accountKey) == 0 {
			return nil, fmt.Errorf("either AZURE_STORAGE_ACCOUNT or AZURE_STORAGE_ACCESS_KEY environment variable is not set")
		}
	}

	// Create credential and client
	credential, err := azblob.NewSharedKeyCredential(accountName, accountKey)
	if err != nil {
		return nil, fmt.Errorf("invalid Azure credential with account name:%s: %w", accountName, err)
	}

	serviceURL := fmt.Sprintf("https://%s.blob.core.windows.net/", accountName)
	azClient, err := azblob.NewClientWithSharedKeyCredential(serviceURL, credential, DefaultAzBlobClientOptions())
	if err != nil {
		return nil, fmt.Errorf("failed to create Azure client: %w", err)
	}

	client.client = azClient

	return client, nil
}
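
// azureRemoteStorageClient implements remote_storage.RemoteStorageClient on top of the Azure Blob Storage SDK.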
type azureRemoteStorageClient struct {
	conf   *remote_pb.RemoteConf
	client *azblob.Client
}

var _ = remote_storage.RemoteStorageClient(&azureRemoteStorageClient{})
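
// StatFile looks up the blob at loc and returns its metadata as a filer_pb.RemoteEntry.
// A missing blob is reported as the remote_storage.ErrRemoteObjectNotFound sentinel, so
// callers can detect remote misses uniformly across storage providers, for example:
//
//	entry, err := client.StatFile(loc)
//	if errors.Is(err, remote_storage.ErrRemoteObjectNotFound) {
//		// the object does not exist remotely
//	}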
func (az *azureRemoteStorageClient) StatFile(loc *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error) {
	key := loc.Path[1:]
	ctx, cancel := context.WithTimeout(context.Background(), DefaultAzureOpTimeout)
	defer cancel()
	resp, err := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlobClient(key).GetProperties(ctx, nil)
	if err != nil {
		if bloberror.HasCode(err, bloberror.BlobNotFound) {
			return nil, remote_storage.ErrRemoteObjectNotFound
		}
		return nil, fmt.Errorf("stat azure %s%s: %w", loc.Bucket, loc.Path, err)
	}
	remoteEntry = &filer_pb.RemoteEntry{
		StorageName: az.conf.Name,
	}
	if resp.ContentLength != nil {
		remoteEntry.RemoteSize = *resp.ContentLength
	}
	if resp.LastModified != nil {
		remoteEntry.RemoteMtime = resp.LastModified.Unix()
	}
	if resp.ETag != nil {
		remoteEntry.RemoteETag = string(*resp.ETag)
	}
	return remoteEntry, nil
}
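
// Traverse lists every blob under loc.Path in the container and invokes visitFn for each one.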
func (az *azureRemoteStorageClient) Traverse(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) (err error) {

	pathKey := loc.Path[1:]
	containerClient := az.client.ServiceClient().NewContainerClient(loc.Bucket)

	// List blobs with pager
	pager := containerClient.NewListBlobsFlatPager(&container.ListBlobsFlatOptions{
		Prefix: &pathKey,
	})

	for pager.More() {
		resp, err := pager.NextPage(context.Background())
		if err != nil {
			return fmt.Errorf("azure traverse %s%s: %w", loc.Bucket, loc.Path, err)
		}

		for _, blobItem := range resp.Segment.BlobItems {
			if blobItem.Name == nil {
				continue
			}
			key := "/" + *blobItem.Name
			dir, name := util.FullPath(key).DirAndName()

			remoteEntry := &filer_pb.RemoteEntry{
				StorageName: az.conf.Name,
			}
			if blobItem.Properties != nil {
				if blobItem.Properties.LastModified != nil {
					remoteEntry.RemoteMtime = blobItem.Properties.LastModified.Unix()
				}
				if blobItem.Properties.ContentLength != nil {
					remoteEntry.RemoteSize = *blobItem.Properties.ContentLength
				}
				if blobItem.Properties.ETag != nil {
					remoteEntry.RemoteETag = string(*blobItem.Properties.ETag)
				}
			}

			err = visitFn(dir, name, false, remoteEntry)
			if err != nil {
				return fmt.Errorf("azure processing %s%s: %w", loc.Bucket, loc.Path, err)
			}
		}
	}

	return
}
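
// ReadFile downloads size bytes starting at offset; a size of 0 reads from offset to the end of the blob.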
func (az *azureRemoteStorageClient) ReadFile(loc *remote_pb.RemoteStorageLocation, offset int64, size int64) (data []byte, err error) {

	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlockBlobClient(key)

	count := size
	if count == 0 {
		count = blob.CountToEnd
	}
	downloadResp, err := blobClient.DownloadStream(context.Background(), &blob.DownloadStreamOptions{
		Range: blob.HTTPRange{
			Offset: offset,
			Count:  count,
		},
	})
	if err != nil {
		return nil, fmt.Errorf("failed to download file %s%s: %w", loc.Bucket, loc.Path, err)
	}
	defer downloadResp.Body.Close()

	data, err = io.ReadAll(downloadResp.Body)
	if err != nil {
		return nil, fmt.Errorf("failed to read download stream %s%s: %w", loc.Bucket, loc.Path, err)
	}

	return
}
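
// WriteDirectory is a no-op: Azure Blob Storage has a flat namespace, so directories exist only implicitly as blob name prefixes.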
func (az *azureRemoteStorageClient) WriteDirectory(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry) (err error) {
	return nil
}
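
// RemoveDirectory is likewise a no-op, for the same reason as WriteDirectory.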
func (az *azureRemoteStorageClient) RemoveDirectory(loc *remote_pb.RemoteStorageLocation) (err error) {
	return nil
}
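
// WriteFile streams the reader to the target blob, carrying over the entry's MIME type and
// S3-style user metadata, then stats the blob to return its authoritative remote entry.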
func (az *azureRemoteStorageClient) WriteFile(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry, reader io.Reader) (remoteEntry *filer_pb.RemoteEntry, err error) {

	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlockBlobClient(key)

	// Upload from reader
	metadata := toMetadata(entry.Extended)
	httpHeaders := &blob.HTTPHeaders{}
	if entry.Attributes != nil && entry.Attributes.Mime != "" {
		httpHeaders.BlobContentType = &entry.Attributes.Mime
	}

	_, err = blobClient.UploadStream(context.Background(), reader, &blockblob.UploadStreamOptions{
		BlockSize:   defaultBlockSize,
		Concurrency: defaultConcurrency,
		HTTPHeaders: httpHeaders,
		Metadata:    metadata,
	})
	if err != nil {
		return nil, fmt.Errorf("azure upload to %s%s: %w", loc.Bucket, loc.Path, err)
	}

	// read back the remote entry
	return az.readFileRemoteEntry(loc)
}
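
// readFileRemoteEntry delegates to StatFile so the write path and the stat path share one implementation.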
func (az *azureRemoteStorageClient) readFileRemoteEntry(loc *remote_pb.RemoteStorageLocation) (*filer_pb.RemoteEntry, error) {
	return az.StatFile(loc)
}
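
// toMetadata converts S3-style extended attributes (keys carrying s3_constants.AmzUserMetaPrefix)
// into Azure blob metadata, lowercasing each key and sanitizing it into a valid C# identifier.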
func toMetadata(attributes map[string][]byte) map[string]*string {
	metadata := make(map[string]*string)
	for k, v := range attributes {
		if strings.HasPrefix(k, s3_constants.AmzUserMetaPrefix) {
			// S3 stores metadata keys in lowercase; normalize for consistency.
			key := strings.ToLower(k[len(s3_constants.AmzUserMetaPrefix):])

			// Sanitize key to prevent collisions and ensure Azure compliance
			key = sanitizeMetadataKey(key)

			val := string(v)
			metadata[key] = &val
		}
	}
	return metadata
}
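
// UpdateFileMetadata replaces the blob's metadata with the new entry's extended attributes,
// skipping the round trip when nothing has changed.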
func (az *azureRemoteStorageClient) UpdateFileMetadata(loc *remote_pb.RemoteStorageLocation, oldEntry *filer_pb.Entry, newEntry *filer_pb.Entry) (err error) {
	if reflect.DeepEqual(oldEntry.Extended, newEntry.Extended) {
		return nil
	}
	metadata := toMetadata(newEntry.Extended)

	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlobClient(key)

	_, err = blobClient.SetMetadata(context.Background(), metadata, nil)

	return
}
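
// DeleteFile removes the blob and any snapshots; deleting an already-missing blob is treated as success.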
func (az *azureRemoteStorageClient) DeleteFile(loc *remote_pb.RemoteStorageLocation) (err error) {
	key := loc.Path[1:]
	blobClient := az.client.ServiceClient().NewContainerClient(loc.Bucket).NewBlobClient(key)

	_, err = blobClient.Delete(context.Background(), &blob.DeleteOptions{
		DeleteSnapshots: to.Ptr(blob.DeleteSnapshotsOptionTypeInclude),
	})
	if err != nil {
		// Make delete idempotent - don't return error if blob doesn't exist
		if bloberror.HasCode(err, bloberror.BlobNotFound) {
			return nil
		}
		return fmt.Errorf("azure delete %s%s: %w", loc.Bucket, loc.Path, err)
	}
	return
}
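
// ListBuckets pages through all containers in the storage account and reports each one as a bucket.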
func (az *azureRemoteStorageClient) ListBuckets() (buckets []*remote_storage.Bucket, err error) {
	pager := az.client.NewListContainersPager(nil)

	for pager.More() {
		resp, err := pager.NextPage(context.Background())
		if err != nil {
			return buckets, err
		}

		for _, containerItem := range resp.ContainerItems {
			if containerItem.Name != nil {
				bucket := &remote_storage.Bucket{
					Name: *containerItem.Name,
				}
				if containerItem.Properties != nil && containerItem.Properties.LastModified != nil {
					bucket.CreatedAt = *containerItem.Properties.LastModified
				}
				buckets = append(buckets, bucket)
			}
		}
	}
	return
}
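
// CreateBucket creates the named container.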
func (az *azureRemoteStorageClient) CreateBucket(name string) (err error) {
	containerClient := az.client.ServiceClient().NewContainerClient(name)
	_, err = containerClient.Create(context.Background(), nil)
	if err != nil {
		return fmt.Errorf("create bucket %s: %w", name, err)
	}
	return
}
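
// DeleteBucket deletes the named container along with the blobs it contains.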
func (az *azureRemoteStorageClient) DeleteBucket(name string) (err error) {
	containerClient := az.client.ServiceClient().NewContainerClient(name)
	_, err = containerClient.Delete(context.Background(), nil)
	if err != nil {
		return fmt.Errorf("delete bucket %s: %w", name, err)
	}
	return
}