Files
seaweedFS/weed/remote_storage/gcs/gcs_storage_client.go
Peter Dodd 0910252e31 feat: add statfile remote storage (#8443)
* feat: add statfile; add error for remote storage misses

* feat: statfile implementations for storage providers

* test: add unit tests for StatFile method across providers

Add comprehensive unit tests for the StatFile implementation covering:
- S3: interface compliance and error constant accessibility
- Azure: interface compliance, error constants, and field population
- GCS: interface compliance, error constants, error detection, and field population

Also fix variable shadowing issue in S3 and Azure StatFile implementations where
named return parameters were being shadowed by local variable declarations.

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: address StatFile review feedback

- Use errors.New for ErrRemoteObjectNotFound sentinel
- Fix S3 HeadObject 404 detection to use awserr.Error code check
- Remove hollow field-population tests that tested nothing
- Remove redundant stdlib error detection tests
- Trim verbose doc comment on ErrRemoteObjectNotFound

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: address second round of StatFile review feedback

- Rename interface assertion tests to TestXxxRemoteStorageClientImplementsInterface
- Delegate readFileRemoteEntry to StatFile in all three providers
- Revert S3 404 detection to RequestFailure.StatusCode() check
- Fix double-slash in GCS error message format string
- Add storage type prefix to S3 error message for consistency

Co-authored-by: Cursor <cursoragent@cursor.com>

* fix: comments

---------

Co-authored-by: Cursor <cursoragent@cursor.com>
2026-02-25 10:24:06 -08:00

276 lines
7.8 KiB
Go

package gcs
import (
"context"
"errors"
"fmt"
"io"
"os"
"reflect"
"strings"
"time"
"cloud.google.com/go/storage"
"github.com/seaweedfs/seaweedfs/weed/glog"
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
"github.com/seaweedfs/seaweedfs/weed/remote_storage"
"github.com/seaweedfs/seaweedfs/weed/util"
"golang.org/x/oauth2"
"golang.org/x/oauth2/google"
"google.golang.org/api/iterator"
"google.golang.org/api/option"
)
func init() {
remote_storage.RemoteStorageClientMakers["gcs"] = new(gcsRemoteStorageMaker)
}
// gcsRemoteStorageMaker creates GCS-backed remote storage clients; it is
// registered in remote_storage.RemoteStorageClientMakers by init above.
type gcsRemoteStorageMaker struct{}

// HasBucket reports whether this storage type organizes objects under
// buckets. GCS always does.
func (s gcsRemoteStorageMaker) HasBucket() bool {
	return true
}
// Make builds a gcsRemoteStorageClient from the given remote configuration.
//
// Credential resolution order: conf.GcsGoogleApplicationCredentials, then the
// GOOGLE_APPLICATION_CREDENTIALS env variable, then Application Default
// Credentials (with a warning). The credential value may be either inline
// JSON (detected by a leading "{") or a path to a JSON key file.
// The project id falls back from conf.GcsProjectId to GOOGLE_CLOUD_PROJECT;
// it is only required for bucket-level operations (see ListBuckets).
func (s gcsRemoteStorageMaker) Make(conf *remote_pb.RemoteConf) (remote_storage.RemoteStorageClient, error) {
	client := &gcsRemoteStorageClient{
		conf: conf,
	}
	googleApplicationCredentials := conf.GcsGoogleApplicationCredentials
	if googleApplicationCredentials == "" {
		if creds, found := os.LookupEnv("GOOGLE_APPLICATION_CREDENTIALS"); found {
			googleApplicationCredentials = creds
		} else {
			glog.Warningf("no GOOGLE_APPLICATION_CREDENTIALS env variable found, falling back to Application Default Credentials")
		}
	}
	projectID := conf.GcsProjectId
	if projectID == "" {
		if pid, found := os.LookupEnv("GOOGLE_CLOUD_PROJECT"); found {
			projectID = pid
		} else {
			// not fatal here: only bucket-level operations need the project id
			glog.Warningf("need to specify GOOGLE_CLOUD_PROJECT env variable")
		}
	}
	var clientOpts []option.ClientOption
	if googleApplicationCredentials != "" {
		googleApplicationCredentials = util.ResolvePath(googleApplicationCredentials)
		var data []byte
		var err error
		if strings.HasPrefix(googleApplicationCredentials, "{") {
			// inline JSON credentials, use them directly
			data = []byte(googleApplicationCredentials)
		} else {
			data, err = os.ReadFile(googleApplicationCredentials)
			if err != nil {
				return nil, fmt.Errorf("failed to read credentials file %s: %w", googleApplicationCredentials, err)
			}
		}
		creds, err := google.CredentialsFromJSON(context.Background(), data, storage.ScopeFullControl)
		if err != nil {
			return nil, fmt.Errorf("failed to parse credentials: %w", err)
		}
		// authentication is carried by the oauth2-wrapped HTTP client, so the
		// storage client's own auth machinery is explicitly disabled
		httpClient := oauth2.NewClient(context.Background(), creds.TokenSource)
		clientOpts = append(clientOpts, option.WithHTTPClient(httpClient), option.WithoutAuthentication())
	}
	c, err := storage.NewClient(context.Background(), clientOpts...)
	if err != nil {
		return nil, fmt.Errorf("failed to create client: %w", err)
	}
	client.client = c
	client.projectID = projectID
	return client, nil
}
// gcsRemoteStorageClient implements remote_storage.RemoteStorageClient
// on top of the Google Cloud Storage SDK.
type gcsRemoteStorageClient struct {
	conf      *remote_pb.RemoteConf // remote storage configuration this client was built from
	client    *storage.Client       // underlying GCS SDK client
	projectID string                // GCP project id; may be empty, required only for bucket operations
}

// compile-time assertion that the full interface is implemented
var _ = remote_storage.RemoteStorageClient(&gcsRemoteStorageClient{})
// Traverse lists every object whose key starts with loc.Path and calls
// visitFn for each one as a file entry. It stops and returns the first
// error produced by either the iterator or visitFn.
func (gcs *gcsRemoteStorageClient) Traverse(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) (err error) {
	// object keys have no leading "/", so strip it from the location path
	prefix := loc.Path[1:]
	it := gcs.client.Bucket(loc.Bucket).Objects(context.Background(), &storage.Query{
		Delimiter: "",
		Prefix:    prefix,
		Versions:  false,
	})
	for {
		attrs, nextErr := it.Next()
		if nextErr == iterator.Done {
			// exhausted the listing without error
			return nil
		}
		if nextErr != nil {
			return nextErr
		}
		// re-add the leading "/" so the key splits into filer dir + name
		dir, name := util.FullPath("/" + attrs.Name).DirAndName()
		visitErr := visitFn(dir, name, false, &filer_pb.RemoteEntry{
			RemoteMtime: attrs.Updated.Unix(),
			RemoteSize:  attrs.Size,
			RemoteETag:  attrs.Etag,
			StorageName: gcs.conf.Name,
		})
		if visitErr != nil {
			return visitErr
		}
	}
}
// defaultGCSOpTimeout bounds a single metadata request against GCS.
const defaultGCSOpTimeout = 30 * time.Second

// StatFile fetches the attributes of the object at loc and converts them
// into a filer remote entry. A missing object is reported as the sentinel
// remote_storage.ErrRemoteObjectNotFound; any other failure is wrapped
// with the bucket and path for context.
func (gcs *gcsRemoteStorageClient) StatFile(loc *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error) {
	objectKey := loc.Path[1:] // drop the leading "/" to get the object key
	ctx, cancel := context.WithTimeout(context.Background(), defaultGCSOpTimeout)
	defer cancel()
	attrs, statErr := gcs.client.Bucket(loc.Bucket).Object(objectKey).Attrs(ctx)
	if statErr != nil {
		if errors.Is(statErr, storage.ErrObjectNotExist) {
			return nil, remote_storage.ErrRemoteObjectNotFound
		}
		return nil, fmt.Errorf("stat gcs %s%s: %w", loc.Bucket, loc.Path, statErr)
	}
	entry := &filer_pb.RemoteEntry{
		StorageName: gcs.conf.Name,
		RemoteMtime: attrs.Updated.Unix(),
		RemoteSize:  attrs.Size,
		RemoteETag:  attrs.Etag,
	}
	return entry, nil
}
// ReadFile downloads size bytes starting at offset from the object at loc.
//
// Fix: the *storage.Reader returned by NewRangeReader is an io.ReadCloser
// and was previously never closed, leaking the underlying connection on
// every read; it is now closed via defer.
func (gcs *gcsRemoteStorageClient) ReadFile(loc *remote_pb.RemoteStorageLocation, offset int64, size int64) (data []byte, err error) {
	key := loc.Path[1:]
	rangeReader, readErr := gcs.client.Bucket(loc.Bucket).Object(key).NewRangeReader(context.Background(), offset, size)
	if readErr != nil {
		return nil, readErr
	}
	defer rangeReader.Close()
	data, err = io.ReadAll(rangeReader)
	if err != nil {
		return nil, fmt.Errorf("failed to download file %s%s: %v", loc.Bucket, loc.Path, err)
	}
	return
}
// WriteDirectory is a no-op: GCS has a flat object namespace, so there is
// no directory object to create.
func (gcs *gcsRemoteStorageClient) WriteDirectory(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry) (err error) {
	return nil
}
// RemoveDirectory is a no-op: GCS has a flat object namespace, so there is
// no directory object to delete.
func (gcs *gcsRemoteStorageClient) RemoveDirectory(loc *remote_pb.RemoteStorageLocation) (err error) {
	return nil
}
// WriteFile uploads reader's content to the object at loc, attaching the
// entry's extended attributes as object metadata, then stats the object to
// build the resulting remote entry.
//
// Fix: when io.Copy failed, the storage.Writer was abandoned without Close,
// leaking its resources; it is now closed on the error path as well (the
// copy error remains the one reported, so the close error is ignored).
func (gcs *gcsRemoteStorageClient) WriteFile(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry, reader io.Reader) (remoteEntry *filer_pb.RemoteEntry, err error) {
	key := loc.Path[1:]
	metadata := toMetadata(entry.Extended)
	wc := gcs.client.Bucket(loc.Bucket).Object(key).NewWriter(context.Background())
	wc.Metadata = metadata
	if _, err = io.Copy(wc, reader); err != nil {
		// release the writer's resources; the copy error is the primary
		// failure, so the close error is intentionally discarded
		_ = wc.Close()
		return nil, fmt.Errorf("upload to gcs %s/%s%s: %v", loc.Name, loc.Bucket, loc.Path, err)
	}
	// Close commits the upload; the write is not durable until it succeeds
	if err = wc.Close(); err != nil {
		return nil, fmt.Errorf("close gcs %s/%s%s: %v", loc.Name, loc.Bucket, loc.Path, err)
	}
	// read back the remote entry
	return gcs.readFileRemoteEntry(loc)
}
// readFileRemoteEntry reads back the remote entry for the object at loc
// after a write; it simply delegates to StatFile.
func (gcs *gcsRemoteStorageClient) readFileRemoteEntry(loc *remote_pb.RemoteStorageLocation) (*filer_pb.RemoteEntry, error) {
	return gcs.StatFile(loc)
}
// toMetadata converts filer extended attributes into GCS object metadata,
// skipping keys with the "X-" prefix (presumably internal attributes that
// must not be written to the remote store — confirm against callers).
//
// Improvement: the result map is pre-sized to len(attributes) — the upper
// bound on the number of entries — to avoid incremental growth.
func toMetadata(attributes map[string][]byte) map[string]string {
	metadata := make(map[string]string, len(attributes))
	for k, v := range attributes {
		if strings.HasPrefix(k, "X-") {
			continue
		}
		metadata[k] = string(v)
	}
	return metadata
}
// UpdateFileMetadata pushes newEntry's extended attributes to the GCS
// object at loc when they differ from oldEntry's. Clearing all metadata is
// not supported, so an empty result set is silently left in place.
func (gcs *gcsRemoteStorageClient) UpdateFileMetadata(loc *remote_pb.RemoteStorageLocation, oldEntry *filer_pb.Entry, newEntry *filer_pb.Entry) (err error) {
	if reflect.DeepEqual(oldEntry.Extended, newEntry.Extended) {
		// attributes unchanged, skip the remote call entirely
		return nil
	}
	newMetadata := toMetadata(newEntry.Extended)
	if len(newMetadata) == 0 {
		// no way to delete the metadata yet
		return nil
	}
	objectKey := loc.Path[1:]
	_, err = gcs.client.Bucket(loc.Bucket).Object(objectKey).Update(context.Background(), storage.ObjectAttrsToUpdate{
		Metadata: newMetadata,
	})
	return err
}
// DeleteFile removes the object at loc from its GCS bucket.
//
// Fix: the error message formatted "%s%s" with the bucket and the key
// (which has its leading "/" stripped), producing "bucketkey" with no
// separator; use loc.Path — which keeps the leading "/" — so the message
// reads "bucket/path", consistent with every other message in this file.
func (gcs *gcsRemoteStorageClient) DeleteFile(loc *remote_pb.RemoteStorageLocation) (err error) {
	key := loc.Path[1:]
	if err = gcs.client.Bucket(loc.Bucket).Object(key).Delete(context.Background()); err != nil {
		return fmt.Errorf("gcs delete %s%s: %v", loc.Bucket, loc.Path, err)
	}
	return
}
// ListBuckets enumerates all buckets in the configured GCP project.
// It requires a project id (from config or GOOGLE_CLOUD_PROJECT).
func (gcs *gcsRemoteStorageClient) ListBuckets() (buckets []*remote_storage.Bucket, err error) {
	if gcs.projectID == "" {
		return nil, fmt.Errorf("gcs project id or GOOGLE_CLOUD_PROJECT env variable not set")
	}
	bucketIterator := gcs.client.Buckets(context.Background(), gcs.projectID)
	for {
		attrs, nextErr := bucketIterator.Next()
		if nextErr != nil {
			if nextErr == iterator.Done {
				// listing complete; return whatever was accumulated
				return buckets, nil
			}
			// return the partial result alongside the error, as before
			return buckets, nextErr
		}
		buckets = append(buckets, &remote_storage.Bucket{
			Name:      attrs.Name,
			CreatedAt: attrs.Created,
		})
	}
}
// CreateBucket creates a bucket with default attributes in the configured
// GCP project. It requires a project id (from config or GOOGLE_CLOUD_PROJECT).
func (gcs *gcsRemoteStorageClient) CreateBucket(name string) (err error) {
	if gcs.projectID == "" {
		return fmt.Errorf("gcs project id or GOOGLE_CLOUD_PROJECT env variable not set")
	}
	if createErr := gcs.client.Bucket(name).Create(context.Background(), gcs.projectID, &storage.BucketAttrs{}); createErr != nil {
		return fmt.Errorf("create bucket %s: %v", name, createErr)
	}
	return nil
}
// DeleteBucket removes the named bucket from GCS.
func (gcs *gcsRemoteStorageClient) DeleteBucket(name string) (err error) {
	if deleteErr := gcs.client.Bucket(name).Delete(context.Background()); deleteErr != nil {
		return fmt.Errorf("delete bucket %s: %v", name, deleteErr)
	}
	return nil
}