* feat: add statfile; add error for remote storage misses * feat: statfile implementations for storage providers * test: add unit tests for StatFile method across providers Add comprehensive unit tests for the StatFile implementation covering: - S3: interface compliance and error constant accessibility - Azure: interface compliance, error constants, and field population - GCS: interface compliance, error constants, error detection, and field population Also fix variable shadowing issue in S3 and Azure StatFile implementations where named return parameters were being shadowed by local variable declarations. Co-authored-by: Cursor <cursoragent@cursor.com> * fix: address StatFile review feedback - Use errors.New for ErrRemoteObjectNotFound sentinel - Fix S3 HeadObject 404 detection to use awserr.Error code check - Remove hollow field-population tests that tested nothing - Remove redundant stdlib error detection tests - Trim verbose doc comment on ErrRemoteObjectNotFound Co-authored-by: Cursor <cursoragent@cursor.com> * fix: address second round of StatFile review feedback - Rename interface assertion tests to TestXxxRemoteStorageClientImplementsInterface - Delegate readFileRemoteEntry to StatFile in all three providers - Revert S3 404 detection to RequestFailure.StatusCode() check - Fix double-slash in GCS error message format string - Add storage type prefix to S3 error message for consistency Co-authored-by: Cursor <cursoragent@cursor.com> * fix: comments --------- Co-authored-by: Cursor <cursoragent@cursor.com>
276 lines
7.8 KiB
Go
276 lines
7.8 KiB
Go
package gcs
|
|
|
|
import (
|
|
"context"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
"reflect"
|
|
"strings"
|
|
"time"
|
|
|
|
"cloud.google.com/go/storage"
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/remote_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/remote_storage"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
"golang.org/x/oauth2"
|
|
"golang.org/x/oauth2/google"
|
|
"google.golang.org/api/iterator"
|
|
"google.golang.org/api/option"
|
|
)
|
|
|
|
// init registers the GCS client maker under the "gcs" storage type so that
// remote_storage can construct clients for gcs-typed remote configurations.
func init() {
	remote_storage.RemoteStorageClientMakers["gcs"] = new(gcsRemoteStorageMaker)
}
|
|
|
|
// gcsRemoteStorageMaker builds gcsRemoteStorageClient instances from a remote
// configuration; it is registered in init under the "gcs" storage type.
type gcsRemoteStorageMaker struct{}

// HasBucket reports that GCS groups objects into buckets, so the bucket-level
// operations (ListBuckets/CreateBucket/DeleteBucket) are meaningful.
func (s gcsRemoteStorageMaker) HasBucket() bool {
	return true
}
|
|
|
|
// Make creates a GCS-backed RemoteStorageClient from conf.
//
// Credential resolution order:
//  1. conf.GcsGoogleApplicationCredentials — either a path to a credentials
//     file or an inline JSON credential (detected by a leading "{"),
//  2. the GOOGLE_APPLICATION_CREDENTIALS environment variable,
//  3. Application Default Credentials (with a warning).
//
// The project id comes from conf.GcsProjectId or the GOOGLE_CLOUD_PROJECT env
// variable; a missing project id only produces a warning here because it is
// needed solely for bucket-level operations (see ListBuckets/CreateBucket).
func (s gcsRemoteStorageMaker) Make(conf *remote_pb.RemoteConf) (remote_storage.RemoteStorageClient, error) {
	client := &gcsRemoteStorageClient{
		conf: conf,
	}

	googleApplicationCredentials := conf.GcsGoogleApplicationCredentials

	if googleApplicationCredentials == "" {
		if creds, found := os.LookupEnv("GOOGLE_APPLICATION_CREDENTIALS"); found {
			googleApplicationCredentials = creds
		} else {
			glog.Warningf("no GOOGLE_APPLICATION_CREDENTIALS env variable found, falling back to Application Default Credentials")
		}
	}

	projectID := conf.GcsProjectId
	if projectID == "" {
		if pid, found := os.LookupEnv("GOOGLE_CLOUD_PROJECT"); found {
			projectID = pid
		} else {
			glog.Warningf("need to specify GOOGLE_CLOUD_PROJECT env variable")
		}
	}

	var clientOpts []option.ClientOption

	if googleApplicationCredentials != "" {
		googleApplicationCredentials = util.ResolvePath(googleApplicationCredentials)
		var data []byte
		var err error
		// A leading "{" means the value is the JSON credential itself rather
		// than a path to a credentials file.
		if strings.HasPrefix(googleApplicationCredentials, "{") {
			data = []byte(googleApplicationCredentials)
		} else {
			data, err = os.ReadFile(googleApplicationCredentials)
			if err != nil {
				return nil, fmt.Errorf("failed to read credentials file %s: %w", googleApplicationCredentials, err)
			}
		}
		creds, err := google.CredentialsFromJSON(context.Background(), data, storage.ScopeFullControl)
		if err != nil {
			return nil, fmt.Errorf("failed to parse credentials: %w", err)
		}
		// Hand the SDK a pre-authenticated HTTP client and disable its own
		// auth discovery so the explicit credentials always win.
		httpClient := oauth2.NewClient(context.Background(), creds.TokenSource)
		clientOpts = append(clientOpts, option.WithHTTPClient(httpClient), option.WithoutAuthentication())
	}

	c, err := storage.NewClient(context.Background(), clientOpts...)
	if err != nil {
		return nil, fmt.Errorf("failed to create client: %w", err)
	}

	client.client = c
	client.projectID = projectID
	return client, nil
}
|
|
|
|
type gcsRemoteStorageClient struct {
|
|
conf *remote_pb.RemoteConf
|
|
client *storage.Client
|
|
projectID string
|
|
}
|
|
|
|
var _ = remote_storage.RemoteStorageClient(&gcsRemoteStorageClient{})
|
|
|
|
func (gcs *gcsRemoteStorageClient) Traverse(loc *remote_pb.RemoteStorageLocation, visitFn remote_storage.VisitFunc) (err error) {
|
|
|
|
pathKey := loc.Path[1:]
|
|
|
|
objectIterator := gcs.client.Bucket(loc.Bucket).Objects(context.Background(), &storage.Query{
|
|
Delimiter: "",
|
|
Prefix: pathKey,
|
|
Versions: false,
|
|
})
|
|
|
|
var objectAttr *storage.ObjectAttrs
|
|
for err == nil {
|
|
objectAttr, err = objectIterator.Next()
|
|
if err != nil {
|
|
if err == iterator.Done {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
|
|
key := objectAttr.Name
|
|
key = "/" + key
|
|
dir, name := util.FullPath(key).DirAndName()
|
|
err = visitFn(dir, name, false, &filer_pb.RemoteEntry{
|
|
RemoteMtime: objectAttr.Updated.Unix(),
|
|
RemoteSize: objectAttr.Size,
|
|
RemoteETag: objectAttr.Etag,
|
|
StorageName: gcs.conf.Name,
|
|
})
|
|
}
|
|
return
|
|
}
|
|
|
|
const defaultGCSOpTimeout = 30 * time.Second
|
|
|
|
func (gcs *gcsRemoteStorageClient) StatFile(loc *remote_pb.RemoteStorageLocation) (remoteEntry *filer_pb.RemoteEntry, err error) {
|
|
key := loc.Path[1:]
|
|
ctx, cancel := context.WithTimeout(context.Background(), defaultGCSOpTimeout)
|
|
defer cancel()
|
|
attr, err := gcs.client.Bucket(loc.Bucket).Object(key).Attrs(ctx)
|
|
if err != nil {
|
|
if errors.Is(err, storage.ErrObjectNotExist) {
|
|
return nil, remote_storage.ErrRemoteObjectNotFound
|
|
}
|
|
return nil, fmt.Errorf("stat gcs %s%s: %w", loc.Bucket, loc.Path, err)
|
|
}
|
|
return &filer_pb.RemoteEntry{
|
|
StorageName: gcs.conf.Name,
|
|
RemoteMtime: attr.Updated.Unix(),
|
|
RemoteSize: attr.Size,
|
|
RemoteETag: attr.Etag,
|
|
}, nil
|
|
}
|
|
|
|
func (gcs *gcsRemoteStorageClient) ReadFile(loc *remote_pb.RemoteStorageLocation, offset int64, size int64) (data []byte, err error) {
|
|
|
|
key := loc.Path[1:]
|
|
rangeReader, readErr := gcs.client.Bucket(loc.Bucket).Object(key).NewRangeReader(context.Background(), offset, size)
|
|
if readErr != nil {
|
|
return nil, readErr
|
|
}
|
|
data, err = io.ReadAll(rangeReader)
|
|
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to download file %s%s: %v", loc.Bucket, loc.Path, err)
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
// WriteDirectory is a no-op: GCS exposes a flat object namespace, so there is
// no remote directory to create.
func (gcs *gcsRemoteStorageClient) WriteDirectory(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry) (err error) {
	return nil
}
|
|
|
|
// RemoveDirectory is a no-op: GCS exposes a flat object namespace, so there is
// no remote directory to delete.
func (gcs *gcsRemoteStorageClient) RemoveDirectory(loc *remote_pb.RemoteStorageLocation) (err error) {
	return nil
}
|
|
|
|
func (gcs *gcsRemoteStorageClient) WriteFile(loc *remote_pb.RemoteStorageLocation, entry *filer_pb.Entry, reader io.Reader) (remoteEntry *filer_pb.RemoteEntry, err error) {
|
|
|
|
key := loc.Path[1:]
|
|
|
|
metadata := toMetadata(entry.Extended)
|
|
wc := gcs.client.Bucket(loc.Bucket).Object(key).NewWriter(context.Background())
|
|
wc.Metadata = metadata
|
|
if _, err = io.Copy(wc, reader); err != nil {
|
|
return nil, fmt.Errorf("upload to gcs %s/%s%s: %v", loc.Name, loc.Bucket, loc.Path, err)
|
|
}
|
|
if err = wc.Close(); err != nil {
|
|
return nil, fmt.Errorf("close gcs %s/%s%s: %v", loc.Name, loc.Bucket, loc.Path, err)
|
|
}
|
|
|
|
// read back the remote entry
|
|
return gcs.readFileRemoteEntry(loc)
|
|
|
|
}
|
|
|
|
// readFileRemoteEntry fetches the object's current metadata by delegating to
// StatFile; kept as a separate name for call-site clarity in WriteFile.
func (gcs *gcsRemoteStorageClient) readFileRemoteEntry(loc *remote_pb.RemoteStorageLocation) (*filer_pb.RemoteEntry, error) {
	return gcs.StatFile(loc)
}
|
|
|
|
// toMetadata converts filer extended attributes into GCS object metadata,
// skipping "X-"-prefixed keys and interpreting each value as a string.
func toMetadata(attributes map[string][]byte) map[string]string {
	result := make(map[string]string, len(attributes))
	for name, value := range attributes {
		if strings.HasPrefix(name, "X-") {
			continue
		}
		result[name] = string(value)
	}
	return result
}
|
|
|
|
func (gcs *gcsRemoteStorageClient) UpdateFileMetadata(loc *remote_pb.RemoteStorageLocation, oldEntry *filer_pb.Entry, newEntry *filer_pb.Entry) (err error) {
|
|
if reflect.DeepEqual(oldEntry.Extended, newEntry.Extended) {
|
|
return nil
|
|
}
|
|
metadata := toMetadata(newEntry.Extended)
|
|
|
|
key := loc.Path[1:]
|
|
|
|
if len(metadata) > 0 {
|
|
_, err = gcs.client.Bucket(loc.Bucket).Object(key).Update(context.Background(), storage.ObjectAttrsToUpdate{
|
|
Metadata: metadata,
|
|
})
|
|
} else {
|
|
// no way to delete the metadata yet
|
|
}
|
|
|
|
return
|
|
}
|
|
func (gcs *gcsRemoteStorageClient) DeleteFile(loc *remote_pb.RemoteStorageLocation) (err error) {
|
|
key := loc.Path[1:]
|
|
if err = gcs.client.Bucket(loc.Bucket).Object(key).Delete(context.Background()); err != nil {
|
|
return fmt.Errorf("gcs delete %s%s: %v", loc.Bucket, key, err)
|
|
}
|
|
return
|
|
}
|
|
|
|
func (gcs *gcsRemoteStorageClient) ListBuckets() (buckets []*remote_storage.Bucket, err error) {
|
|
if gcs.projectID == "" {
|
|
return nil, fmt.Errorf("gcs project id or GOOGLE_CLOUD_PROJECT env variable not set")
|
|
}
|
|
iter := gcs.client.Buckets(context.Background(), gcs.projectID)
|
|
for {
|
|
b, err := iter.Next()
|
|
if err == iterator.Done {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return buckets, err
|
|
}
|
|
buckets = append(buckets, &remote_storage.Bucket{
|
|
Name: b.Name,
|
|
CreatedAt: b.Created,
|
|
})
|
|
}
|
|
return
|
|
}
|
|
|
|
func (gcs *gcsRemoteStorageClient) CreateBucket(name string) (err error) {
|
|
if gcs.projectID == "" {
|
|
return fmt.Errorf("gcs project id or GOOGLE_CLOUD_PROJECT env variable not set")
|
|
}
|
|
err = gcs.client.Bucket(name).Create(context.Background(), gcs.projectID, &storage.BucketAttrs{})
|
|
if err != nil {
|
|
return fmt.Errorf("create bucket %s: %v", name, err)
|
|
}
|
|
return
|
|
}
|
|
|
|
func (gcs *gcsRemoteStorageClient) DeleteBucket(name string) (err error) {
|
|
err = gcs.client.Bucket(name).Delete(context.Background())
|
|
if err != nil {
|
|
return fmt.Errorf("delete bucket %s: %v", name, err)
|
|
}
|
|
return
|
|
}
|