* fix: move table location mappings to /etc/s3tables to avoid bucket name validation Fixes #8362 - table location mappings were stored under /buckets/.table-location-mappings which fails bucket name validation because it starts with a dot. Moving them to /etc/s3tables resolves the migration error for upgrades. Changes: - Table location mappings now stored under /etc/s3tables - Ensure parent /etc directory exists before creating /etc/s3tables - Normal writes go to new location only (no legacy compatibility) - Removed bucket name validation exception for old location * refactor: simplify lookupTableLocationMapping by removing redundant mappingPath parameter The mappingPath function parameter was redundant as the path can be derived from mappingDir and bucket using path.Join. This simplifies the code and reduces the risk of path mismatches between parameters.
483 lines
15 KiB
Go
483 lines
15 KiB
Go
package s3tables
|
|
|
|
import (
|
|
"crypto/rand"
|
|
"crypto/sha1"
|
|
"encoding/hex"
|
|
"fmt"
|
|
"net/url"
|
|
"path"
|
|
"regexp"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
)
|
|
|
|
// Character-class fragments that are spliced into the S3 Tables ARN regular
// expressions.
const (
	// bucketNamePatternStr matches a run of lowercase letters, digits, and hyphens.
	bucketNamePatternStr = `[a-z0-9-]+`

	// tableNamespacePatternStr matches a run of lowercase letters, digits,
	// underscores, and dots.
	tableNamespacePatternStr = `[a-z0-9_.]+`

	// tableNamePatternStr matches a run of lowercase letters, digits, and underscores.
	tableNamePatternStr = `[a-z0-9_]+`
)
|
|
|
|
const (
	// tableLocationMappingsDirPath is the filer directory that holds the table
	// location bucket mappings.
	tableLocationMappingsDirPath = "/etc/s3tables"

	// tableObjectRootDirName is the directory name joined onto TablesPath to
	// form the root of table bucket object storage.
	tableObjectRootDirName = ".objects"
)
|
|
|
|
var (
|
|
bucketARNPattern = regexp.MustCompile(`^arn:aws:s3tables:[^:]*:[^:]*:bucket/(` + bucketNamePatternStr + `)$`)
|
|
tableARNPattern = regexp.MustCompile(`^arn:aws:s3tables:[^:]*:[^:]*:bucket/(` + bucketNamePatternStr + `)/table/(` + tableNamespacePatternStr + `)/(` + tableNamePatternStr + `)$`)
|
|
tagPattern = regexp.MustCompile(`^([\p{L}\p{Z}\p{N}_.:/=+\-@]*)$`)
|
|
)
|
|
|
|
// ARN parsing functions
|
|
|
|
// parseBucketNameFromARN extracts bucket name from table bucket ARN
|
|
// ARN format: arn:aws:s3tables:{region}:{account}:bucket/{bucket-name}
|
|
func parseBucketNameFromARN(arn string) (string, error) {
|
|
matches := bucketARNPattern.FindStringSubmatch(arn)
|
|
if len(matches) != 2 {
|
|
return "", fmt.Errorf("invalid bucket ARN: %s", arn)
|
|
}
|
|
bucketName := matches[1]
|
|
if !isValidBucketName(bucketName) {
|
|
return "", fmt.Errorf("invalid bucket name in ARN: %s", bucketName)
|
|
}
|
|
return bucketName, nil
|
|
}
|
|
|
|
// ParseBucketNameFromARN is a wrapper to validate bucket ARN for other packages.
|
|
func ParseBucketNameFromARN(arn string) (string, error) {
|
|
return parseBucketNameFromARN(arn)
|
|
}
|
|
|
|
// parseTableFromARN extracts bucket name, namespace, and table name from ARN
|
|
// ARN format: arn:aws:s3tables:{region}:{account}:bucket/{bucket-name}/table/{namespace}/{table-name}
|
|
func parseTableFromARN(arn string) (bucketName, namespace, tableName string, err error) {
|
|
matches := tableARNPattern.FindStringSubmatch(arn)
|
|
if len(matches) != 4 {
|
|
return "", "", "", fmt.Errorf("invalid table ARN: %s", arn)
|
|
}
|
|
|
|
// Validate bucket name
|
|
bucketName = matches[1]
|
|
if err := validateBucketName(bucketName); err != nil {
|
|
return "", "", "", fmt.Errorf("invalid bucket name in ARN: %v", err)
|
|
}
|
|
|
|
namespace, err = validateNamespace([]string{matches[2]})
|
|
if err != nil {
|
|
return "", "", "", fmt.Errorf("invalid namespace in ARN: %v", err)
|
|
}
|
|
|
|
// URL decode and validate the table name from the ARN path component
|
|
tableNameUnescaped, err := url.PathUnescape(matches[3])
|
|
if err != nil {
|
|
return "", "", "", fmt.Errorf("invalid table name encoding in ARN: %v", err)
|
|
}
|
|
if _, err := validateTableName(tableNameUnescaped); err != nil {
|
|
return "", "", "", fmt.Errorf("invalid table name in ARN: %v", err)
|
|
}
|
|
return bucketName, namespace, tableNameUnescaped, nil
|
|
}
|
|
|
|
// Path helpers
|
|
|
|
// GetTableBucketPath returns the filer path for a table bucket
|
|
func GetTableBucketPath(bucketName string) string {
|
|
return path.Join(TablesPath, bucketName)
|
|
}
|
|
|
|
// GetNamespacePath returns the filer path for a namespace
|
|
func GetNamespacePath(bucketName, namespace string) string {
|
|
return path.Join(TablesPath, bucketName, namespace)
|
|
}
|
|
|
|
// GetTablePath returns the filer path for a table
|
|
func GetTablePath(bucketName, namespace, tableName string) string {
|
|
return path.Join(TablesPath, bucketName, namespace, tableName)
|
|
}
|
|
|
|
// GetTableObjectRootDir returns the root path for table bucket object storage
|
|
func GetTableObjectRootDir() string {
|
|
return path.Join(TablesPath, tableObjectRootDirName)
|
|
}
|
|
|
|
// GetTableObjectBucketPath returns the filer path for table bucket object storage
|
|
func GetTableObjectBucketPath(bucketName string) string {
|
|
return path.Join(GetTableObjectRootDir(), bucketName)
|
|
}
|
|
|
|
// GetTableLocationMappingDir returns the root path for table location bucket mappings
|
|
func GetTableLocationMappingDir() string {
|
|
return tableLocationMappingsDirPath
|
|
}
|
|
|
|
// GetTableLocationMappingPath returns the filer path for a table location bucket mapping
|
|
func GetTableLocationMappingPath(tableLocationBucket string) string {
|
|
return path.Join(GetTableLocationMappingDir(), tableLocationBucket)
|
|
}
|
|
|
|
// GetTableLocationMappingEntryPath returns the filer path for a table-specific mapping entry.
|
|
// Each table gets its own entry so multiple tables can share the same external table-location bucket.
|
|
func GetTableLocationMappingEntryPath(tableLocationBucket, tablePath string) string {
|
|
return path.Join(GetTableLocationMappingPath(tableLocationBucket), tableLocationMappingEntryName(tablePath))
|
|
}
|
|
|
|
// tableLocationMappingEntryName derives the mapping entry name for tablePath.
// The path is first normalized to a cleaned absolute form (one leading slash
// dropped, surrounding whitespace trimmed, "/" prepended, path.Clean applied);
// the result is the lowercase hex encoding of the SHA-1 digest of that form.
func tableLocationMappingEntryName(tablePath string) string {
	relative := strings.TrimSpace(strings.TrimPrefix(tablePath, "/"))
	digest := sha1.Sum([]byte(path.Clean("/" + relative)))
	return hex.EncodeToString(digest[:])
}
|
|
|
|
func tableBucketPathFromTablePath(tablePath string) (string, bool) {
|
|
normalized := path.Clean("/" + strings.TrimSpace(strings.TrimPrefix(tablePath, "/")))
|
|
tablesPrefix := strings.TrimSuffix(TablesPath, "/") + "/"
|
|
if !strings.HasPrefix(normalized, tablesPrefix) {
|
|
return "", false
|
|
}
|
|
|
|
remaining := strings.TrimPrefix(normalized, tablesPrefix)
|
|
bucketName, _, _ := strings.Cut(remaining, "/")
|
|
if bucketName == "" {
|
|
return "", false
|
|
}
|
|
return path.Join(TablesPath, bucketName), true
|
|
}
|
|
|
|
// Metadata structures
|
|
|
|
// tableBucketMetadata is the JSON-serializable metadata record for a table
// bucket. All three fields are always emitted when marshalled.
type tableBucketMetadata struct {
	// Name is serialized under the "name" key.
	Name string `json:"name"`

	// CreatedAt is serialized under the "createdAt" key.
	CreatedAt time.Time `json:"createdAt"`

	// OwnerAccountID is serialized under the "ownerAccountId" key.
	OwnerAccountID string `json:"ownerAccountId"`
}
|
|
|
|
// namespaceMetadata is the JSON-serializable metadata record for a namespace.
type namespaceMetadata struct {
	// Namespace holds the namespace as a list of parts ("namespace" key).
	Namespace []string `json:"namespace"`

	// CreatedAt is serialized under the "createdAt" key.
	CreatedAt time.Time `json:"createdAt"`

	// OwnerAccountID is serialized under the "ownerAccountId" key.
	OwnerAccountID string `json:"ownerAccountId"`

	// Properties is left out of the JSON output when empty.
	Properties map[string]string `json:"properties,omitempty"`
}
|
|
|
|
// tableMetadataInternal stores metadata for a table
|
|
type tableMetadataInternal struct {
|
|
Name string `json:"name"`
|
|
Namespace string `json:"namespace"`
|
|
Format string `json:"format"`
|
|
CreatedAt time.Time `json:"createdAt"`
|
|
ModifiedAt time.Time `json:"modifiedAt"`
|
|
OwnerAccountID string `json:"ownerAccountId"`
|
|
VersionToken string `json:"versionToken"`
|
|
MetadataVersion int `json:"metadataVersion"`
|
|
MetadataLocation string `json:"metadataLocation,omitempty"`
|
|
Metadata *TableMetadata `json:"metadata,omitempty"`
|
|
}
|
|
|
|
// IsTableBucketEntry returns true when the entry is marked as a table bucket.
|
|
func IsTableBucketEntry(entry *filer_pb.Entry) bool {
|
|
if entry == nil || entry.Extended == nil {
|
|
return false
|
|
}
|
|
_, ok := entry.Extended[ExtendedKeyTableBucket]
|
|
return ok
|
|
}
|
|
|
|
// Utility functions
|
|
|
|
// validateBucketName checks a table bucket name and returns a descriptive
// error for the first rule it violates, or nil when the name is acceptable.
//
// Rules, checked in this order:
//   - the name is not empty
//   - its length is between 3 and 63 bytes
//   - it starts and ends with a lowercase letter or digit
//   - it contains only lowercase letters, digits, and hyphens
//   - it does not start with a reserved prefix
//   - it does not end with a reserved suffix
func validateBucketName(name string) error {
	switch n := len(name); {
	case n == 0:
		return fmt.Errorf("bucket name is required")
	case n < 3 || n > 63:
		return fmt.Errorf("bucket name must be between 3 and 63 characters")
	}

	isLowerAlnum := func(c byte) bool {
		return ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')
	}

	// Boundary characters are checked before the full scan so the more
	// specific message wins.
	if !isLowerAlnum(name[0]) {
		return fmt.Errorf("bucket name must start with a letter or digit")
	}
	if !isLowerAlnum(name[len(name)-1]) {
		return fmt.Errorf("bucket name must end with a letter or digit")
	}

	for i := 0; i < len(name); i++ {
		if c := name[i]; !isLowerAlnum(c) && c != '-' {
			return fmt.Errorf("bucket name can only contain lowercase letters, numbers, and hyphens")
		}
	}

	for _, prefix := range [...]string{"xn--", "sthree-", "amzn-s3-demo-", "aws"} {
		if strings.HasPrefix(name, prefix) {
			return fmt.Errorf("bucket name cannot start with reserved prefix: %s", prefix)
		}
	}

	for _, suffix := range [...]string{"-s3alias", "--ol-s3", "--x-s3", "--table-s3"} {
		if strings.HasSuffix(name, suffix) {
			return fmt.Errorf("bucket name cannot end with reserved suffix: %s", suffix)
		}
	}

	return nil
}
|
|
|
|
// ValidateBucketName validates bucket name and returns an error if invalid.
|
|
func ValidateBucketName(name string) error {
|
|
return validateBucketName(name)
|
|
}
|
|
|
|
// parseTableLocationBucket extracts the bucket component from an s3:// style
// metadata location. The boolean is true only when the location uses the
// "s3://" scheme and the bucket name ends with the "--table-s3" suffix.
func parseTableLocationBucket(metadataLocation string) (string, bool) {
	const (
		scheme       = "s3://"
		bucketSuffix = "--table-s3"
	)
	if !strings.HasPrefix(metadataLocation, scheme) {
		return "", false
	}

	// Drop the scheme and one trailing slash, then keep only the first segment.
	bucket := strings.TrimSuffix(metadataLocation[len(scheme):], "/")
	if slash := strings.IndexByte(bucket, '/'); slash >= 0 {
		bucket = bucket[:slash]
	}

	// An empty bucket can never carry the suffix, so this check also rejects it.
	if !strings.HasSuffix(bucket, bucketSuffix) {
		return "", false
	}
	return bucket, true
}
|
|
|
|
// BuildBucketARN builds a bucket ARN with the provided region and account ID.
|
|
// If region is empty, the ARN will omit the region field.
|
|
func BuildBucketARN(region, accountID, bucketName string) (string, error) {
|
|
if bucketName == "" {
|
|
return "", fmt.Errorf("bucket name is required")
|
|
}
|
|
if err := validateBucketName(bucketName); err != nil {
|
|
return "", err
|
|
}
|
|
if accountID == "" {
|
|
accountID = DefaultAccountID
|
|
}
|
|
return buildARN(region, accountID, fmt.Sprintf("bucket/%s", bucketName)), nil
|
|
}
|
|
|
|
// BuildTableARN builds a table ARN with the provided region and account ID.
|
|
func BuildTableARN(region, accountID, bucketName, namespace, tableName string) (string, error) {
|
|
if bucketName == "" {
|
|
return "", fmt.Errorf("bucket name is required")
|
|
}
|
|
if err := validateBucketName(bucketName); err != nil {
|
|
return "", err
|
|
}
|
|
if namespace == "" {
|
|
return "", fmt.Errorf("namespace is required")
|
|
}
|
|
normalizedNamespace, err := validateNamespace([]string{namespace})
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if tableName == "" {
|
|
return "", fmt.Errorf("table name is required")
|
|
}
|
|
normalizedTable, err := validateTableName(tableName)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
if accountID == "" {
|
|
accountID = DefaultAccountID
|
|
}
|
|
return buildARN(region, accountID, fmt.Sprintf("bucket/%s/table/%s/%s", bucketName, normalizedNamespace, normalizedTable)), nil
|
|
}
|
|
|
|
// buildARN assembles an S3 Tables ARN of the form
// arn:aws:s3tables:{region}:{accountID}:{resourcePath}.
// The arguments are inserted verbatim; no validation is performed here.
func buildARN(region, accountID, resourcePath string) string {
	const arnPrefix = "arn:aws:s3tables"
	return fmt.Sprintf("%s:%s:%s:%s", arnPrefix, region, accountID, resourcePath)
}
|
|
|
|
// ValidateTags validates tags for S3 Tables.
|
|
func ValidateTags(tags map[string]string) error {
|
|
if len(tags) > 10 {
|
|
return fmt.Errorf("validate tags: %d tags more than 10", len(tags))
|
|
}
|
|
for k, v := range tags {
|
|
if len(k) > 128 {
|
|
return fmt.Errorf("validate tags: tag key longer than 128")
|
|
}
|
|
if !tagPattern.MatchString(k) {
|
|
return fmt.Errorf("validate tags key %s error, incorrect key", k)
|
|
}
|
|
if len(v) > 256 {
|
|
return fmt.Errorf("validate tags: tag value longer than 256")
|
|
}
|
|
if !tagPattern.MatchString(v) {
|
|
return fmt.Errorf("validate tags value %s error, incorrect value", v)
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// isValidBucketName validates bucket name characters (kept for compatibility)
|
|
// Deprecated: use validateBucketName instead
|
|
func isValidBucketName(name string) bool {
|
|
return validateBucketName(name) == nil
|
|
}
|
|
|
|
// generateVersionToken returns a version token: 16 bytes read from
// crypto/rand, hex encoded into 32 characters. Should the random source
// fail, the current Unix time in nanoseconds, formatted as hex, is returned
// instead.
func generateVersionToken() string {
	var raw [16]byte
	if _, err := rand.Read(raw[:]); err != nil {
		// Best-effort fallback when crypto/rand is unavailable.
		return fmt.Sprintf("%x", time.Now().UnixNano())
	}
	return hex.EncodeToString(raw[:])
}
|
|
|
|
// splitPath splits a slash-separated path into its directory (path.Dir) and
// its final element (path.Base).
func splitPath(p string) (dir, name string) {
	return path.Dir(p), path.Base(p)
}
|
|
|
|
// validateNamespacePart checks a single namespace level and returns a
// descriptive error for the first rule it violates, or nil when it is valid.
//
// Rules, checked in this order:
//   - length is between 1 and 255 bytes
//   - the name is neither "." nor ".." and contains no "/"
//   - it starts and ends with a lowercase letter or digit
//   - it contains only lowercase letters, digits, and underscores
//   - it does not start with the reserved prefix "aws"
func validateNamespacePart(name string) error {
	if n := len(name); n < 1 || n > 255 {
		return fmt.Errorf("namespace name must be between 1 and 255 characters")
	}

	// Reject path traversal and multi-segment values with dedicated messages
	// before the generic character checks run.
	switch {
	case name == "." || name == "..":
		return fmt.Errorf("namespace name cannot be '.' or '..'")
	case strings.Contains(name, "/"):
		return fmt.Errorf("namespace name cannot contain '/'")
	}

	isLowerAlnum := func(c byte) bool {
		return ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')
	}
	switch {
	case !isLowerAlnum(name[0]):
		return fmt.Errorf("namespace name must start with a letter or digit")
	case !isLowerAlnum(name[len(name)-1]):
		return fmt.Errorf("namespace name must end with a letter or digit")
	}

	for i := 0; i < len(name); i++ {
		if c := name[i]; !isLowerAlnum(c) && c != '_' {
			return fmt.Errorf("invalid namespace name: only 'a-z', '0-9', and '_' are allowed")
		}
	}

	if strings.HasPrefix(name, "aws") {
		return fmt.Errorf("namespace name cannot start with reserved prefix 'aws'")
	}

	return nil
}
|
|
|
|
func normalizeNamespace(namespace []string) ([]string, error) {
|
|
if len(namespace) == 0 {
|
|
return nil, fmt.Errorf("namespace is required")
|
|
}
|
|
|
|
parts := namespace
|
|
if len(namespace) == 1 {
|
|
parts = strings.Split(namespace[0], ".")
|
|
}
|
|
|
|
normalized := make([]string, 0, len(parts))
|
|
for _, part := range parts {
|
|
if err := validateNamespacePart(part); err != nil {
|
|
return nil, err
|
|
}
|
|
normalized = append(normalized, part)
|
|
}
|
|
return normalized, nil
|
|
}
|
|
|
|
// validateNamespace validates namespace identifiers and returns an internal namespace key.
|
|
// A single dotted namespace value is interpreted as multi-level namespace for compatibility
|
|
// with path-style APIs, for example "analytics.daily" => ["analytics", "daily"].
|
|
func validateNamespace(namespace []string) (string, error) {
|
|
parts, err := normalizeNamespace(namespace)
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
return flattenNamespace(parts), nil
|
|
}
|
|
|
|
// ValidateNamespace is a wrapper to validate namespace for other packages.
|
|
func ValidateNamespace(namespace []string) (string, error) {
|
|
return validateNamespace(namespace)
|
|
}
|
|
|
|
// ParseNamespace parses a namespace string into namespace parts.
|
|
func ParseNamespace(namespace string) ([]string, error) {
|
|
return normalizeNamespace([]string{namespace})
|
|
}
|
|
|
|
// validateTableName validates a table name and returns it unchanged when it
// is acceptable. Otherwise it returns an empty string and an error for the
// first rule violated.
//
// Rules, checked in this order:
//   - length is between 1 and 255 bytes
//   - the name is neither "." nor ".." and contains no "/"
//   - it starts with a lowercase letter or digit
//   - it contains only lowercase letters, digits, and underscores
func validateTableName(name string) (string, error) {
	if n := len(name); n < 1 || n > 255 {
		return "", fmt.Errorf("table name must be between 1 and 255 characters")
	}
	if name == "." || name == ".." || strings.IndexByte(name, '/') >= 0 {
		return "", fmt.Errorf("invalid table name: cannot be '.', '..' or contain '/'")
	}

	isLowerAlnum := func(c byte) bool {
		return ('a' <= c && c <= 'z') || ('0' <= c && c <= '9')
	}

	// Only the first character is restricted to letters and digits.
	if !isLowerAlnum(name[0]) {
		return "", fmt.Errorf("table name must start with a letter or digit")
	}

	for i := 0; i < len(name); i++ {
		if c := name[i]; !isLowerAlnum(c) && c != '_' {
			return "", fmt.Errorf("invalid table name: only 'a-z', '0-9', and '_' are allowed")
		}
	}
	return name, nil
}
|
|
|
|
// ValidateTableName is a wrapper to validate table name for other packages.
|
|
func ValidateTableName(name string) (string, error) {
|
|
return validateTableName(name)
|
|
}
|
|
|
|
// flattenNamespace joins namespace elements into a single dot-separated
// string (dots as per AWS S3 Tables). An empty or nil namespace yields "".
func flattenNamespace(namespace []string) string {
	// strings.Join already returns "" for an empty slice.
	return strings.Join(namespace, ".")
}
|
|
|
|
func expandNamespace(namespace string) []string {
|
|
if namespace == "" {
|
|
return nil
|
|
}
|
|
parts, err := ParseNamespace(namespace)
|
|
if err != nil {
|
|
return []string{namespace}
|
|
}
|
|
return parts
|
|
}
|