s3: fix remote object not caching (#7790)

* s3: fix remote object not caching

* s3: address review comments for remote object caching

- Fix leading slash in object name by using strings.TrimPrefix
- Return cached entry from CacheRemoteObjectToLocalCluster to get updated local chunk locations
- Reuse existing helper function instead of inline gRPC call

* s3/filer: add singleflight deduplication for remote object caching

- Add singleflight.Group to FilerServer to deduplicate concurrent cache operations
- Wrap CacheRemoteObjectToLocalCluster with singleflight to ensure only one
  caching operation runs per object when multiple clients request the same file
- Add early-return check for already-cached objects
- S3 API calls filer gRPC with timeout and graceful fallback on error
- Clear negative bucket cache when bucket is created via weed shell
- Add integration tests for remote cache with singleflight deduplication

This benefits all clients (S3, HTTP, Hadoop) accessing remote-mounted objects
by preventing redundant cache operations and improving concurrent access performance.

Fixes: https://github.com/seaweedfs/seaweedfs/discussions/7599

* fix: data race in concurrent remote object caching

- Add mutex to protect chunks slice from concurrent append
- Add mutex to protect fetchAndWriteErr from concurrent read/write
- Fix incorrect error check (was checking assignResult.Error instead of parseErr)
- Rename inner variable to avoid shadowing fetchAndWriteErr

* fix: address code review comments

- Remove duplicate remote caching block in GetObjectHandler, keep only singleflight version
- Add mutex protection for concurrent chunk slice and error access (data race fix)
- Use lazy initialization for S3 client in tests to avoid panic during package load
- Fix markdown linting: add language specifier to code fence, blank lines around tables
- Add 'all' target to Makefile as alias for test-with-server
- Remove unused 'util' import

* style: remove emojis from test files

* fix: add defensive checks and sort chunks by offset

- Add nil check and type assertion check for singleflight result
- Sort chunks by offset after concurrent fetching to maintain file order

* fix: improve test diagnostics and path normalization

- runWeedShell now returns error for better test diagnostics
- Add all targets to .PHONY in Makefile (logs-primary, logs-remote, health)
- Strip leading slash from normalizedObject to avoid double slashes in path

---------

Co-authored-by: chrislu <chris.lu@gmail.com>
Co-authored-by: Chris Lu <chrislusf@users.noreply.github.com>
This commit is contained in:
G-OD
2025-12-16 20:41:04 +00:00
committed by GitHub
parent 697b56003d
commit 504b258258
13 changed files with 992 additions and 36 deletions

View File

@@ -0,0 +1,210 @@
# Remote Storage Cache Integration Tests
# Tests the remote object caching functionality with singleflight deduplication
# Uses two SeaweedFS instances: primary (with caching) and secondary (as remote storage)
# Every target below is phony: none of them produces a file named after it.
.PHONY: all help build-weed check-deps start-remote stop-remote start-primary stop-primary \
	setup-remote test test-with-server clean logs logs-primary logs-remote health
# 'all' is an alias for the fully automated start/test/stop workflow.
all: test-with-server
# Configuration
WEED_BINARY := ../../../weed/weed_binary
# Primary SeaweedFS (the one being tested - has remote caching)
PRIMARY_S3_PORT := 8333
PRIMARY_FILER_PORT := 8888
PRIMARY_MASTER_PORT := 9333
PRIMARY_VOLUME_PORT := 8080
PRIMARY_METRICS_PORT := 9324
PRIMARY_DIR := ./test-primary-data
# Secondary SeaweedFS (acts as "remote" S3 storage)
REMOTE_S3_PORT := 8334
REMOTE_FILER_PORT := 8889
REMOTE_MASTER_PORT := 9334
REMOTE_VOLUME_PORT := 8081
REMOTE_METRICS_PORT := 9325
REMOTE_DIR := ./test-remote-data
# Test configuration
TEST_TIMEOUT := 10m
TEST_PATTERN := TestRemoteCache
# Buckets
REMOTE_BUCKET := remotesourcebucket
# Default target
# help: describe the test harness and list every available target.
# Fix: previously omitted the 'all', 'logs-primary', 'logs-remote', and
# 'health' targets even though they exist and are declared .PHONY.
help:
	@echo "Remote Storage Cache Integration Tests"
	@echo ""
	@echo "Uses two SeaweedFS instances:"
	@echo "  - Primary (port $(PRIMARY_S3_PORT)): Being tested, has remote caching"
	@echo "  - Remote (port $(REMOTE_S3_PORT)): Acts as remote S3 storage"
	@echo ""
	@echo "Available targets:"
	@echo "  help             - Show this help message"
	@echo "  all              - Alias for test-with-server"
	@echo "  build-weed       - Build the SeaweedFS binary"
	@echo "  check-deps       - Check dependencies"
	@echo "  start-remote     - Start remote SeaweedFS (secondary)"
	@echo "  stop-remote      - Stop remote SeaweedFS"
	@echo "  start-primary    - Start primary SeaweedFS"
	@echo "  stop-primary     - Stop primary SeaweedFS"
	@echo "  setup-remote     - Configure remote storage mount"
	@echo "  test             - Run tests (assumes servers are running)"
	@echo "  test-with-server - Start servers, run tests, stop servers"
	@echo "  logs             - Show server logs"
	@echo "  logs-primary     - Follow primary server log"
	@echo "  logs-remote      - Follow remote server log"
	@echo "  health           - Check server status"
	@echo "  clean            - Clean up all resources"
# build-weed: compile the weed binary from the repo's weed/ directory into
# the location WEED_BINARY points at.
build-weed:
	@echo "Building SeaweedFS binary..."
	@cd ../../../weed && go build -o weed_binary .
	@chmod +x $(WEED_BINARY)
	@echo "SeaweedFS binary built"
# check-deps: ensure Go and the weed binary are available (builds it first).
check-deps: build-weed
	@echo "Checking dependencies..."
	@command -v go >/dev/null 2>&1 || (echo "Go is required" && exit 1)
	@test -f $(WEED_BINARY) || (echo "SeaweedFS binary not found" && exit 1)
	@echo "All dependencies available"
# start-remote: launch the secondary SeaweedFS instance that plays the role
# of the "remote" S3 storage. The server PID is saved to remote-server.pid
# so stop-remote can signal it; startup is confirmed by polling the S3 port
# for up to 60 seconds, failing with the server log dumped on timeout.
start-remote: check-deps
	@echo "Starting remote SeaweedFS (secondary instance)..."
	@rm -f remote-server.pid
	@mkdir -p $(REMOTE_DIR)
	@$(WEED_BINARY) server \
		-s3 \
		-s3.port=$(REMOTE_S3_PORT) \
		-s3.allowDeleteBucketNotEmpty=true \
		-filer \
		-filer.port=$(REMOTE_FILER_PORT) \
		-master.port=$(REMOTE_MASTER_PORT) \
		-volume.port=$(REMOTE_VOLUME_PORT) \
		-master.volumeSizeLimitMB=50 \
		-volume.max=100 \
		-dir=$(REMOTE_DIR) \
		-volume.preStopSeconds=1 \
		-metricsPort=$(REMOTE_METRICS_PORT) \
		> remote-weed.log 2>&1 & echo $$! > remote-server.pid
	@echo "Waiting for remote SeaweedFS to start..."
	@for i in $$(seq 1 60); do \
		if curl -s http://localhost:$(REMOTE_S3_PORT) >/dev/null 2>&1; then \
			echo "Remote SeaweedFS started on port $(REMOTE_S3_PORT)"; \
			exit 0; \
		fi; \
		sleep 1; \
	done; \
	echo "ERROR: Remote SeaweedFS failed to start"; \
	cat remote-weed.log; \
	exit 1
# stop-remote: graceful TERM first, then force KILL; errors are ignored so
# the target is safe to run when no server is up.
stop-remote:
	@echo "Stopping remote SeaweedFS..."
	@if [ -f remote-server.pid ]; then \
		kill -TERM $$(cat remote-server.pid) 2>/dev/null || true; \
		sleep 2; \
		kill -KILL $$(cat remote-server.pid) 2>/dev/null || true; \
		rm -f remote-server.pid; \
	fi
	@echo "Remote SeaweedFS stopped"
# start-primary: launch the primary SeaweedFS instance (the one under test,
# with remote caching). Mirrors start-remote: PID recorded for stop-primary,
# readiness polled on the S3 port for up to 60 seconds.
start-primary: check-deps
	@echo "Starting primary SeaweedFS..."
	@rm -f primary-server.pid
	@mkdir -p $(PRIMARY_DIR)
	@$(WEED_BINARY) server \
		-s3 \
		-s3.port=$(PRIMARY_S3_PORT) \
		-s3.allowDeleteBucketNotEmpty=true \
		-filer \
		-filer.port=$(PRIMARY_FILER_PORT) \
		-master.port=$(PRIMARY_MASTER_PORT) \
		-volume.port=$(PRIMARY_VOLUME_PORT) \
		-master.volumeSizeLimitMB=50 \
		-volume.max=100 \
		-dir=$(PRIMARY_DIR) \
		-volume.preStopSeconds=1 \
		-metricsPort=$(PRIMARY_METRICS_PORT) \
		> primary-weed.log 2>&1 & echo $$! > primary-server.pid
	@echo "Waiting for primary SeaweedFS to start..."
	@for i in $$(seq 1 60); do \
		if curl -s http://localhost:$(PRIMARY_S3_PORT) >/dev/null 2>&1; then \
			echo "Primary SeaweedFS started on port $(PRIMARY_S3_PORT)"; \
			exit 0; \
		fi; \
		sleep 1; \
	done; \
	echo "ERROR: Primary SeaweedFS failed to start"; \
	cat primary-weed.log; \
	exit 1
# stop-primary: graceful TERM first, then force KILL; safe when not running.
stop-primary:
	@echo "Stopping primary SeaweedFS..."
	@if [ -f primary-server.pid ]; then \
		kill -TERM $$(cat primary-server.pid) 2>/dev/null || true; \
		sleep 2; \
		kill -KILL $$(cat primary-server.pid) 2>/dev/null || true; \
		rm -f primary-server.pid; \
	fi
	@echo "Primary SeaweedFS stopped"
# setup-remote: create the source bucket on the remote instance, then use
# weed shell against the primary master to register the remote storage
# ("seaweedremote") and mount the bucket at /buckets/remotemounted.
# The "|| echo" fallbacks keep the target idempotent across reruns.
setup-remote:
	@echo "Creating bucket on remote SeaweedFS..."
	@curl -s -X PUT "http://localhost:$(REMOTE_S3_PORT)/$(REMOTE_BUCKET)" || echo "Bucket may already exist"
	@sleep 1
	@echo "Configuring remote storage on primary..."
	@printf 'remote.configure -name=seaweedremote -type=s3 -s3.access_key=any -s3.secret_key=any -s3.endpoint=http://localhost:$(REMOTE_S3_PORT) -s3.region=us-east-1\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.configure done"
	@sleep 2
	@echo "Mounting remote bucket on primary..."
	@printf 'remote.mount -dir=/buckets/remotemounted -remote=seaweedremote/$(REMOTE_BUCKET) -nonempty\nexit\n' | $(WEED_BINARY) shell -master=localhost:$(PRIMARY_MASTER_PORT) 2>&1 || echo "remote.mount done"
	@sleep 1
	@echo "Remote storage configured"
# test: run the Go integration tests (assumes both servers are already up).
test: check-deps
	@echo "Running remote cache tests..."
	@go test -v -timeout=$(TEST_TIMEOUT) -run "$(TEST_PATTERN)" .
	@echo "Tests completed"
# test-with-server: fully automated workflow — start both instances, set up
# the remote mount, run the tests, then stop the servers. On any failure the
# servers are stopped before the error is propagated (exit 1).
test-with-server: start-remote start-primary
	@sleep 3
	@$(MAKE) setup-remote || (echo "Remote setup failed" && $(MAKE) stop-primary stop-remote && exit 1)
	@sleep 2
	@echo "Running remote cache tests..."
	@$(MAKE) test || (echo "Tests failed" && tail -50 primary-weed.log && $(MAKE) stop-primary stop-remote && exit 1)
	@$(MAKE) stop-primary stop-remote
	@echo "All tests passed"
# logs: dump the last 50 lines from each server log.
logs:
	@echo "=== Primary SeaweedFS Logs ==="
	@if [ -f primary-weed.log ]; then tail -50 primary-weed.log; else echo "No log file"; fi
	@echo ""
	@echo "=== Remote SeaweedFS Logs ==="
	@if [ -f remote-weed.log ]; then tail -50 remote-weed.log; else echo "No log file"; fi
# logs-primary: follow the primary server log in real time.
logs-primary:
	@if [ -f primary-weed.log ]; then tail -f primary-weed.log; else echo "No log file"; fi
# logs-remote: follow the remote server log in real time.
logs-remote:
	@if [ -f remote-weed.log ]; then tail -f remote-weed.log; else echo "No log file"; fi
# clean: stop both servers, remove logs/PID files/data dirs, and reset the
# Go test cache.
clean:
	@$(MAKE) stop-primary
	@$(MAKE) stop-remote
	@rm -f primary-weed.log remote-weed.log primary-server.pid remote-server.pid
	@rm -rf $(PRIMARY_DIR) $(REMOTE_DIR)
	@rm -f remote_cache.test
	@go clean -testcache
	@echo "Cleanup completed"
# health: print UP/DOWN for each instance's S3 endpoint.
health:
	@echo "Checking server status..."
	@curl -s http://localhost:$(PRIMARY_S3_PORT) >/dev/null 2>&1 && echo "Primary S3 ($(PRIMARY_S3_PORT)): UP" || echo "Primary S3 ($(PRIMARY_S3_PORT)): DOWN"
	@curl -s http://localhost:$(REMOTE_S3_PORT) >/dev/null 2>&1 && echo "Remote S3 ($(REMOTE_S3_PORT)): UP" || echo "Remote S3 ($(REMOTE_S3_PORT)): DOWN"

View File

@@ -0,0 +1,157 @@
# Remote Object Cache Integration Tests
This directory contains integration tests for the remote object caching feature with singleflight deduplication.
## Test Flow
Each test follows this pattern:
1. **Write to local** - Upload data to primary SeaweedFS (local storage)
2. **Uncache** - Push data to remote storage and remove local chunks
3. **Read** - Read data (triggers caching from remote back to local)
This tests the full remote caching workflow including singleflight deduplication.
## Architecture
```text
┌─────────────────────────────────────────────────────────────────┐
│ Test Client │
│ │
│ 1. PUT data to primary SeaweedFS │
│ 2. remote.cache.uncache (push to remote, purge local) │
│ 3. GET data (triggers caching from remote) │
│ 4. Verify singleflight deduplication │
└──────────────────────────────────┬──────────────────────────────┘
┌─────────────────┴─────────────────┐
▼ ▼
┌────────────────────────────────────┐ ┌────────────────────────────────┐
│ Primary SeaweedFS │ │ Remote SeaweedFS │
│ (port 8333) │ │ (port 8334) │
│ │ │ │
│ - Being tested │ │ - Acts as "remote" S3 │
│ - Has remote storage mounted │──▶│ - Receives uncached data │
│ - Caches remote objects │ │ - Serves data for caching │
│ - Singleflight deduplication │ │ │
└────────────────────────────────────┘ └────────────────────────────────┘
```
## What's Being Tested
1. **Basic Remote Caching**: Write → Uncache → Read workflow
2. **Singleflight Deduplication**: Concurrent reads only trigger ONE caching operation
3. **Large Object Caching**: 5MB files cache correctly
4. **Range Requests**: Partial reads work with cached objects
5. **Not Found Handling**: Proper error for non-existent objects
## Quick Start
### Run Full Test Suite (Recommended)
```bash
# Build SeaweedFS, start both servers, run tests, stop servers
make test-with-server
```
### Manual Steps
```bash
# 1. Build SeaweedFS binary
make build-weed
# 2. Start remote SeaweedFS (acts as "remote" storage)
make start-remote
# 3. Start primary SeaweedFS (the one being tested)
make start-primary
# 4. Configure remote storage mount
make setup-remote
# 5. Run tests
make test
# 6. Clean up
make clean
```
## Configuration
### Primary SeaweedFS (Being Tested)
| Service | Port |
|---------|------|
| S3 API | 8333 |
| Filer | 8888 |
| Master | 9333 |
| Volume | 8080 |
### Remote SeaweedFS (Remote Storage)
| Service | Port |
|---------|------|
| S3 API | 8334 |
| Filer | 8889 |
| Master | 9334 |
| Volume | 8081 |
## Makefile Targets
```bash
make help # Show all available targets
make build-weed # Build SeaweedFS binary
make start-remote # Start remote SeaweedFS
make start-primary # Start primary SeaweedFS
make setup-remote # Configure remote storage mount
make test # Run tests
make test-with-server # Full automated test workflow
make logs # Show server logs
make health # Check server status
make clean # Stop servers and clean up
```
## Test Details
### TestRemoteCacheBasic
Basic workflow test:
1. Write object to primary (local)
2. Uncache (push to remote, remove local chunks)
3. Read (triggers caching from remote)
4. Read again (from local cache - should be faster)
### TestRemoteCacheConcurrent
Singleflight deduplication test:
1. Write 1MB object
2. Uncache to remote
3. Launch 10 concurrent reads
4. All should succeed with correct data
5. Only ONE caching operation should run (singleflight)
### TestRemoteCacheLargeObject
Large file test (5MB) to verify chunked transfer works correctly.
### TestRemoteCacheRangeRequest
Tests HTTP range requests work correctly after caching.
### TestRemoteCacheNotFound
Tests proper error handling for non-existent objects.
## Troubleshooting
### View logs
```bash
make logs # Show recent logs from both servers
make logs-primary # Follow primary logs in real-time
make logs-remote # Follow remote logs in real-time
```
### Check server health
```bash
make health
```
### Clean up and retry
```bash
make clean
make test-with-server
```

View File

@@ -0,0 +1,375 @@
package remote_cache
import (
"bytes"
"fmt"
"io"
"net/http"
"os"
"os/exec"
"strings"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/aws/aws-sdk-go/aws"
"github.com/aws/aws-sdk-go/aws/credentials"
"github.com/aws/aws-sdk-go/aws/session"
"github.com/aws/aws-sdk-go/service/s3"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// Test configuration.
// Uses two SeaweedFS instances:
//   - Primary: The one being tested (has remote caching)
//   - Remote: Acts as the "remote" S3 storage
const (
	// Primary SeaweedFS
	primaryEndpoint   = "http://localhost:8333"
	primaryMasterPort = "9333"
	// Remote SeaweedFS (acts as remote storage)
	remoteEndpoint = "http://localhost:8334"
	// Credentials (anonymous access for testing)
	accessKey = "any"
	secretKey = "any"
	// Bucket name - mounted on primary as remote storage
	testBucket = "remotemounted"
	// Path to weed binary (built by the Makefile's build-weed target)
	weedBinary = "../../../weed/weed_binary"
)

// Shared S3 client for the primary instance, created lazily exactly once
// via getPrimaryClient so concurrent tests reuse a single client.
var (
	primaryClient     *s3.S3
	primaryClientOnce sync.Once
)
// getPrimaryClient returns the shared S3 client for the primary SeaweedFS
// instance, constructing it on first use; sync.Once makes the lazy
// initialization safe under concurrent test access.
func getPrimaryClient() *s3.S3 {
	primaryClientOnce.Do(func() { primaryClient = createS3Client(primaryEndpoint) })
	return primaryClient
}
// createS3Client builds an S3 API client for the given endpoint using the
// static test credentials and path-style addressing; SSL is disabled unless
// the endpoint is https. Panics on session creation failure (test helper).
func createS3Client(endpoint string) *s3.S3 {
	cfg := &aws.Config{
		Region:           aws.String("us-east-1"),
		Endpoint:         aws.String(endpoint),
		Credentials:      credentials.NewStaticCredentials(accessKey, secretKey, ""),
		DisableSSL:       aws.Bool(!strings.HasPrefix(endpoint, "https")),
		S3ForcePathStyle: aws.Bool(true),
	}
	sess, err := session.NewSession(cfg)
	if err != nil {
		panic(fmt.Sprintf("failed to create session: %v", err))
	}
	return s3.New(sess)
}
// skipIfNotRunning skips the test unless both the primary and remote
// SeaweedFS endpoints answer an HTTP GET.
//
// Fix: the original used http.Get (http.DefaultClient), which has no
// timeout — a half-up server would hang the whole test run. A bounded
// client timeout makes the probe fail fast instead.
func skipIfNotRunning(t *testing.T) {
	client := &http.Client{Timeout: 5 * time.Second}
	probes := []struct {
		name string
		url  string
	}{
		{"Primary", primaryEndpoint},
		{"Remote", remoteEndpoint},
	}
	for _, p := range probes {
		resp, err := client.Get(p.url)
		if err != nil {
			t.Skipf("%s SeaweedFS not running at %s: %v", p.name, p.url, err)
		}
		resp.Body.Close()
	}
}
// runWeedShell pipes a single command (followed by "exit") into the weed
// shell connected to the primary master, returning the combined output and
// any execution error; failures are also logged for test diagnostics.
func runWeedShell(t *testing.T, command string) (string, error) {
	shell := exec.Command(weedBinary, "shell", "-master=localhost:"+primaryMasterPort)
	shell.Stdin = strings.NewReader(command + "\nexit\n")
	raw, err := shell.CombinedOutput()
	out := string(raw)
	if err != nil {
		t.Logf("weed shell command '%s' failed: %v, output: %s", command, err, out)
	}
	return out, err
}
// uploadToPrimary uploads an object to the primary SeaweedFS (local write)
func uploadToPrimary(t *testing.T, key string, data []byte) {
_, err := getPrimaryClient().PutObject(&s3.PutObjectInput{
Bucket: aws.String(testBucket),
Key: aws.String(key),
Body: bytes.NewReader(data),
})
require.NoError(t, err, "failed to upload to primary SeaweedFS")
}
// getFromPrimary gets an object from primary SeaweedFS
func getFromPrimary(t *testing.T, key string) []byte {
resp, err := getPrimaryClient().GetObject(&s3.GetObjectInput{
Bucket: aws.String(testBucket),
Key: aws.String(key),
})
require.NoError(t, err, "failed to get from primary SeaweedFS")
defer resp.Body.Close()
data, err := io.ReadAll(resp.Body)
require.NoError(t, err, "failed to read response body")
return data
}
// syncToRemote syncs local data to remote storage via weed shell and waits
// briefly for the operation to settle. Errors are logged but tolerated
// (best-effort helper).
//
// NOTE(review): the command "remote.cache.uncache" differs from the
// "remote.uncache" used by uncacheLocal — confirm this is a valid weed
// shell command and not a typo.
func syncToRemote(t *testing.T) {
	t.Log("Syncing to remote storage...")
	output, err := runWeedShell(t, "remote.cache.uncache -dir=/buckets/"+testBucket+" -include=*")
	if err != nil {
		t.Logf("syncToRemote warning: %v", err)
	}
	t.Log(output)
	time.Sleep(1 * time.Second)
}
// uncacheLocal purges the local cache, forcing data to be fetched from remote
func uncacheLocal(t *testing.T, pattern string) {
t.Logf("Purging local cache for pattern: %s", pattern)
output, err := runWeedShell(t, fmt.Sprintf("remote.uncache -dir=/buckets/%s -include=%s", testBucket, pattern))
if err != nil {
t.Logf("uncacheLocal warning: %v", err)
}
t.Log(output)
time.Sleep(500 * time.Millisecond)
}
// TestRemoteCacheBasic exercises the core caching workflow: write locally,
// uncache (push to remote + drop local chunks), then read twice — the first
// read triggers caching from remote, the second should hit the local cache.
func TestRemoteCacheBasic(t *testing.T) {
	skipIfNotRunning(t)
	key := fmt.Sprintf("test-basic-%d.txt", time.Now().UnixNano())
	payload := []byte("Hello, this is test data for remote caching!")

	t.Log("Step 1: Writing object to primary SeaweedFS (local)...")
	uploadToPrimary(t, key, payload)
	assert.Equal(t, payload, getFromPrimary(t, key), "initial read mismatch")

	t.Log("Step 2: Uncaching (pushing to remote, removing local chunks)...")
	uncacheLocal(t, key)

	t.Log("Step 3: Reading object (should trigger caching from remote)...")
	begin := time.Now()
	got := getFromPrimary(t, key)
	firstRead := time.Since(begin)
	assert.Equal(t, payload, got, "data mismatch after cache")
	t.Logf("First read (from remote) took %v", firstRead)

	t.Log("Step 4: Reading again (should be from local cache)...")
	begin = time.Now()
	got = getFromPrimary(t, key)
	secondRead := time.Since(begin)
	assert.Equal(t, payload, got, "data mismatch on cached read")
	t.Logf("Second read (from cache) took %v", secondRead)
	t.Log("Basic caching test passed")
}
// TestRemoteCacheConcurrent verifies that concurrent reads of the same
// uncached remote object all succeed with correct content. Server-side,
// singleflight should collapse them into ONE caching operation; that is not
// directly observable from the client, so this test asserts client-visible
// correctness under concurrency.
//
// Fix: the result check previously compared lengths only; a truncated or
// corrupted cache fill of the right size would have passed. Now the full
// content is compared as well.
func TestRemoteCacheConcurrent(t *testing.T) {
	skipIfNotRunning(t)
	testKey := fmt.Sprintf("test-concurrent-%d.txt", time.Now().UnixNano())
	// Use larger data to make caching take measurable time.
	testData := make([]byte, 1024*1024) // 1MB
	for i := range testData {
		testData[i] = byte(i % 256)
	}
	// Step 1: Write to local.
	t.Log("Step 1: Writing 1MB object to primary SeaweedFS...")
	uploadToPrimary(t, testKey, testData)
	// Verify it's readable.
	result := getFromPrimary(t, testKey)
	assert.Equal(t, len(testData), len(result), "initial size mismatch")
	// Step 2: Uncache so the concurrent reads must re-cache from remote.
	t.Log("Step 2: Uncaching (pushing to remote)...")
	uncacheLocal(t, testKey)
	// Step 3: Launch many concurrent reads - singleflight should deduplicate.
	numRequests := 10
	var wg sync.WaitGroup
	var successCount atomic.Int32
	var errorCount atomic.Int32
	// Buffered to capacity so no goroutine blocks on send.
	results := make(chan []byte, numRequests)
	t.Logf("Step 3: Launching %d concurrent requests...", numRequests)
	startTime := time.Now()
	for i := 0; i < numRequests; i++ {
		wg.Add(1)
		go func(idx int) {
			defer wg.Done()
			resp, err := getPrimaryClient().GetObject(&s3.GetObjectInput{
				Bucket: aws.String(testBucket),
				Key:    aws.String(testKey),
			})
			if err != nil {
				t.Logf("Request %d failed: %v", idx, err)
				errorCount.Add(1)
				return
			}
			defer resp.Body.Close()
			data, err := io.ReadAll(resp.Body)
			if err != nil {
				t.Logf("Request %d read failed: %v", idx, err)
				errorCount.Add(1)
				return
			}
			results <- data
			successCount.Add(1)
		}(i)
	}
	wg.Wait()
	close(results)
	totalDuration := time.Since(startTime)
	t.Logf("All %d requests completed in %v", numRequests, totalDuration)
	t.Logf("Successful: %d, Failed: %d", successCount.Load(), errorCount.Load())
	// Verify every successful request returned the exact payload.
	// bytes.Equal avoids testify dumping a 1MB diff on failure.
	for data := range results {
		assert.Equal(t, len(testData), len(data), "data length mismatch")
		assert.True(t, bytes.Equal(testData, data), "data content mismatch")
	}
	// All requests should succeed.
	assert.Equal(t, int32(numRequests), successCount.Load(), "some requests failed")
	assert.Equal(t, int32(0), errorCount.Load(), "no requests should fail")
	t.Log("Concurrent caching test passed")
}
// TestRemoteCacheLargeObject verifies that a 5MB object survives the
// uncache/re-cache round trip intact (exercises the chunked transfer path).
func TestRemoteCacheLargeObject(t *testing.T) {
	skipIfNotRunning(t)
	key := fmt.Sprintf("test-large-%d.bin", time.Now().UnixNano())
	// 5MB payload with a deterministic byte pattern.
	payload := make([]byte, 5*1024*1024)
	for i := range payload {
		payload[i] = byte(i % 256)
	}

	t.Log("Step 1: Writing 5MB object to primary SeaweedFS...")
	uploadToPrimary(t, key, payload)
	assert.Equal(t, len(payload), len(getFromPrimary(t, key)), "initial size mismatch")

	t.Log("Step 2: Uncaching...")
	uncacheLocal(t, key)

	t.Log("Step 3: Reading 5MB object (should cache from remote)...")
	begin := time.Now()
	got := getFromPrimary(t, key)
	elapsed := time.Since(begin)
	assert.Equal(t, len(payload), len(got), "size mismatch")
	assert.Equal(t, payload, got, "data mismatch")
	t.Logf("Large object cached in %v", elapsed)
	t.Log("Large object caching test passed")
}
// TestRemoteCacheRangeRequest verifies that an HTTP range request on an
// uncached object returns the correct slice (and triggers caching).
func TestRemoteCacheRangeRequest(t *testing.T) {
	skipIfNotRunning(t)
	key := fmt.Sprintf("test-range-%d.txt", time.Now().UnixNano())
	payload := []byte("0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ")

	t.Log("Writing and uncaching object...")
	uploadToPrimary(t, key, payload)
	uncacheLocal(t, key)

	t.Log("Testing range request (bytes 10-19)...")
	resp, err := getPrimaryClient().GetObject(&s3.GetObjectInput{
		Bucket: aws.String(testBucket),
		Key:    aws.String(key),
		Range:  aws.String("bytes=10-19"),
	})
	require.NoError(t, err)
	defer resp.Body.Close()
	got, err := io.ReadAll(resp.Body)
	require.NoError(t, err)
	// Bytes 10..19 of the payload are "ABCDEFGHIJ".
	assert.Equal(t, payload[10:20], got, "range data mismatch")
	t.Logf("Range request returned: %s", string(got))
	t.Log("Range request test passed")
}
// TestRemoteCacheNotFound verifies that requesting a key that was never
// written yields an error rather than a spurious cache hit.
func TestRemoteCacheNotFound(t *testing.T) {
	skipIfNotRunning(t)
	missingKey := fmt.Sprintf("non-existent-object-%d", time.Now().UnixNano())
	_, err := getPrimaryClient().GetObject(&s3.GetObjectInput{
		Bucket: aws.String(testBucket),
		Key:    aws.String(missingKey),
	})
	assert.Error(t, err, "should get error for non-existent object")
	t.Logf("Got expected error: %v", err)
	t.Log("Not found test passed")
}
// TestMain sets up and tears down the test environment
func TestMain(m *testing.M) {
if !isServerRunning(primaryEndpoint) {
fmt.Println("WARNING: Primary SeaweedFS not running at", primaryEndpoint)
fmt.Println(" Run 'make test-with-server' to start servers automatically")
}
if !isServerRunning(remoteEndpoint) {
fmt.Println("WARNING: Remote SeaweedFS not running at", remoteEndpoint)
fmt.Println(" Run 'make test-with-server' to start servers automatically")
}
os.Exit(m.Run())
}
// isServerRunning reports whether an HTTP GET against url succeeds.
//
// Fix: the original used http.Get (http.DefaultClient), which has no
// timeout — a half-up or hung server would stall TestMain indefinitely.
// A bounded client timeout makes the probe fail fast instead.
func isServerRunning(url string) bool {
	client := &http.Client{Timeout: 5 * time.Second}
	resp, err := client.Get(url)
	if err != nil {
		return false
	}
	resp.Body.Close()
	return true
}