fix(replication): resume partial chunk reads on EOF instead of re-downloading (#8607)
* fix(replication): resume partial chunk reads on EOF instead of re-downloading

  When replicating chunks and the source connection drops mid-transfer, accumulate the bytes already received and retry with a Range header to fetch only the remaining bytes. This avoids re-downloading potentially large chunks from scratch on each retry, reducing load on busy source servers and speeding up recovery.

* test(replication): add tests for downloadWithRange including gzip partial reads

  Tests cover:
  - No offset (no Range header sent)
  - With offset (Range header verified)
  - Content-Disposition filename extraction
  - Partial read + resume: server drops connection mid-transfer, client resumes with Range from the offset of received bytes
  - Gzip partial read + resume: first response is gzip-encoded (Go auto-decompresses), connection drops, resume request gets decompressed data (Go doesn't add Accept-Encoding when Range is set, so the server decompresses), combined bytes match original

* fix(replication): address PR review comments

  - Consolidate downloadWithRange into DownloadFile with optional offset parameter (variadic), eliminating code duplication (DRY)
  - Validate HTTP response status: require 206 + correct Content-Range when offset > 0, reject when server ignores Range header
  - Use if/else for fullData assignment for clarity
  - Add test for rejected Range (server returns 200 instead of 206)

* refactor(replication): remove unused ReplicationSource interface

  The interface was never referenced and its signature didn't match the actual FilerSource.ReadPart method.

---------

Co-authored-by: Copilot <copilot@github.com>
This commit is contained in:
@@ -3,7 +3,6 @@ package source
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
@@ -18,10 +17,6 @@ import (
|
||||
util_http "github.com/seaweedfs/seaweedfs/weed/util/http"
|
||||
)
|
||||
|
||||
type ReplicationSource interface {
|
||||
ReadPart(part string) io.ReadCloser
|
||||
}
|
||||
|
||||
type FilerSource struct {
|
||||
grpcAddress string
|
||||
grpcDialOption grpc.DialOption
|
||||
@@ -104,14 +99,14 @@ func (fs *FilerSource) LookupFileId(ctx context.Context, part string) (fileUrls
|
||||
return
|
||||
}
|
||||
|
||||
func (fs *FilerSource) ReadPart(fileId string) (filename string, header http.Header, resp *http.Response, err error) {
|
||||
func (fs *FilerSource) ReadPart(fileId string, offset int64) (filename string, header http.Header, resp *http.Response, err error) {
|
||||
|
||||
if fs.proxyByFiler {
|
||||
filename, header, resp, err = util_http.DownloadFile("http://"+fs.address+"/?proxyChunkId="+fileId, "")
|
||||
filename, header, resp, err = util_http.DownloadFile("http://"+fs.address+"/?proxyChunkId="+fileId, "", offset)
|
||||
if err != nil {
|
||||
glog.V(0).Infof("read part %s via filer proxy %s: %v", fileId, fs.address, err)
|
||||
glog.V(0).Infof("read part %s via filer proxy %s offset %d: %v", fileId, fs.address, offset, err)
|
||||
} else {
|
||||
glog.V(4).Infof("read part %s via filer proxy %s content-length:%s", fileId, fs.address, header.Get("Content-Length"))
|
||||
glog.V(4).Infof("read part %s via filer proxy %s offset %d content-length:%s", fileId, fs.address, offset, header.Get("Content-Length"))
|
||||
}
|
||||
return
|
||||
}
|
||||
@@ -122,11 +117,11 @@ func (fs *FilerSource) ReadPart(fileId string) (filename string, header http.Hea
|
||||
}
|
||||
|
||||
for _, fileUrl := range fileUrls {
|
||||
filename, header, resp, err = util_http.DownloadFile(fileUrl, "")
|
||||
filename, header, resp, err = util_http.DownloadFile(fileUrl, "", offset)
|
||||
if err != nil {
|
||||
glog.V(0).Infof("fail to read part %s from %s: %v", fileId, fileUrl, err)
|
||||
glog.V(0).Infof("fail to read part %s from %s offset %d: %v", fileId, fileUrl, offset, err)
|
||||
} else {
|
||||
glog.V(4).Infof("read part %s from %s content-length:%s", fileId, fileUrl, header.Get("Content-Length"))
|
||||
glog.V(4).Infof("read part %s from %s offset %d content-length:%s", fileId, fileUrl, offset, header.Get("Content-Length"))
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user