tier storage: support downloading the remote dat files

This commit is contained in:
Chris Lu
2019-12-25 09:53:13 -08:00
parent 3ebeae0c0b
commit d960b3474a
11 changed files with 775 additions and 256 deletions

View File

@@ -47,7 +47,7 @@ func (c *commandEcDecode) Do(args []string, commandEnv *CommandEnv, writer io.Wr
vid := needle.VolumeId(*volumeId)
// collect topology information
topologyInfo, err := collectTopologyInfoForEcDecode(ctx, commandEnv)
topologyInfo, err := collectTopologyInfo(ctx, commandEnv)
if err != nil {
return err
}
@@ -203,7 +203,7 @@ func collectEcShards(ctx context.Context, commandEnv *CommandEnv, nodeToEcIndexB
}
func collectTopologyInfoForEcDecode(ctx context.Context, commandEnv *CommandEnv) (topoInfo *master_pb.TopologyInfo, err error) {
func collectTopologyInfo(ctx context.Context, commandEnv *CommandEnv) (topoInfo *master_pb.TopologyInfo, err error) {
var resp *master_pb.VolumeListResponse
err = commandEnv.MasterClient.WithClient(ctx, func(client master_pb.SeaweedClient) error {

View File

@@ -0,0 +1,167 @@
package shell
import (
"context"
"flag"
"fmt"
"io"
"google.golang.org/grpc"
"github.com/chrislusf/seaweedfs/weed/operation"
"github.com/chrislusf/seaweedfs/weed/pb/master_pb"
"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
)
func init() {
Commands = append(Commands, &commandVolumeTierDownload{})
}
type commandVolumeTierDownload struct {
}
func (c *commandVolumeTierDownload) Name() string {
return "volume.tier.download"
}
func (c *commandVolumeTierDownload) Help() string {
return `move the dat file of a volume to a remote tier
volume.tier.download [-collection=""]
volume.tier.download [-collection=""] -volumeId=<volume_id>
e.g.:
volume.tier.download -volumeId=7
volume.tier.download -volumeId=7
This command will download the dat file of a volume from a remote tier to a volume server in local cluster.
`
}
func (c *commandVolumeTierDownload) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
volumeId := tierCommand.Int("volumeId", 0, "the volume id")
collection := tierCommand.String("collection", "", "the collection name")
if err = tierCommand.Parse(args); err != nil {
return nil
}
ctx := context.Background()
vid := needle.VolumeId(*volumeId)
// collect topology information
topologyInfo, err := collectTopologyInfo(ctx, commandEnv)
if err != nil {
return err
}
// volumeId is provided
if vid != 0 {
return doVolumeTierDownload(ctx, commandEnv, writer, *collection, vid)
}
// apply to all volumes in the collection
// reusing collectVolumeIdsForEcEncode for now
volumeIds := collectRemoteVolumes(topologyInfo, *collection)
if err != nil {
return err
}
fmt.Printf("tier download volumes: %v\n", volumeIds)
for _, vid := range volumeIds {
if err = doVolumeTierDownload(ctx, commandEnv, writer, *collection, vid); err != nil {
return err
}
}
return nil
}
func collectRemoteVolumes(topoInfo *master_pb.TopologyInfo, selectedCollection string) (vids []needle.VolumeId) {
vidMap := make(map[uint32]bool)
eachDataNode(topoInfo, func(dc string, rack RackId, dn *master_pb.DataNodeInfo) {
for _, v := range dn.VolumeInfos {
if v.Collection == selectedCollection && v.RemoteStorageKey != "" && v.RemoteStorageName != "" {
vidMap[v.Id] = true
}
}
})
for vid := range vidMap {
vids = append(vids, needle.VolumeId(vid))
}
return
}
func doVolumeTierDownload(ctx context.Context, commandEnv *CommandEnv, writer io.Writer, collection string, vid needle.VolumeId) (err error) {
// find volume location
locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
if !found {
return fmt.Errorf("volume %d not found", vid)
}
// TODO parallelize this
for _, loc := range locations {
// copy the .dat file from remote tier to local
err = downloadDatFromRemoteTier(ctx, commandEnv.option.GrpcDialOption, writer, needle.VolumeId(vid), collection, loc.Url)
if err != nil {
return fmt.Errorf("download dat file for volume %d to %s: %v", vid, loc.Url, err)
}
}
return nil
}
func downloadDatFromRemoteTier(ctx context.Context, grpcDialOption grpc.DialOption, writer io.Writer, volumeId needle.VolumeId, collection string, targetVolumeServer string) error {
err := operation.WithVolumeServerClient(targetVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
stream, downloadErr := volumeServerClient.VolumeTierMoveDatFromRemote(ctx, &volume_server_pb.VolumeTierMoveDatFromRemoteRequest{
VolumeId: uint32(volumeId),
Collection: collection,
})
var lastProcessed int64
for {
resp, recvErr := stream.Recv()
if recvErr != nil {
if recvErr == io.EOF {
break
} else {
return recvErr
}
}
processingSpeed := float64(resp.Processed-lastProcessed) / 1024.0 / 1024.0
fmt.Fprintf(writer, "downloaded %.2f%%, %d bytes, %.2fMB/s\n", resp.ProcessedPercentage, resp.Processed, processingSpeed)
lastProcessed = resp.Processed
}
if downloadErr != nil {
return downloadErr
}
_, unmountErr := volumeServerClient.VolumeUnmount(ctx, &volume_server_pb.VolumeUnmountRequest{
VolumeId: uint32(volumeId),
})
if unmountErr != nil {
return unmountErr
}
_, mountErr := volumeServerClient.VolumeMount(ctx, &volume_server_pb.VolumeMountRequest{
VolumeId: uint32(volumeId),
})
if mountErr != nil {
return mountErr
}
return nil
})
return err
}

View File

@@ -7,24 +7,25 @@ import (
"io"
"time"
"google.golang.org/grpc"
"github.com/chrislusf/seaweedfs/weed/operation"
"github.com/chrislusf/seaweedfs/weed/pb/volume_server_pb"
"github.com/chrislusf/seaweedfs/weed/storage/needle"
"google.golang.org/grpc"
)
func init() {
Commands = append(Commands, &commandVolumeTier{})
Commands = append(Commands, &commandVolumeTierUpload{})
}
type commandVolumeTier struct {
type commandVolumeTierUpload struct {
}
func (c *commandVolumeTier) Name() string {
func (c *commandVolumeTierUpload) Name() string {
return "volume.tier.upload"
}
func (c *commandVolumeTier) Help() string {
func (c *commandVolumeTierUpload) Help() string {
return `move the dat file of a volume to a remote tier
volume.tier.upload [-collection=""] [-fullPercent=95] [-quietFor=1h]
@@ -53,7 +54,7 @@ func (c *commandVolumeTier) Help() string {
`
}
func (c *commandVolumeTier) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
func (c *commandVolumeTierUpload) Do(args []string, commandEnv *CommandEnv, writer io.Writer) (err error) {
tierCommand := flag.NewFlagSet(c.Name(), flag.ContinueOnError)
volumeId := tierCommand.Int("volumeId", 0, "the volume id")
@@ -71,7 +72,7 @@ func (c *commandVolumeTier) Do(args []string, commandEnv *CommandEnv, writer io.
// volumeId is provided
if vid != 0 {
return doVolumeTier(ctx, commandEnv, writer, *collection, vid, *dest, *keepLocalDatFile)
return doVolumeTierUpload(ctx, commandEnv, writer, *collection, vid, *dest, *keepLocalDatFile)
}
// apply to all volumes in the collection
@@ -80,9 +81,9 @@ func (c *commandVolumeTier) Do(args []string, commandEnv *CommandEnv, writer io.
if err != nil {
return err
}
fmt.Printf("tiering volumes: %v\n", volumeIds)
fmt.Printf("tier upload volumes: %v\n", volumeIds)
for _, vid := range volumeIds {
if err = doVolumeTier(ctx, commandEnv, writer, *collection, vid, *dest, *keepLocalDatFile); err != nil {
if err = doVolumeTierUpload(ctx, commandEnv, writer, *collection, vid, *dest, *keepLocalDatFile); err != nil {
return err
}
}
@@ -90,23 +91,20 @@ func (c *commandVolumeTier) Do(args []string, commandEnv *CommandEnv, writer io.
return nil
}
func doVolumeTier(ctx context.Context, commandEnv *CommandEnv, writer io.Writer, collection string, vid needle.VolumeId, dest string, keepLocalDatFile bool) (err error) {
func doVolumeTierUpload(ctx context.Context, commandEnv *CommandEnv, writer io.Writer, collection string, vid needle.VolumeId, dest string, keepLocalDatFile bool) (err error) {
// find volume location
locations, found := commandEnv.MasterClient.GetLocations(uint32(vid))
if !found {
return fmt.Errorf("volume %d not found", vid)
}
// mark the volume as readonly
/*
err = markVolumeReadonly(ctx, commandEnv.option.GrpcDialOption, needle.VolumeId(vid), locations)
if err != nil {
return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
}
*/
err = markVolumeReadonly(ctx, commandEnv.option.GrpcDialOption, needle.VolumeId(vid), locations)
if err != nil {
return fmt.Errorf("mark volume %d as readonly on %s: %v", vid, locations[0].Url, err)
}
// copy the .dat file to remote tier
err = copyDatToRemoteTier(ctx, commandEnv.option.GrpcDialOption, writer, needle.VolumeId(vid), collection, locations[0].Url, dest, keepLocalDatFile)
err = uploadDatToRemoteTier(ctx, commandEnv.option.GrpcDialOption, writer, needle.VolumeId(vid), collection, locations[0].Url, dest, keepLocalDatFile)
if err != nil {
return fmt.Errorf("copy dat file for volume %d on %s to %s: %v", vid, locations[0].Url, dest, err)
}
@@ -114,10 +112,10 @@ func doVolumeTier(ctx context.Context, commandEnv *CommandEnv, writer io.Writer,
return nil
}
func copyDatToRemoteTier(ctx context.Context, grpcDialOption grpc.DialOption, writer io.Writer, volumeId needle.VolumeId, collection string, sourceVolumeServer string, dest string, keepLocalDatFile bool) error {
func uploadDatToRemoteTier(ctx context.Context, grpcDialOption grpc.DialOption, writer io.Writer, volumeId needle.VolumeId, collection string, sourceVolumeServer string, dest string, keepLocalDatFile bool) error {
err := operation.WithVolumeServerClient(sourceVolumeServer, grpcDialOption, func(volumeServerClient volume_server_pb.VolumeServerClient) error {
stream, copyErr := volumeServerClient.VolumeTierCopyDatToRemote(ctx, &volume_server_pb.VolumeTierCopyDatToRemoteRequest{
stream, copyErr := volumeServerClient.VolumeTierMoveDatToRemote(ctx, &volume_server_pb.VolumeTierMoveDatToRemoteRequest{
VolumeId: uint32(volumeId),
Collection: collection,
DestinationBackendName: dest,