generate ec01~ec14, generate ecx file with sorted needle values

This commit is contained in:
Chris Lu
2019-05-18 22:46:24 -07:00
parent 12dc6608f0
commit 87f63b9c08
8 changed files with 189 additions and 48 deletions

View File

@@ -1,9 +1,11 @@
package erasure_coding
import (
"fmt"
"io"
"os"
"github.com/chrislusf/seaweedfs/weed/glog"
"github.com/klauspost/reedsolomon"
)
@@ -14,7 +16,46 @@ const (
ErasureCodingSmallBlockSize = 1024 * 1024 // 1MB
)
func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte) error {
// encodeData processes one block of blockSize bytes by splitting it into
// batches whose size equals the length of the first buffer, and handing each
// batch to encodeDataOneBatch.
//
// startOffset is the offset passed to the first batch; each following batch is
// offset by one more buffer length. blockSize must be a multiple of the buffer
// length, otherwise the process is terminated through glog.Fatalf.
// The first error reported by encodeDataOneBatch stops processing and is
// returned unchanged.
func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
	batchSize := int64(len(buffers[0]))
	pending := blockSize / batchSize
	if blockSize%batchSize != 0 {
		glog.Fatalf("unexpected block size %d buffer size %d", blockSize, batchSize)
	}

	offset := startOffset
	for ; pending > 0; pending-- {
		if err := encodeDataOneBatch(file, enc, offset, blockSize, buffers, outputs); err != nil {
			return err
		}
		offset += batchSize
	}
	return nil
}
// openEcFiles creates (or truncates) one write-only file per data and parity
// shard. The files are named baseFileName.ec01, baseFileName.ec02, and so on,
// and are returned in that order.
//
// If a file cannot be opened, the files opened so far are returned together
// with the error, so the caller remains responsible for closing them.
func openEcFiles(baseFileName string) (files []*os.File, err error) {
	shardTotal := DataShardsCount + ParityShardsCount
	for shard := 1; shard <= shardTotal; shard++ {
		shardName := fmt.Sprintf("%s.ec%02d", baseFileName, shard)
		shardFile, openErr := os.OpenFile(shardName, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
		if openErr != nil {
			return files, fmt.Errorf("failed to open file %s: %v", shardName, openErr)
		}
		files = append(files, shardFile)
	}
	return files, nil
}
// closeEcFiles closes every non-nil file in files. Nil entries are skipped and
// errors returned by Close are ignored.
func closeEcFiles(files []*os.File) {
	for idx := range files {
		if files[idx] == nil {
			continue
		}
		files[idx].Close()
	}
}
func encodeDataOneBatch(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize int64, buffers [][]byte, outputs []*os.File) error {
// read data into buffers
for i := 0; i < DataShardsCount; i++ {
@@ -36,5 +77,12 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i
return err
}
for i := 0; i < DataShardsCount+ParityShardsCount; i++ {
_, err := outputs[i].Write(buffers[i])
if err != nil {
return err
}
}
return nil
}

View File

@@ -1,9 +1,13 @@
package erasure_coding
import (
"fmt"
"os"
"testing"
"github.com/chrislusf/seaweedfs/weed/storage"
"github.com/chrislusf/seaweedfs/weed/storage/needle_map"
"github.com/chrislusf/seaweedfs/weed/storage/types"
"github.com/klauspost/reedsolomon"
)
@@ -11,8 +15,9 @@ func TestEncodingDecoding(t *testing.T) {
largeBlockSize := int64(10000)
smallBlockSize := int64(100)
bufferSize := 50
baseFileName := "1"
file, err := os.OpenFile("1.dat", os.O_RDONLY, 0)
file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
if err != nil {
t.Logf("failed to open dat file: %v", err)
}
@@ -22,31 +27,95 @@ func TestEncodingDecoding(t *testing.T) {
t.Logf("failed to stat dat file: %v", err)
}
remainingSize := fi.Size()
var processedSize int64
enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
if err != nil {
t.Logf("failed to create encoder: %v", err)
t.Logf("failed to stat dat file: %v", err)
}
file.Close()
err = writeSortedEcxFiles(baseFileName)
if err != nil {
t.Logf("writeSortedEcxFiles: %v", err)
}
buffers := make([][]byte, DataShardsCount+ParityShardsCount)
for i, _ := range buffers {
buffers[i] = make([]byte, bufferSize)
}
for remainingSize > largeBlockSize*DataShardsCount {
encodeData(file, enc, processedSize, largeBlockSize, buffers)
remainingSize -= largeBlockSize * DataShardsCount
processedSize += largeBlockSize * DataShardsCount
}
for remainingSize > 0 {
encodeData(file, enc, processedSize, smallBlockSize, buffers)
remainingSize -= smallBlockSize * DataShardsCount
processedSize += smallBlockSize * DataShardsCount
err = validateFiles(baseFileName)
if err != nil {
t.Logf("writeSortedEcxFiles: %v", err)
}
}
// encodeDatFile erasure-codes the content of file into the shard files opened
// by openEcFiles for baseFileName.
//
// remainingSize is the number of bytes still to be encoded. bufferSize is the
// size of each per-shard buffer. While more than one full row of large blocks
// (largeBlockSize*DataShardsCount bytes) remains, the data is encoded with
// largeBlockSize; whatever is left is encoded in rows of smallBlockSize.
//
// The err parameter is overwritten before it is ever read; it is kept only so
// that existing callers continue to compile.
//
// It returns an error if the encoder cannot be created, if a shard file cannot
// be opened, or if encoding any block fails.
func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
	var processedSize int64

	enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
	if err != nil {
		return fmt.Errorf("failed to create encoder: %v", err)
	}

	outputs, err := openEcFiles(baseFileName)
	// openEcFiles can return already-opened files together with an error, so
	// the close is scheduled before the error is inspected.
	defer closeEcFiles(outputs)
	if err != nil {
		return fmt.Errorf("failed to open ec files: %v", err)
	}

	buffers := make([][]byte, DataShardsCount+ParityShardsCount)
	for i := range buffers {
		buffers[i] = make([]byte, bufferSize)
	}

	for remainingSize > largeBlockSize*DataShardsCount {
		err = encodeData(file, enc, processedSize, largeBlockSize, buffers, outputs)
		if err != nil {
			return fmt.Errorf("failed to encode large chunk data: %v", err)
		}
		remainingSize -= largeBlockSize * DataShardsCount
		processedSize += largeBlockSize * DataShardsCount
	}

	for remainingSize > 0 {
		// The result must be assigned; otherwise the check below would test a
		// stale err and encoding failures would be silently ignored.
		err = encodeData(file, enc, processedSize, smallBlockSize, buffers, outputs)
		if err != nil {
			return fmt.Errorf("failed to encode small chunk data: %v", err)
		}
		remainingSize -= smallBlockSize * DataShardsCount
		processedSize += smallBlockSize * DataShardsCount
	}

	return nil
}
// writeSortedEcxFiles reads baseFileName.idx, collects its entries into a
// compact map, and writes the surviving entries to baseFileName.ecx.
//
// While walking the index, an entry with a non-zero offset and a size other
// than types.TombstoneFileSize is stored in the map; any other entry removes
// its key from the map. The map is then traversed with AscendingVisit
// (presumably in ascending key order; confirm against needle_map) and each
// value's byte representation is appended to the .ecx file, which is created
// or truncated.
//
// It returns an error if either file cannot be opened, if walking the index
// fails, or if writing or closing the .ecx file fails.
func writeSortedEcxFiles(baseFileName string) (e error) {
	indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644)
	if err != nil {
		return fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err)
	}
	// The index is only read, so a Close error carries no data-loss risk.
	defer indexFile.Close()

	cm := needle_map.NewCompactMap()
	err = storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
		if !offset.IsZero() && size != types.TombstoneFileSize {
			cm.Set(key, offset, size)
		} else {
			cm.Delete(key)
		}
		return nil
	})
	if err != nil {
		return fmt.Errorf("failed to walk index file %s.idx: %v", baseFileName, err)
	}

	ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		return fmt.Errorf("failed to open ecx file %s.ecx: %v", baseFileName, err)
	}
	defer func() {
		// Report a Close failure on the written file unless an earlier error
		// is already being returned.
		if closeErr := ecxFile.Close(); closeErr != nil && e == nil {
			e = fmt.Errorf("failed to close ecx file %s.ecx: %v", baseFileName, closeErr)
		}
	}()

	err = cm.AscendingVisit(func(value needle_map.NeedleValue) error {
		_, writeErr := ecxFile.Write(value.ToBytes())
		return writeErr
	})
	if err != nil {
		return fmt.Errorf("failed to write ecx file %s.ecx: %v", baseFileName, err)
	}

	return nil
}
// validateFiles is a placeholder: it performs no checks on the files derived
// from baseFileName and always reports success.
func validateFiles(baseFileName string) error {
	var noErr error
	return noErr
}