add locating data inside the ec files
This commit is contained in:
@@ -34,10 +34,14 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func openEcFiles(baseFileName string) (files []*os.File, err error){
|
func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error){
|
||||||
for i := 0; i< DataShardsCount+ParityShardsCount; i++{
|
for i := 0; i< DataShardsCount+ParityShardsCount; i++{
|
||||||
fname := fmt.Sprintf("%s.ec%02d", baseFileName, i+1)
|
fname := fmt.Sprintf("%s.ec%02d", baseFileName, i+1)
|
||||||
f, err := os.OpenFile(fname, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
|
openOption := os.O_TRUNC|os.O_CREATE|os.O_WRONLY
|
||||||
|
if forRead {
|
||||||
|
openOption = os.O_RDONLY
|
||||||
|
}
|
||||||
|
f, err := os.OpenFile(fname, openOption, 0644)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return files, fmt.Errorf("failed to open file %s: %v", fname, err)
|
return files, fmt.Errorf("failed to open file %s: %v", fname, err)
|
||||||
}
|
}
|
||||||
|
|||||||
68
weed/storage/erasure_coding/ec_locate.go
Normal file
68
weed/storage/erasure_coding/ec_locate.go
Normal file
@@ -0,0 +1,68 @@
|
|||||||
|
package erasure_coding
|
||||||
|
|
||||||
|
// Interval describes one contiguous piece of a requested byte range that lies
// entirely inside a single block, as produced by locateData.
type Interval struct {
	// blockIndex is the index of the block holding the piece, counted within
	// the large-block area or the small-block area depending on isLargeBlock.
	blockIndex int
	// innerBlockOffset is the byte offset of the piece inside that block.
	innerBlockOffset int64
	// size is the length of the piece in bytes.
	size uint32
	// isLargeBlock reports whether the block is in the large-block area.
	isLargeBlock bool
}
|
||||||
|
|
||||||
|
func locateData(largeBlockLength, smallBlockLength int64, datSize int64, offset int64, size uint32) (intervals []Interval) {
|
||||||
|
blockIndex, isLargeBlock, innerBlockOffset := locateOffset(largeBlockLength, smallBlockLength, datSize, offset)
|
||||||
|
|
||||||
|
nLargeBlockRows := int(datSize / (largeBlockLength * DataShardsCount))
|
||||||
|
|
||||||
|
for size > 0 {
|
||||||
|
interval := Interval{
|
||||||
|
blockIndex: blockIndex,
|
||||||
|
innerBlockOffset: innerBlockOffset,
|
||||||
|
isLargeBlock: isLargeBlock,
|
||||||
|
}
|
||||||
|
|
||||||
|
blockRemaining := largeBlockLength - innerBlockOffset
|
||||||
|
if !isLargeBlock {
|
||||||
|
blockRemaining = smallBlockLength - innerBlockOffset
|
||||||
|
}
|
||||||
|
|
||||||
|
if int64(size) <= blockRemaining {
|
||||||
|
interval.size = size
|
||||||
|
intervals = append(intervals, interval)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
interval.size = uint32(blockRemaining)
|
||||||
|
intervals = append(intervals, interval)
|
||||||
|
|
||||||
|
size -= interval.size
|
||||||
|
blockIndex += 1
|
||||||
|
if isLargeBlock && blockIndex == nLargeBlockRows*DataShardsCount {
|
||||||
|
isLargeBlock = false
|
||||||
|
blockIndex = 0
|
||||||
|
}
|
||||||
|
innerBlockOffset = 0
|
||||||
|
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
func locateOffset(largeBlockLength, smallBlockLength int64, datSize int64, offset int64) (blockIndex int, isLargeBlock bool, innerBlockOffset int64) {
|
||||||
|
largeRowSize := largeBlockLength * DataShardsCount
|
||||||
|
nLargeBlockRows := datSize / (largeBlockLength * DataShardsCount)
|
||||||
|
|
||||||
|
// if offset is within the large block area
|
||||||
|
if offset < nLargeBlockRows*largeRowSize {
|
||||||
|
isLargeBlock = true
|
||||||
|
blockIndex, innerBlockOffset = locateOffsetWithinBlocks(largeBlockLength, offset)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
isLargeBlock = false
|
||||||
|
offset -= nLargeBlockRows * largeRowSize
|
||||||
|
blockIndex, innerBlockOffset = locateOffsetWithinBlocks(smallBlockLength, offset)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// locateOffsetWithinBlocks resolves offset against a sequence of equally sized
// blocks of blockLength bytes, returning the index of the containing block and
// the offset inside that block.
func locateOffsetWithinBlocks(blockLength int64, offset int64) (blockIndex int, innerBlockOffset int64) {
	return int(offset / blockLength), offset % blockLength
}
|
||||||
@@ -1,6 +1,7 @@
|
|||||||
package erasure_coding
|
package erasure_coding
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"bytes"
|
||||||
"fmt"
|
"fmt"
|
||||||
"os"
|
"os"
|
||||||
"testing"
|
"testing"
|
||||||
@@ -11,28 +12,20 @@ import (
|
|||||||
"github.com/klauspost/reedsolomon"
|
"github.com/klauspost/reedsolomon"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// Block lengths shared by the tests in this file; they are passed to
// generateEcFiles and locateData.
const (
	// largeBlockSize is the length in bytes of a large block.
	largeBlockSize = 10000
	// smallBlockSize is the length in bytes of a small block.
	smallBlockSize = 100
)
|
||||||
|
|
||||||
func TestEncodingDecoding(t *testing.T) {
|
func TestEncodingDecoding(t *testing.T) {
|
||||||
largeBlockSize := int64(10000)
|
|
||||||
smallBlockSize := int64(100)
|
|
||||||
bufferSize := 50
|
bufferSize := 50
|
||||||
baseFileName := "1"
|
baseFileName := "1"
|
||||||
|
|
||||||
file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
|
err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Logf("failed to open dat file: %v", err)
|
t.Logf("generateEcFiles: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fi, err := file.Stat()
|
|
||||||
if err != nil {
|
|
||||||
t.Logf("failed to stat dat file: %v", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
|
|
||||||
if err != nil {
|
|
||||||
t.Logf("failed to stat dat file: %v", err)
|
|
||||||
}
|
|
||||||
file.Close()
|
|
||||||
|
|
||||||
err = writeSortedEcxFiles(baseFileName)
|
err = writeSortedEcxFiles(baseFileName)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Logf("writeSortedEcxFiles: %v", err)
|
t.Logf("writeSortedEcxFiles: %v", err)
|
||||||
@@ -45,6 +38,24 @@ func TestEncodingDecoding(t *testing.T) {
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
|
||||||
|
file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open dat file: %v", err)
|
||||||
|
}
|
||||||
|
defer file.Close()
|
||||||
|
|
||||||
|
fi, err := file.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to stat dat file: %v", err)
|
||||||
|
}
|
||||||
|
err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("encodeDatFile: %v", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
|
func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
|
||||||
var processedSize int64
|
var processedSize int64
|
||||||
enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
|
enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
|
||||||
@@ -52,7 +63,7 @@ func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSi
|
|||||||
return fmt.Errorf("failed to create encoder: %v", err)
|
return fmt.Errorf("failed to create encoder: %v", err)
|
||||||
}
|
}
|
||||||
buffers := make([][]byte, DataShardsCount+ParityShardsCount)
|
buffers := make([][]byte, DataShardsCount+ParityShardsCount)
|
||||||
outputs, err := openEcFiles(baseFileName)
|
outputs, err := openEcFiles(baseFileName, false)
|
||||||
defer closeEcFiles(outputs)
|
defer closeEcFiles(outputs)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to open dat file: %v", err)
|
return fmt.Errorf("failed to open dat file: %v", err)
|
||||||
@@ -81,21 +92,11 @@ func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSi
|
|||||||
|
|
||||||
func writeSortedEcxFiles(baseFileName string) (e error) {
|
func writeSortedEcxFiles(baseFileName string) (e error) {
|
||||||
|
|
||||||
var indexFile *os.File
|
cm, err := readCompactMap(baseFileName)
|
||||||
if indexFile, e = os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644); e != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, e)
|
return fmt.Errorf("readCompactMap: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
cm := needle_map.NewCompactMap()
|
|
||||||
storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
|
|
||||||
if !offset.IsZero() && size != types.TombstoneFileSize {
|
|
||||||
cm.Set(key, offset, size)
|
|
||||||
} else {
|
|
||||||
cm.Delete(key)
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
|
|
||||||
ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
|
ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to open dat file: %v", err)
|
return fmt.Errorf("failed to open dat file: %v", err)
|
||||||
@@ -116,6 +117,106 @@ func writeSortedEcxFiles(baseFileName string) (e error) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func validateFiles(baseFileName string) error {
|
func validateFiles(baseFileName string) error {
|
||||||
return nil
|
cm, err := readCompactMap(baseFileName)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("readCompactMap: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
datFile, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to open dat file: %v", err)
|
||||||
|
}
|
||||||
|
defer datFile.Close()
|
||||||
|
|
||||||
|
fi, err := datFile.Stat()
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to stat dat file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ecFiles, err := openEcFiles(baseFileName, true)
|
||||||
|
defer closeEcFiles(ecFiles)
|
||||||
|
|
||||||
|
err = cm.AscendingVisit(func(value needle_map.NeedleValue) error {
|
||||||
|
return assertSame(datFile, fi.Size(), ecFiles, value.Offset, value.Size)
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to check ec files: %v", err)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readCompactMap(baseFileName string) (*needle_map.CompactMap, error) {
|
||||||
|
indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644)
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err)
|
||||||
|
}
|
||||||
|
defer indexFile.Close()
|
||||||
|
|
||||||
|
cm := needle_map.NewCompactMap()
|
||||||
|
err = storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
|
||||||
|
if !offset.IsZero() && size != types.TombstoneFileSize {
|
||||||
|
cm.Set(key, offset, size)
|
||||||
|
} else {
|
||||||
|
cm.Delete(key)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
return cm, err
|
||||||
|
}
|
||||||
|
|
||||||
|
func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) error {
|
||||||
|
|
||||||
|
data, err := readDatFile(datFile, offset, size)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read dat file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ecData, err := readEcFile(datSize, ecFiles, offset, size)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("failed to read ec file: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
if bytes.Compare(data, ecData) != 0 {
|
||||||
|
return fmt.Errorf("unexpected data read")
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readDatFile(datFile *os.File, offset types.Offset, size uint32) ([]byte, error) {
|
||||||
|
|
||||||
|
data := make([]byte, size)
|
||||||
|
n, err := datFile.ReadAt(data, offset.ToAcutalOffset())
|
||||||
|
if err != nil {
|
||||||
|
return nil, fmt.Errorf("failed to ReadAt dat file: %v", err)
|
||||||
|
}
|
||||||
|
if n != int(size) {
|
||||||
|
return nil, fmt.Errorf("unexpected read size %d, expected %d", n, size)
|
||||||
|
}
|
||||||
|
return data, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func readEcFile(datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) ([]byte, error) {
|
||||||
|
|
||||||
|
return nil, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestLocateData(t *testing.T) {
|
||||||
|
intervals := locateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
|
||||||
|
if len(intervals) != 1 {
|
||||||
|
t.Errorf("unexpected interval size %d", len(intervals))
|
||||||
|
}
|
||||||
|
if !intervals[0].sameAs(Interval{0, 0, 1, false}) {
|
||||||
|
t.Errorf("unexpected interval %+v", intervals[0])
|
||||||
|
}
|
||||||
|
|
||||||
|
intervals = locateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1 - DataShardsCount*largeBlockSize/2-100)
|
||||||
|
fmt.Printf("%+v\n", intervals)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (this Interval) sameAs(that Interval) bool {
|
||||||
|
return this.isLargeBlock == that.isLargeBlock &&
|
||||||
|
this.innerBlockOffset == that.innerBlockOffset &&
|
||||||
|
this.blockIndex == that.blockIndex &&
|
||||||
|
this.size == that.size
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user