From 7c2c60c376c9d309de654372e254d80ec660a7e8 Mon Sep 17 00:00:00 2001
From: Chris Lu
Date: Sun, 19 May 2019 03:01:58 -0700
Subject: [PATCH] add locating data inside the ec files

---
 weed/storage/erasure_coding/ec_encoder.go |  11 +-
 weed/storage/erasure_coding/ec_locate.go  |  81 ++++++++++
 weed/storage/erasure_coding/ec_test.go    | 181 ++++++++++++++++++----
 3 files changed, 241 insertions(+), 32 deletions(-)
 create mode 100644 weed/storage/erasure_coding/ec_locate.go

diff --git a/weed/storage/erasure_coding/ec_encoder.go b/weed/storage/erasure_coding/ec_encoder.go
index f200297cf..4b5205dee 100644
--- a/weed/storage/erasure_coding/ec_encoder.go
+++ b/weed/storage/erasure_coding/ec_encoder.go
@@ -34,10 +34,17 @@ func encodeData(file *os.File, enc reedsolomon.Encoder, startOffset, blockSize i
 	return nil
 }
 
-func openEcFiles(baseFileName string) (files []*os.File, err error){
+// openEcFiles opens the DataShardsCount+ParityShardsCount shard files named
+// baseFileName.ec01, baseFileName.ec02, and so on. When forRead is true they
+// are opened read-only; otherwise they are created or truncated for writing.
+func openEcFiles(baseFileName string, forRead bool) (files []*os.File, err error) {
 	for i := 0; i< DataShardsCount+ParityShardsCount; i++{
 		fname := fmt.Sprintf("%s.ec%02d", baseFileName, i+1)
-		f, err := os.OpenFile(fname, os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
+		openOption := os.O_TRUNC | os.O_CREATE | os.O_WRONLY
+		if forRead {
+			openOption = os.O_RDONLY
+		}
+		f, err := os.OpenFile(fname, openOption, 0644)
 		if err != nil {
 			return files, fmt.Errorf("failed to open file %s: %v", fname, err)
 		}
diff --git a/weed/storage/erasure_coding/ec_locate.go b/weed/storage/erasure_coding/ec_locate.go
new file mode 100644
index 000000000..b570f750c
--- /dev/null
+++ b/weed/storage/erasure_coding/ec_locate.go
@@ -0,0 +1,81 @@
+package erasure_coding
+
+// Interval describes one contiguous piece of a requested byte range that lies
+// entirely inside a single erasure coding block.
+type Interval struct {
+	blockIndex       int    // index of the block within its area (large or small)
+	innerBlockOffset int64  // offset of the first byte inside that block
+	size             uint32 // number of bytes covered inside that block
+	isLargeBlock     bool   // whether the block is in the large block area
+}
+
+// locateData splits the byte range [offset, offset+size) of a .dat file of
+// datSize bytes into per-block intervals, in ascending offset order. The file
+// is treated as whole rows of DataShardsCount large blocks followed by small
+// blocks for the remainder. The range is not checked against datSize.
+func locateData(largeBlockLength, smallBlockLength int64, datSize int64, offset int64, size uint32) (intervals []Interval) {
+	blockIndex, isLargeBlock, innerBlockOffset := locateOffset(largeBlockLength, smallBlockLength, datSize, offset)
+
+	nLargeBlockRows := int(datSize / (largeBlockLength * DataShardsCount))
+
+	for size > 0 {
+		interval := Interval{
+			blockIndex:       blockIndex,
+			innerBlockOffset: innerBlockOffset,
+			isLargeBlock:     isLargeBlock,
+		}
+
+		blockRemaining := largeBlockLength - innerBlockOffset
+		if !isLargeBlock {
+			blockRemaining = smallBlockLength - innerBlockOffset
+		}
+
+		if int64(size) <= blockRemaining {
+			// the rest of the range fits in the current block
+			interval.size = size
+			intervals = append(intervals, interval)
+			return
+		}
+		interval.size = uint32(blockRemaining)
+		intervals = append(intervals, interval)
+
+		// continue at the start of the next block
+		size -= interval.size
+		blockIndex++
+		if isLargeBlock && blockIndex == nLargeBlockRows*DataShardsCount {
+			// past the last large block: continue with the first small block
+			isLargeBlock = false
+			blockIndex = 0
+		}
+		innerBlockOffset = 0
+	}
+	return
+}
+
+// locateOffset maps a byte offset of the .dat file to the block containing it:
+// the block index within its area, whether that area is the large block area,
+// and the offset inside the block.
+func locateOffset(largeBlockLength, smallBlockLength int64, datSize int64, offset int64) (blockIndex int, isLargeBlock bool, innerBlockOffset int64) {
+	largeRowSize := largeBlockLength * DataShardsCount
+	nLargeBlockRows := datSize / largeRowSize
+
+	// if offset is within the large block area
+	if offset < nLargeBlockRows*largeRowSize {
+		isLargeBlock = true
+		blockIndex, innerBlockOffset = locateOffsetWithinBlocks(largeBlockLength, offset)
+		return
+	}
+
+	isLargeBlock = false
+	offset -= nLargeBlockRows * largeRowSize
+	blockIndex, innerBlockOffset = locateOffsetWithinBlocks(smallBlockLength, offset)
+	return
+}
+
+// locateOffsetWithinBlocks returns which blockLength-sized block the offset
+// falls into and the offset inside that block.
+func locateOffsetWithinBlocks(blockLength int64, offset int64) (blockIndex int, innerBlockOffset int64) {
+	blockIndex = int(offset / blockLength)
+	innerBlockOffset = offset % blockLength
+	return
+}
diff --git a/weed/storage/erasure_coding/ec_test.go b/weed/storage/erasure_coding/ec_test.go
index 8942bfbe5..06529225b 100644
--- a/weed/storage/erasure_coding/ec_test.go
+++ b/weed/storage/erasure_coding/ec_test.go
@@ -1,6 +1,7 @@
 package erasure_coding
 
 import (
+	"bytes"
 	"fmt"
 	"os"
 	"testing"
@@ -11,28 +12,21 @@ import (
 	"github.com/klauspost/reedsolomon"
 )
 
+// block sizes shared by the tests; untyped so they convert to the int64 parameters
+const (
+	largeBlockSize = 10000
+	smallBlockSize = 100
+)
+
 func TestEncodingDecoding(t *testing.T) {
-	largeBlockSize := int64(10000)
-	smallBlockSize := int64(100)
 	bufferSize := 50
 	baseFileName := "1"
 
-	file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
+	err := generateEcFiles(baseFileName, bufferSize, largeBlockSize, smallBlockSize)
 	if err != nil {
-		t.Logf("failed to open dat file: %v", err)
+		t.Logf("generateEcFiles: %v", err)
 	}
 
-	fi, err := file.Stat()
-	if err != nil {
-		t.Logf("failed to stat dat file: %v", err)
-	}
-
-	err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
-	if err != nil {
-		t.Logf("failed to stat dat file: %v", err)
-	}
-	file.Close()
-
 	err = writeSortedEcxFiles(baseFileName)
 	if err != nil {
 		t.Logf("writeSortedEcxFiles: %v", err)
@@ -45,6 +39,26 @@ func TestEncodingDecoding(t *testing.T) {
 
 }
 
+// generateEcFiles encodes baseFileName.dat into the shard files by passing the
+// opened file and its size to encodeDatFile.
+func generateEcFiles(baseFileName string, bufferSize int, largeBlockSize int64, smallBlockSize int64) error {
+	file, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
+	if err != nil {
+		return fmt.Errorf("failed to open dat file: %v", err)
+	}
+	defer file.Close()
+
+	fi, err := file.Stat()
+	if err != nil {
+		return fmt.Errorf("failed to stat dat file: %v", err)
+	}
+	err = encodeDatFile(fi.Size(), err, baseFileName, bufferSize, largeBlockSize, file, smallBlockSize)
+	if err != nil {
+		return fmt.Errorf("encodeDatFile: %v", err)
+	}
+	return nil
+}
+
 func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSize int, largeBlockSize int64, file *os.File, smallBlockSize int64) error {
 	var processedSize int64
 	enc, err := reedsolomon.New(DataShardsCount, ParityShardsCount)
@@ -52,7 +66,7 @@ func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSi
 		return fmt.Errorf("failed to create encoder: %v", err)
 	}
 	buffers := make([][]byte, DataShardsCount+ParityShardsCount)
-	outputs, err := openEcFiles(baseFileName)
+	outputs, err := openEcFiles(baseFileName, false)
 	defer closeEcFiles(outputs)
 	if err != nil {
 		return fmt.Errorf("failed to open dat file: %v", err)
@@ -81,21 +95,11 @@ func encodeDatFile(remainingSize int64, err error, baseFileName string, bufferSi
 
 func writeSortedEcxFiles(baseFileName string) (e error) {
 
-	var indexFile *os.File
-	if indexFile, e = os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644); e != nil {
-		return fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, e)
+	cm, err := readCompactMap(baseFileName)
+	if err != nil {
+		return fmt.Errorf("readCompactMap: %v", err)
 	}
 
-	cm := needle_map.NewCompactMap()
-	storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
-		if !offset.IsZero() && size != types.TombstoneFileSize {
-			cm.Set(key, offset, size)
-		} else {
-			cm.Delete(key)
-		}
-		return nil
-	})
-
 	ecxFile, err := os.OpenFile(baseFileName+".ecx", os.O_TRUNC|os.O_CREATE|os.O_WRONLY, 0644)
 	if err != nil {
 		return fmt.Errorf("failed to open dat file: %v", err)
@@ -116,6 +120,123 @@ func writeSortedEcxFiles(baseFileName string) (e error) {
 }
 
+// validateFiles reads every live index entry back from both the dat file and
+// the shard files and returns an error when any of the reads fail or differ.
 func validateFiles(baseFileName string) error {
-	return nil
+	cm, err := readCompactMap(baseFileName)
+	if err != nil {
+		return fmt.Errorf("readCompactMap: %v", err)
+	}
+	datFile, err := os.OpenFile(baseFileName+".dat", os.O_RDONLY, 0)
+	if err != nil {
+		return fmt.Errorf("failed to open dat file: %v", err)
+	}
+	defer datFile.Close()
+
+	fi, err := datFile.Stat()
+	if err != nil {
+		return fmt.Errorf("failed to stat dat file: %v", err)
+	}
+
+	ecFiles, err := openEcFiles(baseFileName, true)
+	// deferred before the error check: openEcFiles also returns files on failure
+	defer closeEcFiles(ecFiles)
+	if err != nil {
+		return fmt.Errorf("failed to open ec files: %v", err)
+	}
+
+	err = cm.AscendingVisit(func(value needle_map.NeedleValue) error {
+		return assertSame(datFile, fi.Size(), ecFiles, value.Offset, value.Size)
+	})
+	if err != nil {
+		return fmt.Errorf("failed to check ec files: %v", err)
+	}
+	return nil
+}
+
+// readCompactMap loads baseFileName.idx into a compact map, keeping entries
+// with a non-zero offset and a non-tombstone size and deleting the others.
+func readCompactMap(baseFileName string) (*needle_map.CompactMap, error) {
+	indexFile, err := os.OpenFile(baseFileName+".idx", os.O_RDONLY, 0644)
+	if err != nil {
+		return nil, fmt.Errorf("cannot read Volume Index %s.idx: %v", baseFileName, err)
+	}
+	defer indexFile.Close()
+
+	cm := needle_map.NewCompactMap()
+	err = storage.WalkIndexFile(indexFile, func(key types.NeedleId, offset types.Offset, size uint32) error {
+		if !offset.IsZero() && size != types.TombstoneFileSize {
+			cm.Set(key, offset, size)
+		} else {
+			cm.Delete(key)
+		}
+		return nil
+	})
+	return cm, err
+}
+
+// assertSame reads the same needle range from the dat file and from the shard
+// files and returns an error when either read fails or the bytes differ.
+func assertSame(datFile *os.File, datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) error {
+	data, err := readDatFile(datFile, offset, size)
+	if err != nil {
+		return fmt.Errorf("failed to read dat file: %v", err)
+	}
+
+	ecData, err := readEcFile(datSize, ecFiles, offset, size)
+	if err != nil {
+		return fmt.Errorf("failed to read ec file: %v", err)
+	}
+
+	if !bytes.Equal(data, ecData) {
+		return fmt.Errorf("unexpected data read")
+	}
+
+	return nil
+}
+
+// readDatFile reads exactly size bytes from the dat file at the actual byte
+// position of offset.
+func readDatFile(datFile *os.File, offset types.Offset, size uint32) ([]byte, error) {
+	data := make([]byte, size)
+	n, err := datFile.ReadAt(data, offset.ToAcutalOffset())
+	if err != nil {
+		return nil, fmt.Errorf("failed to ReadAt dat file: %v", err)
+	}
+	if n != int(size) {
+		return nil, fmt.Errorf("unexpected read size %d, expected %d", n, size)
+	}
+	return data, nil
+}
+
+// readEcFile is a placeholder: it does not read the shard files yet and always
+// returns nil data, so assertSame reports a mismatch for any non-empty needle.
+func readEcFile(datSize int64, ecFiles []*os.File, offset types.Offset, size uint32) ([]byte, error) {
+	// TODO: locate the range with locateData and read it from ecFiles
+	return nil, nil
+}
+
+// TestLocateData checks a range that starts in, and one that crosses into, the
+// small block area of a dat file holding one large block row plus one byte.
+func TestLocateData(t *testing.T) {
+	intervals := locateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize, 1)
+	if len(intervals) != 1 {
+		t.Fatalf("unexpected interval size %d", len(intervals))
+	}
+	if !intervals[0].sameAs(Interval{0, 0, 1, false}) {
+		t.Errorf("unexpected interval %+v", intervals[0])
+	}
+
+	intervals = locateData(largeBlockSize, smallBlockSize, DataShardsCount*largeBlockSize+1, DataShardsCount*largeBlockSize/2+100, DataShardsCount*largeBlockSize+1-DataShardsCount*largeBlockSize/2-100)
+	if want := DataShardsCount - DataShardsCount/2 + 1; len(intervals) != want {
+		t.Fatalf("unexpected interval size %d, expected %d: %+v", len(intervals), want, intervals)
+	}
+	if last := intervals[len(intervals)-1]; !last.sameAs(Interval{0, 0, 1, false}) {
+		t.Errorf("unexpected last interval %+v", last)
+	}
+}
+
+// sameAs reports whether both intervals have identical field values.
+func (interval Interval) sameAs(that Interval) bool {
+	return interval == that
 }
 