mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-05-21 09:11:29 +00:00
weed fix -ecx reconstructs the .dat from the local data shards, scans the needles, and writes a fresh ascending-sorted .ecx containing only live entries — the same on-disk index WriteSortedFileFromIdx emits at encode time. When the .vif is also missing it is regenerated from the inferred EC ratio (flags > .vif > shard-count inference / 10+4) and the .dat size recovered from the scan. When some data shards are missing but at least dataShards shards survive, the missing shards are first reconstructed from the survivors via Reed-Solomon, so a partial shard set is repaired too. Also makes erasure_coding.WriteDatFile de-stripe using len(shardFileNames) instead of the DataShardsCount constant, so the caller's actual data-shard count is honored (behavior-preserving for the default 10, and fixing the existing caller that already passes ECContext.DataShards). This recovers an EC volume whose sealed index was lost from every node while enough shards survive, a state neither ec.rebuild nor ec.decode can repair because both require an existing .ecx. Flags: -ecx, -ecDataShards, -ecParityShards. Run with the volume server stopped.
234 lines
6.4 KiB
Go
234 lines
6.4 KiB
Go
package erasure_coding
|
|
|
|
import (
|
|
"fmt"
|
|
"io"
|
|
"os"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/backend"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/idx"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/needle_map"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/super_block"
|
|
"github.com/seaweedfs/seaweedfs/weed/storage/types"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
// EcNoLiveEntriesSubstring is the error-message fragment used for server/client coordination
|
|
// when ec.decode determines that decoding should be a no-op because all entries are deleted.
|
|
const EcNoLiveEntriesSubstring = "has no live entries"
|
|
|
|
// HasLiveNeedles returns whether the EC index (.ecx) contains at least one live (non-deleted) entry.
|
|
// This is used by ec.decode to avoid generating an empty normal volume when all entries were deleted.
|
|
func HasLiveNeedles(indexBaseFileName string) (hasLive bool, err error) {
|
|
err = iterateEcxFile(indexBaseFileName, func(_ types.NeedleId, _ types.Offset, size types.Size) error {
|
|
if !size.IsDeleted() {
|
|
hasLive = true
|
|
return io.EOF // stop early
|
|
}
|
|
return nil
|
|
})
|
|
return
|
|
}
|
|
|
|
// write .idx file from .ecx and .ecj files
|
|
func WriteIdxFileFromEcIndex(baseFileName string) (err error) {
|
|
|
|
ecxFile, openErr := os.OpenFile(baseFileName+".ecx", os.O_RDONLY, 0644)
|
|
if openErr != nil {
|
|
return fmt.Errorf("cannot open ec index %s.ecx: %v", baseFileName, openErr)
|
|
}
|
|
defer ecxFile.Close()
|
|
|
|
idxFile, openErr := os.OpenFile(baseFileName+".idx", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
|
if openErr != nil {
|
|
return fmt.Errorf("cannot open %s.idx: %v", baseFileName, openErr)
|
|
}
|
|
defer idxFile.Close()
|
|
|
|
io.Copy(idxFile, ecxFile)
|
|
|
|
err = iterateEcjFile(baseFileName, func(key types.NeedleId) error {
|
|
|
|
bytes := needle_map.ToBytes(key, types.Offset{}, types.TombstoneFileSize)
|
|
idxFile.Write(bytes)
|
|
|
|
return nil
|
|
})
|
|
|
|
return err
|
|
}
|
|
|
|
// FindDatFileSize calculate .dat file size from max offset entry
|
|
// there may be extra deletions after that entry
|
|
// but they are deletions anyway
|
|
func FindDatFileSize(dataBaseFileName, indexBaseFileName string) (datSize int64, err error) {
|
|
|
|
version, err := readEcVolumeVersion(dataBaseFileName)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("read ec volume %s version: %v", dataBaseFileName, err)
|
|
}
|
|
|
|
// Safety: ensure datSize is at least SuperBlockSize. While the caller typically
|
|
// checks HasLiveNeedles first, this protects against direct calls to FindDatFileSize
|
|
// when all needles are deleted (see issue #7748).
|
|
datSize = int64(super_block.SuperBlockSize)
|
|
|
|
err = iterateEcxFile(indexBaseFileName, func(key types.NeedleId, offset types.Offset, size types.Size) error {
|
|
|
|
if size.IsDeleted() {
|
|
return nil
|
|
}
|
|
|
|
entryStopOffset := offset.ToActualOffset() + needle.GetActualSize(size, version)
|
|
if datSize < entryStopOffset {
|
|
datSize = entryStopOffset
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
return
|
|
}
|
|
|
|
func readEcVolumeVersion(baseFileName string) (version needle.Version, err error) {
|
|
|
|
// find volume version
|
|
datFile, err := os.OpenFile(baseFileName+".ec00", os.O_RDONLY, 0644)
|
|
if err != nil {
|
|
return 0, fmt.Errorf("open ec volume %s superblock: %v", baseFileName, err)
|
|
}
|
|
datBackend := backend.NewDiskFile(datFile)
|
|
|
|
superBlock, err := super_block.ReadSuperBlock(datBackend)
|
|
datBackend.Close()
|
|
if err != nil {
|
|
return 0, fmt.Errorf("read ec volume %s superblock: %v", baseFileName, err)
|
|
}
|
|
|
|
return superBlock.Version, nil
|
|
|
|
}
|
|
|
|
func iterateEcxFile(baseFileName string, processNeedleFn func(key types.NeedleId, offset types.Offset, size types.Size) error) error {
|
|
ecxFile, openErr := os.OpenFile(baseFileName+".ecx", os.O_RDONLY, 0644)
|
|
if openErr != nil {
|
|
return fmt.Errorf("cannot open ec index %s.ecx: %v", baseFileName, openErr)
|
|
}
|
|
defer ecxFile.Close()
|
|
|
|
buf := make([]byte, types.NeedleMapEntrySize)
|
|
for {
|
|
n, err := ecxFile.Read(buf)
|
|
if n != types.NeedleMapEntrySize {
|
|
if err == io.EOF {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
key, offset, size := idx.IdxFileEntry(buf)
|
|
if processNeedleFn != nil {
|
|
err = processNeedleFn(key, offset, size)
|
|
}
|
|
if err != nil {
|
|
if err != io.EOF {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
func iterateEcjFile(baseFileName string, processNeedleFn func(key types.NeedleId) error) error {
|
|
if !util.FileExists(baseFileName + ".ecj") {
|
|
return nil
|
|
}
|
|
ecjFile, openErr := os.OpenFile(baseFileName+".ecj", os.O_RDONLY, 0644)
|
|
if openErr != nil {
|
|
return fmt.Errorf("cannot open ec index %s.ecj: %v", baseFileName, openErr)
|
|
}
|
|
defer ecjFile.Close()
|
|
|
|
buf := make([]byte, types.NeedleIdSize)
|
|
for {
|
|
n, err := ecjFile.Read(buf)
|
|
if n != types.NeedleIdSize {
|
|
if err == io.EOF {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
if processNeedleFn != nil {
|
|
err = processNeedleFn(types.BytesToNeedleId(buf))
|
|
}
|
|
if err != nil {
|
|
if err == io.EOF {
|
|
return nil
|
|
}
|
|
return err
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
// WriteDatFile generates .dat from EC shard files (e.g., .ec00 ~ .ec09 for 10+4)
|
|
func WriteDatFile(baseFileName string, datFileSize int64, shardFileNames []string) error {
|
|
|
|
datFile, openErr := os.OpenFile(baseFileName+".dat", os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0644)
|
|
if openErr != nil {
|
|
return fmt.Errorf("cannot write volume %s.dat: %v", baseFileName, openErr)
|
|
}
|
|
defer datFile.Close()
|
|
|
|
// Use the actual number of data shards passed in rather than the global
|
|
// constant, so the de-striping matches the caller's shard set.
|
|
dataShards := len(shardFileNames)
|
|
inputFiles := make([]*os.File, dataShards)
|
|
|
|
defer func() {
|
|
for shardId := 0; shardId < dataShards; shardId++ {
|
|
if inputFiles[shardId] != nil {
|
|
inputFiles[shardId].Close()
|
|
}
|
|
}
|
|
}()
|
|
|
|
for shardId := 0; shardId < dataShards; shardId++ {
|
|
inputFiles[shardId], openErr = os.OpenFile(shardFileNames[shardId], os.O_RDONLY, 0)
|
|
if openErr != nil {
|
|
return openErr
|
|
}
|
|
}
|
|
|
|
for datFileSize >= int64(dataShards)*ErasureCodingLargeBlockSize {
|
|
for shardId := 0; shardId < dataShards; shardId++ {
|
|
w, err := io.CopyN(datFile, inputFiles[shardId], ErasureCodingLargeBlockSize)
|
|
if w != ErasureCodingLargeBlockSize {
|
|
return fmt.Errorf("copy %s large block on shardId %d: %v", baseFileName, shardId, err)
|
|
}
|
|
datFileSize -= ErasureCodingLargeBlockSize
|
|
}
|
|
}
|
|
|
|
for datFileSize > 0 {
|
|
for shardId := 0; shardId < dataShards; shardId++ {
|
|
toRead := min(datFileSize, ErasureCodingSmallBlockSize)
|
|
w, err := io.CopyN(datFile, inputFiles[shardId], toRead)
|
|
if w != toRead {
|
|
return fmt.Errorf("copy %s small block %d: %v", baseFileName, shardId, err)
|
|
}
|
|
datFileSize -= toRead
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// min returns the smaller of x and y; when they are equal it returns x.
func min(x, y int64) int64 {
	if y < x {
		return y
	}
	return x
}
|