seaweedfs/test/plugin_workers/fake_volume_server.go
Chris Lu 3a8389cd68 fix(ec): verify full shard set before deleting source volume (#9490) (#9493)
* fix(ec): verify full shard set before deleting source volume (#9490)

Before this change, both the worker EC task and the shell ec.encode
command would delete the source .dat as soon as MountEcShards returned —
even if distribute/mount failed partway, leaving fewer than 14 shards
in the cluster. The deletion was logged at V(2), so by the time someone
noticed missing data the only trace was a 0-byte .dat synthesized by
disk_location at next restart.

- Worker path adds Step 6: poll VolumeEcShardsInfo on every destination,
  union the bitmaps, and refuse to call deleteOriginalVolume unless all
  TotalShardsCount distinct shard ids are observed (see the sketch after
  this list). A failed gate leaves the source readonly so the next
  detection scan can retry.
- Shell ec.encode adds the same gate after EcBalance, walking the master
  topology with collectEcNodeShardsInfo.
- VolumeDelete RPC success and .dat/.idx unlinks now log at V(0) so any
  source destruction is traceable in default-verbosity production logs.
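
A minimal sketch of the union-and-count gate, assuming shard ids
0..totalShards-1 and totalShards <= 32 (the helper name and map layout
are illustrative; only VolumeEcShardsInfo, deleteOriginalVolume, and the
distinct-id requirement come from this change):

    // haveFullShardSet is a hypothetical helper: union the shard ids that
    // every destination reports and require all totalShards distinct ids.
    // Assumes: import "math/bits".
    func haveFullShardSet(infosByServer map[string][]*volume_server_pb.EcShardInfo, totalShards int) bool {
        var seen uint32 // one bit per observed shard id
        for _, infos := range infosByServer {
            for _, info := range infos {
                if int(info.ShardId) >= totalShards {
                    continue // ignore out-of-range ids rather than overflow the bitmap
                }
                seen |= 1 << info.ShardId
            }
        }
        return bits.OnesCount32(seen) == totalShards
    }

Only when this returns true does the worker call deleteOriginalVolume;
otherwise the source stays readonly for the next scan.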

The EC-balance-vs-in-flight-encode race is intentionally left for a
follow-up; balance should refuse to move shards for a volume whose
encode job is not in Completed state.

* fix(ec): trim doc comments on the new shard-verification path

Drop WHAT-describing godoc on freshly added helpers; keep only the WHY
notes (query-error policy in VerifyShardsAcrossServers, the #9490
reference at the call sites).

* fix(ec): drop issue-number anchors from new comments

Issue references age poorly — the why behind each comment already
stands on its own.

* fix(ec): parametrize RequireFullShardSet on totalShards

Take totalShards as an argument instead of reading the package-level
TotalShardsCount constant. The OSS callers continue to pass 14, but the
helper is now usable with any DataShards+ParityShards ratio.
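
The resulting shape, roughly (the argument and return types here are
assumptions; only the helper name and the totalShards parameter come
from this change):

    // RequireFullShardSet fails unless every distinct shard id in
    // 0..totalShards-1 was observed somewhere in the cluster.
    func RequireFullShardSet(observed map[uint32]struct{}, totalShards int) error {
        distinct := 0
        for id := range observed {
            if int(id) < totalShards {
                distinct++
            }
        }
        if distinct < totalShards {
            return fmt.Errorf("observed %d of %d EC shards; refusing to delete source volume", distinct, totalShards)
        }
        return nil
    }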

* test(plugin_workers): make fake volume server respond to VolumeEcShardsInfo

The new pre-delete verification gate calls VolumeEcShardsInfo on every
destination after mount, and the fake server's embedded
UnimplementedVolumeServerServer returned Unimplemented — the verifier read
that as zero shards on every node and aborted source deletion. Build the
response from recorded mount requests so the integration test exercises
the gate end-to-end.

* fix(rust/volume): log .dat/.idx unlink with size in remove_volume_files

Mirror the Go-side change in weed/storage/volume_write.go: stat each
file before removing and emit an info-level log for .dat/.idx so a
destructive call is always traceable. The OSS Rust crate previously
unlinked them silently.
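
The Go-side pattern being mirrored, sketched here in Go (the real code
in weed/storage/volume_write.go differs in detail; glog is SeaweedFS's
logging package, and V(0) matches the default-verbosity goal above):

    func removeAndLog(path string) error {
        var size int64
        if fi, err := os.Stat(path); err == nil {
            size = fi.Size() // stat before unlink so the size survives in the log
        }
        err := os.Remove(path)
        glog.V(0).Infof("removed volume file %s (%d bytes): %v", path, size, err)
        return err
    }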

* fix(ec/decode): verify regenerated .dat before deleting EC shards

After mountDecodedVolume succeeds, the previous code immediately
unmounts and deletes every EC shard. A silent failure in generate or
mount could leave the cluster with neither shards nor a valid normal
volume. Probe ReadVolumeFileStatus on the target and refuse to proceed
if dat or idx is 0 bytes.
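
The probe, roughly (client is a volume server gRPC client and vid the
decoded volume id; the response fields match the type the fake server
below also serves):

    status, err := client.ReadVolumeFileStatus(ctx,
        &volume_server_pb.ReadVolumeFileStatusRequest{VolumeId: vid})
    if err != nil {
        return fmt.Errorf("probe regenerated volume %d: %w", vid, err)
    }
    if status.DatFileSize == 0 || status.IdxFileSize == 0 {
        return fmt.Errorf("regenerated volume %d has an empty .dat or .idx; keeping EC shards", vid)
    }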

Also make the fake volume server's VolumeEcShardsInfo reflect whichever
shard files exist on disk (seeded for tests as well as mounted via
RPC), so the new gate can be exercised end-to-end.

* fix(ec): address PR review nits in verification + fake server

- Drop unused ServerShardInventory.Sizes field.
- Skip shard ids >= MaxShardCount before bitmap Set so the ShardBits
  bound is explicit (Set already no-ops on overflow, this is for
  clarity); sketched after this list.
- Nil-guard the fake server's VolumeEcShardsInfo so a malformed call
  doesn't panic the test process.
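
The bound check from the second bullet, in isolation (shardBits and its
Set method are as described above; MaxShardCount is the package
constant):

    for _, id := range shardIds {
        if id >= MaxShardCount {
            continue // make the ShardBits bound explicit at the call site
        }
        shardBits.Set(id)
    }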
2026-05-13 19:29:24 -07:00


package pluginworkers

import (
	"context"
	"fmt"
	"io"
	"net"
	"os"
	"path/filepath"
	"strings"
	"sync"
	"testing"
	"time"

	"github.com/seaweedfs/seaweedfs/weed/operation"
	"github.com/seaweedfs/seaweedfs/weed/pb"
	"github.com/seaweedfs/seaweedfs/weed/pb/volume_server_pb"
	"google.golang.org/grpc"
	"google.golang.org/grpc/credentials/insecure"
)

// VolumeServer provides a minimal volume server for erasure coding tests.
type VolumeServer struct {
	volume_server_pb.UnimplementedVolumeServerServer

	t        *testing.T
	server   *grpc.Server
	listener net.Listener
	address  string
	baseDir  string

	mu                  sync.Mutex
	receivedFiles       map[string]uint64
	mountRequests       []*volume_server_pb.VolumeEcShardsMountRequest
	deleteRequests      []*volume_server_pb.VolumeDeleteRequest
	markReadonlyCalls   int
	markWritableCalls   int
	readFileStatusCalls int
	vacuumGarbageRatio  float64
	vacuumCheckCalls    int
	vacuumCompactCalls  int
	vacuumCommitCalls   int
	vacuumCleanupCalls  int
	volumeCopyCalls     int
	volumeMountCalls    int
	tailReceiverCalls   int
}

// NewVolumeServer starts a test volume server using the provided base directory.
func NewVolumeServer(t *testing.T, baseDir string) *VolumeServer {
	t.Helper()
	if baseDir == "" {
		baseDir = t.TempDir()
	}
	if err := os.MkdirAll(baseDir, 0755); err != nil {
		t.Fatalf("create volume base dir: %v", err)
	}
	listener, err := net.Listen("tcp", "127.0.0.1:0")
	if err != nil {
		t.Fatalf("listen volume server: %v", err)
	}
	grpcPort := listener.Addr().(*net.TCPAddr).Port
	server := pb.NewGrpcServer()
	vs := &VolumeServer{
		t:             t,
		server:        server,
		listener:      listener,
		address:       fmt.Sprintf("127.0.0.1:%d", grpcPort),
		baseDir:       baseDir,
		receivedFiles: make(map[string]uint64),
	}
	volume_server_pb.RegisterVolumeServerServer(server, vs)
	go func() {
		_ = server.Serve(listener)
	}()
	t.Cleanup(func() {
		vs.Shutdown()
	})
	return vs
}
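
// A typical test-side use of this fake, as a sketch (not copied from the
// real integration tests):
//
//	vs := NewVolumeServer(t, "")
//	conn, err := grpc.NewClient(vs.Address(),
//		grpc.WithTransportCredentials(insecure.NewCredentials()))
//	client := volume_server_pb.NewVolumeServerClient(conn)
//	// ... then call RPCs such as VolumeEcShardsMount or VolumeEcShardsInfo.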

// Address returns the gRPC address of the volume server.
func (v *VolumeServer) Address() string {
	return v.address
}

// BaseDir returns the base directory used by the server.
func (v *VolumeServer) BaseDir() string {
	return v.baseDir
}

// ReceivedFiles returns a snapshot of received files and byte counts.
func (v *VolumeServer) ReceivedFiles() map[string]uint64 {
	v.mu.Lock()
	defer v.mu.Unlock()
	out := make(map[string]uint64, len(v.receivedFiles))
	for key, value := range v.receivedFiles {
		out[key] = value
	}
	return out
}

// SetVacuumGarbageRatio sets the garbage ratio returned by VacuumVolumeCheck.
func (v *VolumeServer) SetVacuumGarbageRatio(ratio float64) {
	v.mu.Lock()
	defer v.mu.Unlock()
	v.vacuumGarbageRatio = ratio
}

// VacuumStats returns the vacuum RPC call counts.
func (v *VolumeServer) VacuumStats() (check, compact, commit, cleanup int) {
	v.mu.Lock()
	defer v.mu.Unlock()
	return v.vacuumCheckCalls, v.vacuumCompactCalls, v.vacuumCommitCalls, v.vacuumCleanupCalls
}

// BalanceStats returns the balance RPC call counts.
func (v *VolumeServer) BalanceStats() (copyCalls, mountCalls, tailCalls int) {
	v.mu.Lock()
	defer v.mu.Unlock()
	return v.volumeCopyCalls, v.volumeMountCalls, v.tailReceiverCalls
}

// MountRequests returns recorded mount requests.
func (v *VolumeServer) MountRequests() []*volume_server_pb.VolumeEcShardsMountRequest {
	v.mu.Lock()
	defer v.mu.Unlock()
	out := make([]*volume_server_pb.VolumeEcShardsMountRequest, len(v.mountRequests))
	copy(out, v.mountRequests)
	return out
}

// DeleteRequests returns recorded delete requests.
func (v *VolumeServer) DeleteRequests() []*volume_server_pb.VolumeDeleteRequest {
	v.mu.Lock()
	defer v.mu.Unlock()
	out := make([]*volume_server_pb.VolumeDeleteRequest, len(v.deleteRequests))
	copy(out, v.deleteRequests)
	return out
}

// MarkReadonlyCount returns the number of readonly calls.
func (v *VolumeServer) MarkReadonlyCount() int {
	v.mu.Lock()
	defer v.mu.Unlock()
	return v.markReadonlyCalls
}

// MarkWritableCount returns the number of writable calls.
func (v *VolumeServer) MarkWritableCount() int {
	v.mu.Lock()
	defer v.mu.Unlock()
	return v.markWritableCalls
}

// ReadFileStatusCount returns the number of ReadVolumeFileStatus calls.
func (v *VolumeServer) ReadFileStatusCount() int {
	v.mu.Lock()
	defer v.mu.Unlock()
	return v.readFileStatusCalls
}

// Shutdown stops the volume server.
func (v *VolumeServer) Shutdown() {
	if v.server != nil {
		v.server.GracefulStop()
	}
	if v.listener != nil {
		_ = v.listener.Close()
	}
}

// filePath maps a volume id and extension to the on-disk path under baseDir.
func (v *VolumeServer) filePath(volumeID uint32, ext string) string {
	return filepath.Join(v.baseDir, fmt.Sprintf("%d%s", volumeID, ext))
}

// CopyFile streams a local file back to the caller, stopping at StopOffset
// when one is set and honoring IgnoreSourceFileNotFound.
func (v *VolumeServer) CopyFile(req *volume_server_pb.CopyFileRequest, stream volume_server_pb.VolumeServer_CopyFileServer) error {
	if req == nil {
		return fmt.Errorf("copy file request is nil")
	}
	path := v.filePath(req.VolumeId, req.Ext)
	file, err := os.Open(path)
	if err != nil {
		if req.IgnoreSourceFileNotFound {
			return nil
		}
		return err
	}
	defer file.Close()
	buf := make([]byte, 64*1024)
	remaining := int64(req.GetStopOffset())
	for {
		if remaining == 0 {
			break
		}
		readBuf := buf
		if remaining > 0 && remaining < int64(len(buf)) {
			readBuf = buf[:remaining]
		}
		n, readErr := file.Read(readBuf)
		if n > 0 {
			if err := stream.Send(&volume_server_pb.CopyFileResponse{FileContent: readBuf[:n]}); err != nil {
				return err
			}
			if remaining > 0 {
				remaining -= int64(n)
			}
		}
		if readErr == io.EOF {
			break
		}
		if readErr != nil {
			return readErr
		}
	}
	return nil
}

// ReceiveFile writes a streamed upload under baseDir and records the byte
// count in receivedFiles.
func (v *VolumeServer) ReceiveFile(stream volume_server_pb.VolumeServer_ReceiveFileServer) error {
	var (
		info         *volume_server_pb.ReceiveFileInfo
		file         *os.File
		bytesWritten uint64
		filePath     string
	)
	defer func() {
		if file != nil {
			_ = file.Close()
		}
	}()
	for {
		req, err := stream.Recv()
		if err == io.EOF {
			if info == nil {
				return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{Error: "missing file info"})
			}
			v.mu.Lock()
			v.receivedFiles[filePath] = bytesWritten
			v.mu.Unlock()
			return stream.SendAndClose(&volume_server_pb.ReceiveFileResponse{BytesWritten: bytesWritten})
		}
		if err != nil {
			return err
		}
		if reqInfo := req.GetInfo(); reqInfo != nil {
			info = reqInfo
			filePath = v.filePath(info.VolumeId, info.Ext)
			if err := os.MkdirAll(filepath.Dir(filePath), 0755); err != nil {
				return err
			}
			file, err = os.Create(filePath)
			if err != nil {
				return err
			}
			continue
		}
		chunk := req.GetFileContent()
		if len(chunk) == 0 {
			continue
		}
		if file == nil {
			return fmt.Errorf("file info not received")
		}
		n, writeErr := file.Write(chunk)
		if writeErr != nil {
			return writeErr
		}
		bytesWritten += uint64(n)
	}
}

// VolumeEcShardsMount records the mount request for later inspection by
// tests and by VolumeEcShardsInfo.
func (v *VolumeServer) VolumeEcShardsMount(ctx context.Context, req *volume_server_pb.VolumeEcShardsMountRequest) (*volume_server_pb.VolumeEcShardsMountResponse, error) {
	v.mu.Lock()
	v.mountRequests = append(v.mountRequests, req)
	v.mu.Unlock()
	return &volume_server_pb.VolumeEcShardsMountResponse{}, nil
}

// VolumeEcShardsInfo reports the fake server's shard inventory so the
// pre-delete verification gate can be exercised end-to-end.
func (v *VolumeServer) VolumeEcShardsInfo(ctx context.Context, req *volume_server_pb.VolumeEcShardsInfoRequest) (*volume_server_pb.VolumeEcShardsInfoResponse, error) {
	if req == nil {
		return nil, fmt.Errorf("VolumeEcShardsInfo request is nil")
	}
	v.mu.Lock()
	defer v.mu.Unlock()
	// Report whichever shards exist on disk: seeded or mounted. Collection
	// comes from the matching mount request when one exists.
	collectionByShard := make(map[uint32]string)
	for _, mr := range v.mountRequests {
		if mr == nil || mr.VolumeId != req.VolumeId {
			continue
		}
		for _, shardId := range mr.ShardIds {
			if _, ok := collectionByShard[shardId]; !ok {
				collectionByShard[shardId] = mr.Collection
			}
		}
	}
	resp := &volume_server_pb.VolumeEcShardsInfoResponse{}
	prefix := fmt.Sprintf("%d.ec", req.VolumeId)
	entries, _ := os.ReadDir(v.baseDir) // best effort: a missing dir reports zero shards
	for _, entry := range entries {
		if entry.IsDir() {
			continue
		}
		name := entry.Name()
		if !strings.HasPrefix(name, prefix) {
			continue
		}
		suffix := strings.TrimPrefix(name, prefix)
		if len(suffix) < 2 {
			continue // skips .ecx/.ecj, whose suffixes are a single letter
		}
		var shardId uint32
		if _, err := fmt.Sscanf(suffix[:2], "%d", &shardId); err != nil {
			continue
		}
		var size int64
		if info, err := entry.Info(); err == nil {
			size = info.Size()
		}
		resp.EcShardInfos = append(resp.EcShardInfos, &volume_server_pb.EcShardInfo{
			ShardId:    shardId,
			Size:       size,
			Collection: collectionByShard[shardId],
			VolumeId:   req.VolumeId,
		})
	}
	return resp, nil
}

// VolumeDelete records the request and removes the on-disk .dat and .idx,
// mimicking the destructive path the verification gate guards.
func (v *VolumeServer) VolumeDelete(ctx context.Context, req *volume_server_pb.VolumeDeleteRequest) (*volume_server_pb.VolumeDeleteResponse, error) {
	v.mu.Lock()
	v.deleteRequests = append(v.deleteRequests, req)
	v.mu.Unlock()
	if req != nil {
		_ = os.Remove(v.filePath(req.VolumeId, ".dat"))
		_ = os.Remove(v.filePath(req.VolumeId, ".idx"))
	}
	return &volume_server_pb.VolumeDeleteResponse{}, nil
}

func (v *VolumeServer) VolumeMarkReadonly(ctx context.Context, req *volume_server_pb.VolumeMarkReadonlyRequest) (*volume_server_pb.VolumeMarkReadonlyResponse, error) {
	v.mu.Lock()
	v.markReadonlyCalls++
	v.mu.Unlock()
	return &volume_server_pb.VolumeMarkReadonlyResponse{}, nil
}

func (v *VolumeServer) VolumeMarkWritable(ctx context.Context, req *volume_server_pb.VolumeMarkWritableRequest) (*volume_server_pb.VolumeMarkWritableResponse, error) {
	v.mu.Lock()
	v.markWritableCalls++
	v.mu.Unlock()
	return &volume_server_pb.VolumeMarkWritableResponse{}, nil
}

// ReadVolumeFileStatus stats the on-disk .dat and .idx so callers can check
// that a regenerated volume is non-empty.
func (v *VolumeServer) ReadVolumeFileStatus(ctx context.Context, req *volume_server_pb.ReadVolumeFileStatusRequest) (*volume_server_pb.ReadVolumeFileStatusResponse, error) {
	v.mu.Lock()
	v.readFileStatusCalls++
	v.mu.Unlock()
	datInfo, err := os.Stat(v.filePath(req.VolumeId, ".dat"))
	if err != nil {
		return nil, err
	}
	idxInfo, err := os.Stat(v.filePath(req.VolumeId, ".idx"))
	if err != nil {
		return nil, err
	}
	return &volume_server_pb.ReadVolumeFileStatusResponse{
		VolumeId:    req.VolumeId,
		DatFileSize: uint64(datInfo.Size()),
		IdxFileSize: uint64(idxInfo.Size()),
		FileCount:   1,
	}, nil
}

func (v *VolumeServer) VacuumVolumeCheck(ctx context.Context, req *volume_server_pb.VacuumVolumeCheckRequest) (*volume_server_pb.VacuumVolumeCheckResponse, error) {
	v.mu.Lock()
	v.vacuumCheckCalls++
	ratio := v.vacuumGarbageRatio
	v.mu.Unlock()
	return &volume_server_pb.VacuumVolumeCheckResponse{GarbageRatio: ratio}, nil
}

func (v *VolumeServer) VacuumVolumeCompact(req *volume_server_pb.VacuumVolumeCompactRequest, stream volume_server_pb.VolumeServer_VacuumVolumeCompactServer) error {
	v.mu.Lock()
	v.vacuumCompactCalls++
	v.mu.Unlock()
	return stream.Send(&volume_server_pb.VacuumVolumeCompactResponse{ProcessedBytes: 1024})
}

func (v *VolumeServer) VacuumVolumeCommit(ctx context.Context, req *volume_server_pb.VacuumVolumeCommitRequest) (*volume_server_pb.VacuumVolumeCommitResponse, error) {
	v.mu.Lock()
	v.vacuumCommitCalls++
	v.mu.Unlock()
	return &volume_server_pb.VacuumVolumeCommitResponse{}, nil
}

func (v *VolumeServer) VacuumVolumeCleanup(ctx context.Context, req *volume_server_pb.VacuumVolumeCleanupRequest) (*volume_server_pb.VacuumVolumeCleanupResponse, error) {
	v.mu.Lock()
	v.vacuumCleanupCalls++
	v.mu.Unlock()
	return &volume_server_pb.VacuumVolumeCleanupResponse{}, nil
}

// VolumeCopy fetches .dat and .idx from the source volume server over its
// CopyFile RPC, mimicking a real replica copy.
func (v *VolumeServer) VolumeCopy(req *volume_server_pb.VolumeCopyRequest, stream volume_server_pb.VolumeServer_VolumeCopyServer) error {
	v.mu.Lock()
	v.volumeCopyCalls++
	v.mu.Unlock()
	dialOption := grpc.WithTransportCredentials(insecure.NewCredentials())
	var statusResp *volume_server_pb.ReadVolumeFileStatusResponse
	if err := operation.WithVolumeServerClient(false, pb.ServerAddress(req.SourceDataNode), dialOption,
		func(client volume_server_pb.VolumeServerClient) error {
			var readErr error
			statusResp, readErr = client.ReadVolumeFileStatus(stream.Context(), &volume_server_pb.ReadVolumeFileStatusRequest{
				VolumeId: req.VolumeId,
			})
			return readErr
		}); err != nil {
		return err
	}
	if err := v.copyRemoteFile(stream.Context(), req.SourceDataNode, req.VolumeId, ".dat", statusResp.DatFileSize, dialOption); err != nil {
		return err
	}
	if err := v.copyRemoteFile(stream.Context(), req.SourceDataNode, req.VolumeId, ".idx", statusResp.IdxFileSize, dialOption); err != nil {
		return err
	}
	if err := stream.Send(&volume_server_pb.VolumeCopyResponse{ProcessedBytes: int64(statusResp.DatFileSize + statusResp.IdxFileSize)}); err != nil {
		return err
	}
	return stream.Send(&volume_server_pb.VolumeCopyResponse{LastAppendAtNs: uint64(time.Now().UnixNano())})
}

func (v *VolumeServer) VolumeMount(ctx context.Context, req *volume_server_pb.VolumeMountRequest) (*volume_server_pb.VolumeMountResponse, error) {
	v.mu.Lock()
	v.volumeMountCalls++
	v.mu.Unlock()
	return &volume_server_pb.VolumeMountResponse{}, nil
}

func (v *VolumeServer) VolumeTailReceiver(ctx context.Context, req *volume_server_pb.VolumeTailReceiverRequest) (*volume_server_pb.VolumeTailReceiverResponse, error) {
	v.mu.Lock()
	v.tailReceiverCalls++
	v.mu.Unlock()
	return &volume_server_pb.VolumeTailReceiverResponse{}, nil
}

// copyRemoteFile streams one file from the source volume server into baseDir
// via its CopyFile RPC.
func (v *VolumeServer) copyRemoteFile(ctx context.Context, sourceDataNode string, volumeID uint32, ext string, fileSize uint64, dialOption grpc.DialOption) error {
	path := v.filePath(volumeID, ext)
	if err := os.MkdirAll(filepath.Dir(path), 0755); err != nil {
		return err
	}
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	defer file.Close()
	return operation.WithVolumeServerClient(true, pb.ServerAddress(sourceDataNode), dialOption,
		func(client volume_server_pb.VolumeServerClient) error {
			stream, err := client.CopyFile(ctx, &volume_server_pb.CopyFileRequest{
				VolumeId:   volumeID,
				Ext:        ext,
				StopOffset: fileSize,
			})
			if err != nil {
				return err
			}
			for {
				resp, recvErr := stream.Recv()
				if recvErr == io.EOF {
					return nil
				}
				if recvErr != nil {
					return recvErr
				}
				if len(resp.FileContent) == 0 {
					continue
				}
				if _, err := file.Write(resp.FileContent); err != nil {
					return err
				}
			}
		})
}