From 3557ae283fc64d368799f9f264ef24b3bb2e1ce6 Mon Sep 17 00:00:00 2001
From: Ping Qiu <pingqiu@gmail.com>
Date: Mon, 9 Mar 2026 17:44:01 -0700
Subject: [PATCH] feat: Phase 10 CP10-3 -- NVMe/TCP Tier 1 optimizations, WAL
 admission control, benchmark platform

CP10-3 Tier 1 optimizations (T1-T4):
- TCP_NODELAY + 256KB socket buffers on NVMe/TCP connections
- Response batching: all C2H data chunks + CapsuleResp in single flush
- Tiered buffer pool (4KB/64KB/256KB sync.Pool) for write payloads
- Configurable MaxH2CDataLength wiring through controller/IC/chunking

BUG-CP103-1: NVMe write retry with jittered backoff for transient WAL pressure
- writeWithRetry() with bounded backoff [50/200/800ms]
- throttleOnWALPressure() pre-write delay above 90% WAL usage
- WALPressureProvider interface + NVMeAdapter.WALPressure()

BUG-CP103-2: Volume-level WAL admission control
- WALAdmission with counting semaphore (max concurrent writers)
- Soft watermark (0.7): small delay to desynchronize herd
- Hard watermark (0.9): block until flusher drains
- Single-deadline budget shared across watermark wait + semaphore
- Close-aware during both watermark and semaphore waits
- Wired into BlockVol.WriteLBA() and Trim()

Benchmark platform enhancements:
- NVMe benchmark actions and scenarios (A/B, CW sweep, IOQ sweep)
- Database benchmark actions (SQLite, pgbench)
- K8s operator QA reconciler tests
- New testrunner scenarios for HA, fault injection, CSI lifecycle

Test counts: 213 NVMe + 625 engine + operator + testrunner tests, all passing.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 weed/storage/blockvol/blockvol.go             |   36 +
 weed/storage/blockvol/config.go               |   24 +
 weed/storage/blockvol/config_test.go          |    6 +
 .../blockvol/iscsi/cmd/iscsi-target/main.go   |   60 +-
 weed/storage/blockvol/nvme/adapter.go         |    6 +
 weed/storage/blockvol/nvme/bufpool.go         |   47 +
 weed/storage/blockvol/nvme/controller.go      |  183 +-
 weed/storage/blockvol/nvme/fabric.go          |   33 +-
 weed/storage/blockvol/nvme/identify.go        |   33 +-
 weed/storage/blockvol/nvme/io.go              |   48 +-
 weed/storage/blockvol/nvme/nvme_qa_test.go    | 2176 ++++++++++++++++-
 weed/storage/blockvol/nvme/nvme_test.go       | 1138 ++++++++-
 weed/storage/blockvol/nvme/protocol.go        |   59 +
 weed/storage/blockvol/nvme/server.go          |   20 +-
 weed/storage/blockvol/nvme/wire.go            |   78 +-
 weed/storage/blockvol/nvme/write_retry.go     |   80 +
 .../internal/controller/qa_reconciler_test.go |  541 ++++
 weed/storage/blockvol/qa_phase4a_cp3_test.go  |    4 +
 .../storage/blockvol/qa_wal_admission_test.go |  462 ++++
 .../blockvol/testrunner/actions/bench.go      |  448 ++++
 .../blockvol/testrunner/actions/bench_test.go |  365 +++
 .../blockvol/testrunner/actions/block.go      |   11 +-
 .../blockvol/testrunner/actions/database.go   |  196 +-
 .../testrunner/actions/devops_test.go         |   72 +-
 .../blockvol/testrunner/actions/k8s.go        |  540 ++++
 .../blockvol/testrunner/actions/nvme.go       |  218 ++
 .../testrunner/actions/nvme_bench_test.go     | 1013 ++++++++
 .../blockvol/testrunner/actions/register.go   |    3 +
 weed/storage/blockvol/testrunner/agent.go     |   22 +-
 .../testrunner/cmd/sw-test-runner/main.go     |   24 +-
 weed/storage/blockvol/testrunner/engine.go    |   98 +-
 .../blockvol/testrunner/engine_test.go        |  334 +++
 .../blockvol/testrunner/infra/fault.go        |    8 +-
 .../blockvol/testrunner/infra/ha_target.go    |  125 +-
 .../storage/blockvol/testrunner/infra/node.go |   29 +-
 .../blockvol/testrunner/infra/target.go       |   78 +-
 weed/storage/blockvol/testrunner/parser.go    |    6 +
 .../testrunner/scenarios/cp103-25g-ab.yaml    |  455 ++++
 .../scenarios/cp103-nvme-cw-sweep.yaml        |  435 ++++
 .../scenarios/cp103-nvme-ioq-sweep.yaml       |  236 ++
 .../scenarios/cp103-perf-baseline.yaml        |  431 ++++
 .../scenarios/cp83-snapshot-expand.yaml       |    4 +-
 .../scenarios/cp85-perf-baseline.yaml         |    7 +-
 .../testrunner/scenarios/ha-rf3-failover.yaml |  157 ++
 .../scenarios/lease-expiry-write-gate.yaml    |  128 +
 .../scenarios/lease-renewal-under-io.yaml     |  138 ++
 .../scenarios/op-csi-lifecycle.yaml           |  174 ++
 .../scenarios/op-failure-injection.yaml       |  199 ++
 .../testrunner/scenarios/op-mini-soak.yaml    |  315 +++
 .../scenarios/op-ownership-conflict.yaml      |  242 ++
 .../scenarios/op-upgrade-rollback.yaml        |  154 ++
 weed/storage/blockvol/testrunner/types.go     |   38 +-
 weed/storage/blockvol/wal_admission.go        |  121 +
 weed/storage/blockvol/wal_admission_test.go   |  354 +++
 54 files changed, 12022 insertions(+), 190 deletions(-)
 create mode 100644 weed/storage/blockvol/nvme/bufpool.go
 create mode 100644 weed/storage/blockvol/nvme/write_retry.go
 create mode 100644 weed/storage/blockvol/qa_wal_admission_test.go
 create mode 100644 weed/storage/blockvol/testrunner/actions/bench.go
 create mode 100644 weed/storage/blockvol/testrunner/actions/bench_test.go
 create mode 100644 weed/storage/blockvol/testrunner/actions/k8s.go
 create mode 100644 weed/storage/blockvol/testrunner/actions/nvme.go
 create mode 100644 weed/storage/blockvol/testrunner/actions/nvme_bench_test.go
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/cp103-25g-ab.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/cp103-nvme-cw-sweep.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/cp103-nvme-ioq-sweep.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/ha-rf3-failover.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/lease-expiry-write-gate.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/lease-renewal-under-io.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/op-csi-lifecycle.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/op-failure-injection.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/op-mini-soak.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/op-ownership-conflict.yaml
 create mode 100644 weed/storage/blockvol/testrunner/scenarios/op-upgrade-rollback.yaml
 create mode 100644 weed/storage/blockvol/wal_admission.go
 create mode 100644 weed/storage/blockvol/wal_admission_test.go

diff --git a/weed/storage/blockvol/blockvol.go b/weed/storage/blockvol/blockvol.go
index 493c0deca..3355d79c3 100644
--- a/weed/storage/blockvol/blockvol.go
+++ b/weed/storage/blockvol/blockvol.go
@@ -65,6 +65,9 @@ type BlockVol struct {
 	healthScore *HealthScore
 	scrubber    *Scrubber
 
+	// Write admission control (BUG-CP103-2).
+	walAdmission *WALAdmission
+
 	// Observability (CP8-4).
 	Metrics *EngineMetrics
 
@@ -156,6 +159,14 @@ func CreateBlockVol(path string, opts CreateOptions, cfgs ...BlockVolConfig) (*B
 		Metrics:  v.Metrics,
 	})
 	go v.flusher.Run()
+	v.walAdmission = NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: cfg.WALMaxConcurrentWrites,
+		SoftWatermark: cfg.WALSoftWatermark,
+		HardWatermark: cfg.WALHardWatermark,
+		WALUsedFn:     wal.UsedFraction,
+		NotifyFn:      v.flusher.NotifyUrgent,
+		ClosedFn:      v.closed.Load,
+	})
 	return v, nil
 }
 
@@ -255,6 +266,15 @@ func OpenBlockVol(path string, cfgs ...BlockVolConfig) (*BlockVol, error) {
 		log.Printf("blockvol: recovered %d snapshot(s)", len(v.snapshots))
 	}
 
+	v.walAdmission = NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: cfg.WALMaxConcurrentWrites,
+		SoftWatermark: cfg.WALSoftWatermark,
+		HardWatermark: cfg.WALHardWatermark,
+		WALUsedFn:     wal.UsedFraction,
+		NotifyFn:      v.flusher.NotifyUrgent,
+		ClosedFn:      v.closed.Load,
+	})
+
 	return v, nil
 }
 
@@ -335,6 +355,14 @@ func (v *BlockVol) WriteLBA(lba uint64, data []byte) error {
 		return err
 	}
 
+	// Admission control: throttle/block based on WAL pressure watermarks.
+	if v.walAdmission != nil {
+		if err := v.walAdmission.Acquire(v.config.WALFullTimeout); err != nil {
+			return fmt.Errorf("blockvol: write admission: %w", err)
+		}
+		defer v.walAdmission.Release()
+	}
+
 	lsn := v.nextLSN.Add(1) - 1
 	entry := &WALEntry{
 		LSN:    lsn,
@@ -511,6 +539,14 @@ func (v *BlockVol) Trim(lba uint64, length uint32) error {
 		return err
 	}
 
+	// Admission control: throttle/block based on WAL pressure watermarks.
+	if v.walAdmission != nil {
+		if err := v.walAdmission.Acquire(v.config.WALFullTimeout); err != nil {
+			return fmt.Errorf("blockvol: trim admission: %w", err)
+		}
+		defer v.walAdmission.Release()
+	}
+
 	lsn := v.nextLSN.Add(1) - 1
 	entry := &WALEntry{
 		LSN:    lsn,
diff --git a/weed/storage/blockvol/config.go b/weed/storage/blockvol/config.go
index bf7a00faf..c90fcf628 100644
--- a/weed/storage/blockvol/config.go
+++ b/weed/storage/blockvol/config.go
@@ -16,6 +16,9 @@ type BlockVolConfig struct {
 	WALFullTimeout         time.Duration // max retry time when WAL is full (default 5s)
 	FlushInterval          time.Duration // flusher periodic interval (default 100ms)
 	DirtyMapShards         int           // number of dirty map shards, must be power-of-2 (default 256)
+	WALSoftWatermark       float64       // WAL fraction above which writes begin throttling (default 0.7)
+	WALHardWatermark       float64       // WAL fraction above which writes block until drain (default 0.9)
+	WALMaxConcurrentWrites int           // max concurrent writers in WAL append path (default 16)
 }
 
 // DefaultConfig returns a BlockVolConfig with production defaults.
@@ -28,6 +31,9 @@ func DefaultConfig() BlockVolConfig {
 		WALFullTimeout:         5 * time.Second,
 		FlushInterval:          100 * time.Millisecond,
 		DirtyMapShards:         256,
+		WALSoftWatermark:       0.7,
+		WALHardWatermark:       0.9,
+		WALMaxConcurrentWrites: 16,
 	}
 }
 
@@ -55,6 +61,15 @@ func (c *BlockVolConfig) applyDefaults() {
 	if c.DirtyMapShards == 0 {
 		c.DirtyMapShards = d.DirtyMapShards
 	}
+	if c.WALSoftWatermark == 0 {
+		c.WALSoftWatermark = d.WALSoftWatermark
+	}
+	if c.WALHardWatermark == 0 {
+		c.WALHardWatermark = d.WALHardWatermark
+	}
+	if c.WALMaxConcurrentWrites == 0 {
+		c.WALMaxConcurrentWrites = d.WALMaxConcurrentWrites
+	}
 }
 
 var errInvalidConfig = errors.New("blockvol: invalid config")
@@ -82,5 +97,14 @@ func (c *BlockVolConfig) Validate() error {
 	if c.FlushInterval <= 0 {
 		return fmt.Errorf("%w: FlushInterval must be positive, got %v", errInvalidConfig, c.FlushInterval)
 	}
+	if c.WALSoftWatermark <= 0 || c.WALSoftWatermark >= 1 {
+		return fmt.Errorf("%w: WALSoftWatermark must be in (0,1), got %f", errInvalidConfig, c.WALSoftWatermark)
+	}
+	if c.WALHardWatermark <= c.WALSoftWatermark || c.WALHardWatermark > 1 {
+		return fmt.Errorf("%w: WALHardWatermark must be in (SoftWatermark,1], got %f", errInvalidConfig, c.WALHardWatermark)
+	}
+	if c.WALMaxConcurrentWrites <= 0 {
+		return fmt.Errorf("%w: WALMaxConcurrentWrites must be positive, got %d", errInvalidConfig, c.WALMaxConcurrentWrites)
+	}
 	return nil
 }
diff --git a/weed/storage/blockvol/config_test.go b/weed/storage/blockvol/config_test.go
index d34930d99..cbb1a7fc1 100644
--- a/weed/storage/blockvol/config_test.go
+++ b/weed/storage/blockvol/config_test.go
@@ -64,6 +64,9 @@ func testConfigValidateGood(t *testing.T) {
 			WALFullTimeout:         10 * time.Second,
 			FlushInterval:          50 * time.Millisecond,
 			DirtyMapShards:         1,
+			WALSoftWatermark:       0.5,
+			WALHardWatermark:       0.8,
+			WALMaxConcurrentWrites: 32,
 		},
 		{
 			GroupCommitMaxDelay:     1 * time.Microsecond,
@@ -73,6 +76,9 @@ func testConfigValidateGood(t *testing.T) {
 			WALFullTimeout:         1 * time.Millisecond,
 			FlushInterval:          1 * time.Millisecond,
 			DirtyMapShards:         1024,
+			WALSoftWatermark:       0.3,
+			WALHardWatermark:       0.6,
+			WALMaxConcurrentWrites: 4,
 		},
 	}
 	for i, cfg := range cases {
diff --git a/weed/storage/blockvol/iscsi/cmd/iscsi-target/main.go b/weed/storage/blockvol/iscsi/cmd/iscsi-target/main.go
index b121daa07..cebce459a 100644
--- a/weed/storage/blockvol/iscsi/cmd/iscsi-target/main.go
+++ b/weed/storage/blockvol/iscsi/cmd/iscsi-target/main.go
@@ -20,6 +20,7 @@ import (
 	"github.com/prometheus/client_golang/prometheus"
 	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
 	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/iscsi"
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/nvme"
 )
 
 func main() {
@@ -35,8 +36,13 @@ func main() {
 	replicaData := flag.String("replica-data", "", "replica receiver data listen address (e.g. :9001; empty = disabled)")
 	replicaCtrl := flag.String("replica-ctrl", "", "replica receiver ctrl listen address (e.g. :9002; empty = disabled)")
 	rebuildListen := flag.String("rebuild-listen", "", "rebuild server listen address (e.g. :9003; empty = disabled)")
+	walSize := flag.String("wal-size", "64M", "WAL size (e.g., 64M, 128M) -- used with -create")
 	chapUser := flag.String("chap-user", "", "CHAP username (empty = CHAP disabled)")
 	chapSecret := flag.String("chap-secret", "", "CHAP shared secret")
+	nvmeAddr := flag.String("nvme-addr", "", "NVMe/TCP listen address (e.g. :4420; empty = disabled)")
+	nqn := flag.String("nqn", "", "NVMe NQN (defaults to nqn.2024-01.com.seaweedfs:vol.<sanitized iqn suffix>)")
+	walMaxCW := flag.Int("wal-max-concurrent-writes", 0, "max concurrent writers in WAL append path (0 = use default 16)")
+	nvmeIOQueues := flag.Int("nvme-io-queues", 0, "max NVMe IO queues (0 = use default 4)")
 	flag.Parse()
 
 	if *volPath == "" {
@@ -53,6 +59,15 @@ func main() {
 
 	logger := log.New(os.Stdout, "[iscsi] ", log.LstdFlags)
 
+	// Build config with optional WAL concurrency override.
+	var cfgs []blockvol.BlockVolConfig
+	if *walMaxCW > 0 {
+		cfg := blockvol.DefaultConfig()
+		cfg.WALMaxConcurrentWrites = *walMaxCW
+		cfgs = append(cfgs, cfg)
+		logger.Printf("WALMaxConcurrentWrites = %d", *walMaxCW)
+	}
+
 	var vol *blockvol.BlockVol
 	var err error
 
@@ -61,9 +76,13 @@ func main() {
 		if parseErr != nil {
 			log.Fatalf("invalid size %q: %v", *size, parseErr)
 		}
+		walBytes, parseErr := parseSize(*walSize)
+		if parseErr != nil {
+			log.Fatalf("invalid wal-size %q: %v", *walSize, parseErr)
+		}
 		if _, statErr := os.Stat(*volPath); statErr == nil {
 			// File exists -- open it instead of failing
-			vol, err = blockvol.OpenBlockVol(*volPath)
+			vol, err = blockvol.OpenBlockVol(*volPath, cfgs...)
 			if err != nil {
 				log.Fatalf("open existing volume: %v", err)
 			}
@@ -72,15 +91,15 @@ func main() {
 			vol, err = blockvol.CreateBlockVol(*volPath, blockvol.CreateOptions{
 				VolumeSize: volSize,
 				BlockSize:  4096,
-				WALSize:    64 * 1024 * 1024,
-			})
+				WALSize:    walBytes,
+			}, cfgs...)
 			if err != nil {
 				log.Fatalf("create volume: %v", err)
 			}
 			logger.Printf("created volume: %s (%s)", *volPath, *size)
 		}
 	} else {
-		vol, err = blockvol.OpenBlockVol(*volPath)
+		vol, err = blockvol.OpenBlockVol(*volPath, cfgs...)
 		if err != nil {
 			log.Fatalf("open volume: %v", err)
 		}
@@ -154,6 +173,36 @@ func main() {
 	}
 	ts.AddVolume(*iqn, adapter)
 
+	// Start NVMe/TCP target if configured.
+	var nvmeSrv *nvme.Server
+	if *nvmeAddr != "" {
+		nvmeNQN := *nqn
+		if nvmeNQN == "" {
+			// Derive NQN from IQN: extract suffix after last ':'
+			iqnParts := strings.SplitN(*iqn, ":", 2)
+			suffix := *iqn
+			if len(iqnParts) == 2 {
+				suffix = iqnParts[1]
+			}
+			nvmeNQN = blockvol.BuildNQN("nqn.2024-01.com.seaweedfs:vol.", suffix)
+		}
+
+		nvmeCfg := nvme.DefaultConfig()
+		nvmeCfg.ListenAddr = *nvmeAddr
+		nvmeCfg.Enabled = true
+		if *nvmeIOQueues > 0 {
+			nvmeCfg.MaxIOQueues = uint16(*nvmeIOQueues)
+			logger.Printf("NVMe MaxIOQueues = %d", *nvmeIOQueues)
+		}
+
+		nvmeSrv = nvme.NewServer(nvmeCfg)
+		nvmeSrv.AddVolume(nvmeNQN, adapter, [16]byte{}) // NGUID zero = auto
+		if err := nvmeSrv.ListenAndServe(); err != nil {
+			log.Fatalf("nvme target: %v", err)
+		}
+		logger.Printf("NVMe/TCP target: %s on %s", nvmeNQN, *nvmeAddr)
+	}
+
 	// Start periodic performance stats logging (every 5 seconds).
 	instrumented.StartStatsLogger(5 * time.Second)
 
@@ -163,6 +212,9 @@ func main() {
 	go func() {
 		sig := <-sigCh
 		logger.Printf("received %v, shutting down...", sig)
+		if nvmeSrv != nil {
+			nvmeSrv.Close()
+		}
 		ts.Close()
 	}()
 
diff --git a/weed/storage/blockvol/nvme/adapter.go b/weed/storage/blockvol/nvme/adapter.go
index 8edabbfd3..5a386fcda 100644
--- a/weed/storage/blockvol/nvme/adapter.go
+++ b/weed/storage/blockvol/nvme/adapter.go
@@ -61,9 +61,15 @@ func (a *NVMeAdapter) DeviceNGUID() [16]byte {
 	return UUIDToNGUID(a.Vol.Info().UUID)
 }
 
+// WALPressure returns the current WAL usage fraction (0.0–1.0).
+func (a *NVMeAdapter) WALPressure() float64 {
+	return a.Vol.WALUsedFraction()
+}
+
 // Compile-time checks.
 var _ BlockDevice = (*NVMeAdapter)(nil)
 var _ ANAProvider = (*NVMeAdapter)(nil)
+var _ WALPressureProvider = (*NVMeAdapter)(nil)
 
 // RoleToANAState maps a BlockVol Role to an NVMe ANA state.
 func RoleToANAState(r blockvol.Role) uint8 {
diff --git a/weed/storage/blockvol/nvme/bufpool.go b/weed/storage/blockvol/nvme/bufpool.go
new file mode 100644
index 000000000..6359e2323
--- /dev/null
+++ b/weed/storage/blockvol/nvme/bufpool.go
@@ -0,0 +1,47 @@
+package nvme
+
+import "sync"
+
+// bufPool provides tiered buffer pools for NVMe I/O.
+// Three tiers: 4KB (small I/O), 64KB (medium), 256KB (large).
+var bufPool = struct {
+	small  sync.Pool // 4KB
+	medium sync.Pool // 64KB
+	large  sync.Pool // 256KB
+}{
+	small:  sync.Pool{New: func() any { b := make([]byte, 4096); return &b }},
+	medium: sync.Pool{New: func() any { b := make([]byte, 65536); return &b }},
+	large:  sync.Pool{New: func() any { b := make([]byte, 262144); return &b }},
+}
+
+// getBuffer returns a buffer of at least size bytes from the pool.
+func getBuffer(size int) []byte {
+	switch {
+	case size <= 4096:
+		bp := bufPool.small.Get().(*[]byte)
+		return (*bp)[:size]
+	case size <= 65536:
+		bp := bufPool.medium.Get().(*[]byte)
+		return (*bp)[:size]
+	case size <= 262144:
+		bp := bufPool.large.Get().(*[]byte)
+		return (*bp)[:size]
+	default:
+		return make([]byte, size) // oversized: don't pool
+	}
+}
+
+// putBuffer returns a buffer to the appropriate pool.
+func putBuffer(buf []byte) {
+	c := cap(buf)
+	buf = buf[:c]
+	switch c {
+	case 4096:
+		bufPool.small.Put(&buf)
+	case 65536:
+		bufPool.medium.Put(&buf)
+	case 262144:
+		bufPool.large.Put(&buf)
+	// Oversized or wrong-sized: let GC collect
+	}
+}
diff --git a/weed/storage/blockvol/nvme/controller.go b/weed/storage/blockvol/nvme/controller.go
index 1e4d4ae4f..bb5b5eb6a 100644
--- a/weed/storage/blockvol/nvme/controller.go
+++ b/weed/storage/blockvol/nvme/controller.go
@@ -74,7 +74,12 @@ type Controller struct {
 	// Features
 	maxIOQueues   uint16
 	grantedQueues uint16
-	isAdmin       bool // true if this controller owns admin queue (QID=0)
+	isAdmin       bool   // true if this controller owns admin queue (QID=0)
+	maxDataLen    uint32 // C2H/H2C data chunk size (from Config)
+
+	// Command interleaving: capsules received during R2T H2CData collection.
+	// Drained by Serve() before reading the next PDU from the wire.
+	pendingCapsules []*Request
 
 	// Lifecycle
 	wg     sync.WaitGroup
@@ -83,16 +88,21 @@ type Controller struct {
 
 // newController creates a controller for the given connection.
 func newController(conn net.Conn, server *Server) *Controller {
+	maxData := server.cfg.MaxH2CDataLength
+	if maxData == 0 {
+		maxData = maxH2CDataLen // fallback to 32KB default
+	}
 	c := &Controller{
 		conn:   conn,
 		in:     NewReader(conn),
-		out:    NewWriter(conn),
+		out:    NewWriterSize(conn, int(maxData)+maxHeaderSize),
 		state:  stateConnected,
 		server: server,
 		regVS:  nvmeVersion14,
 		// CAP register: MQES=63 (bits 15:0), CQR=1 (bit 16), TO=30 (bits 31:24, *500ms=15s), CSS bit37=1 (NVM command set)
-		regCAP: uint64(63) | (1 << 16) | (uint64(30) << 24) | (1 << 37),
+		regCAP:      uint64(63) | (1 << 16) | (uint64(30) << 24) | (1 << 37),
 		maxIOQueues: server.cfg.MaxIOQueues,
+		maxDataLen:  maxData,
 	}
 	return c
 }
@@ -111,6 +121,15 @@ func (c *Controller) Serve() error {
 			return nil
 		}
 
+		// Drain capsules that arrived during a prior R2T data collection.
+		for len(c.pendingCapsules) > 0 {
+			req := c.pendingCapsules[0]
+			c.pendingCapsules = c.pendingCapsules[1:]
+			if err := c.dispatchPending(req); err != nil {
+				return fmt.Errorf("pending capsule: %w", err)
+			}
+		}
+
 		hdr, err := c.in.Dequeue()
 		if err != nil {
 			if err == io.EOF || c.closed.Load() {
@@ -134,6 +153,11 @@ func (c *Controller) Serve() error {
 				return fmt.Errorf("capsule: %w", err)
 			}
 
+		case pduH2CData:
+			// H2CData PDUs are only expected after R2T, handled inline
+			// by recvH2CData. If we see one here, it's unexpected.
+			return fmt.Errorf("unexpected H2CData PDU outside R2T flow")
+
 		case pduH2CTermReq:
 			return nil // host terminated
 
@@ -152,7 +176,7 @@ func (c *Controller) handleIC() error {
 
 	resp := ICResponse{
 		PDUFormatVersion: 0,
-		MaxH2CDataLength: maxH2CDataLen,
+		MaxH2CDataLength: c.maxDataLen,
 	}
 	if err := c.out.SendHeaderOnly(pduICResp, &resp, icBodySize); err != nil {
 		return err
@@ -177,8 +201,9 @@ func (c *Controller) handleCapsule() error {
 	// Read optional inline data
 	var payload []byte
 	if dataLen := c.in.Length(); dataLen > 0 {
-		payload = make([]byte, dataLen)
+		payload = getBuffer(int(dataLen))
 		if err := c.in.ReceiveData(payload); err != nil {
+			putBuffer(payload)
 			return err
 		}
 	}
@@ -206,8 +231,28 @@ func (c *Controller) handleCapsule() error {
 	return c.dispatchIO(req)
 }
 
+// dispatchPending processes a capsule that was buffered during R2T data
+// collection. The capsule and payload are already fully read — only
+// SQHD advance and command dispatch remain.
+func (c *Controller) dispatchPending(req *Request) error {
+	c.sqhd++
+	if c.sqhd >= c.queueSize && c.queueSize > 0 {
+		c.sqhd = 0
+	}
+	if c.queueID == 0 {
+		return c.dispatchAdmin(req)
+	}
+	return c.dispatchIO(req)
+}
+
 // dispatchAdmin handles admin queue commands synchronously.
 func (c *Controller) dispatchAdmin(req *Request) error {
+	defer func() {
+		if req.payload != nil {
+			putBuffer(req.payload)
+			req.payload = nil
+		}
+	}()
 	capsule := &req.capsule
 
 	if capsule.OpCode == adminFabric {
@@ -236,6 +281,12 @@ func (c *Controller) dispatchAdmin(req *Request) error {
 
 // dispatchIO handles IO queue commands.
 func (c *Controller) dispatchIO(req *Request) error {
+	defer func() {
+		if req.payload != nil {
+			putBuffer(req.payload)
+			req.payload = nil
+		}
+	}()
 	capsule := &req.capsule
 
 	switch capsule.OpCode {
@@ -254,11 +305,13 @@ func (c *Controller) dispatchIO(req *Request) error {
 }
 
 // sendC2HDataAndResponse sends C2HData PDUs followed by a CapsuleResp.
+// All chunks and the final response are batched in the bufio buffer,
+// then flushed to the wire in a single FlushBuf() call.
 func (c *Controller) sendC2HDataAndResponse(req *Request) error {
 	if len(req.c2hData) > 0 {
 		data := req.c2hData
 		offset := uint32(0)
-		chunkSize := uint32(maxH2CDataLen)
+		chunkSize := c.maxDataLen
 
 		for offset < uint32(len(data)) {
 			end := offset + chunkSize
@@ -278,14 +331,26 @@ func (c *Controller) sendC2HDataAndResponse(req *Request) error {
 				flags = c2hFlagLast
 			}
 
-			if err := c.out.SendWithData(pduC2HData, flags, &hdr, c2hDataHdrSize, chunk); err != nil {
+			if err := c.out.writeHeaderAndData(pduC2HData, flags, &hdr, c2hDataHdrSize, chunk); err != nil {
 				return err
 			}
 			offset = end
 		}
 	}
 
-	return c.sendResponse(req)
+	// Write CapsuleResp to bufio buffer
+	if c.flowCtlOff {
+		req.resp.SQHD = 0xFFFF
+	} else {
+		req.resp.SQHD = c.sqhd
+	}
+	c.resetKATO()
+	if err := c.out.writeHeaderAndData(pduCapsuleResp, 0, &req.resp, capsuleRespSize, nil); err != nil {
+		return err
+	}
+
+	// Single flush: all C2H chunks + CapsuleResp in one syscall
+	return c.out.FlushBuf()
 }
 
 // sendResponse sends a CapsuleResp PDU.
@@ -302,6 +367,108 @@ func (c *Controller) sendResponse(req *Request) error {
 	return c.out.SendHeaderOnly(pduCapsuleResp, &req.resp, capsuleRespSize)
 }
 
+// ---------- R2T / H2C Data ----------
+
+// sendR2T sends a Ready-to-Transfer PDU requesting data from the host.
+func (c *Controller) sendR2T(cid uint16, tag uint16, offset, length uint32) error {
+	r2t := R2THeader{
+		CCCID: cid,
+		TAG:   tag,
+		DATAO: offset,
+		DATAL: length,
+	}
+	return c.out.SendHeaderOnly(pduR2T, &r2t, r2tHdrSize)
+}
+
+// recvH2CData reads H2CData PDU(s) from the wire and returns the accumulated data.
+// Reads exactly `totalBytes` of data, potentially across multiple H2C PDUs.
+//
+// At QD>1 the host may interleave CapsuleCmd PDUs on the same connection
+// before the H2CData for a prior R2T arrives. Such capsules are fully read
+// and buffered in c.pendingCapsules for dispatch after the current command
+// completes (NVMe/TCP spec §3.5 — command pipelining).
+func (c *Controller) recvH2CData(totalBytes uint32) ([]byte, error) {
+	buf := getBuffer(int(totalBytes))
+	received := uint32(0)
+
+	for received < totalBytes {
+		hdr, err := c.in.Dequeue()
+		if err != nil {
+			putBuffer(buf)
+			return nil, fmt.Errorf("recvH2CData: read header: %w", err)
+		}
+
+		// Interleaved CapsuleCmd: buffer it for later dispatch.
+		if hdr.Type == pduCapsuleCmd {
+			if err := c.bufferInterleaved(); err != nil {
+				putBuffer(buf)
+				return nil, fmt.Errorf("recvH2CData: buffer interleaved capsule: %w", err)
+			}
+			continue
+		}
+
+		if hdr.Type != pduH2CData {
+			putBuffer(buf)
+			return nil, fmt.Errorf("recvH2CData: expected H2CData (0x6), got 0x%x", hdr.Type)
+		}
+
+		var h2c H2CDataHeader
+		if err := c.in.Receive(&h2c); err != nil {
+			putBuffer(buf)
+			return nil, fmt.Errorf("recvH2CData: receive header: %w", err)
+		}
+
+		dataLen := c.in.Length()
+		if dataLen == 0 {
+			putBuffer(buf)
+			return nil, fmt.Errorf("recvH2CData: H2CData PDU has no payload")
+		}
+		if h2c.DATAO+dataLen > totalBytes {
+			putBuffer(buf)
+			return nil, fmt.Errorf("recvH2CData: data exceeds expected size (%d+%d > %d)",
+				h2c.DATAO, dataLen, totalBytes)
+		}
+
+		if err := c.in.ReceiveData(buf[h2c.DATAO : h2c.DATAO+dataLen]); err != nil {
+			putBuffer(buf)
+			return nil, fmt.Errorf("recvH2CData: receive data: %w", err)
+		}
+		received += dataLen
+	}
+
+	return buf, nil
+}
+
+// bufferInterleaved reads a complete CapsuleCmd (header + optional inline
+// data) that arrived during R2T data collection and appends it to
+// c.pendingCapsules. Called from recvH2CData when hdr.Type == pduCapsuleCmd.
+func (c *Controller) bufferInterleaved() error {
+	var capsule CapsuleCommand
+	if err := c.in.Receive(&capsule); err != nil {
+		return err
+	}
+
+	var payload []byte
+	if dataLen := c.in.Length(); dataLen > 0 {
+		payload = getBuffer(int(dataLen))
+		if err := c.in.ReceiveData(payload); err != nil {
+			putBuffer(payload)
+			return err
+		}
+	}
+
+	req := &Request{
+		capsule: capsule,
+		payload: payload,
+	}
+	req.resp.CID = capsule.CID
+	req.resp.QueueID = c.queueID
+	req.resp.Status = uint16(StatusSuccess)
+
+	c.pendingCapsules = append(c.pendingCapsules, req)
+	return nil
+}
+
 // ---------- KATO management ----------
 
 func (c *Controller) startKATO() {
diff --git a/weed/storage/blockvol/nvme/fabric.go b/weed/storage/blockvol/nvme/fabric.go
index ef6f36110..373aaf4d9 100644
--- a/weed/storage/blockvol/nvme/fabric.go
+++ b/weed/storage/blockvol/nvme/fabric.go
@@ -112,10 +112,9 @@ func (c *Controller) handleConnect(req *Request) error {
 
 // handlePropertyGet returns a controller register value.
 func (c *Controller) handlePropertyGet(req *Request) error {
-	// Property offset in D10 (bits 31:0, but only lower bits used)
-	offset := req.capsule.D10
-	// Attrib in D11 bit 0: 0=4byte, 1=8byte
-	size8 := (req.capsule.D11 & 1) != 0
+	// Per NVMe-oF spec: CDW10 bits 2:0 = ATTRIB (size), CDW11 = OFST (offset)
+	size8 := (req.capsule.D10 & 1) != 0
+	offset := req.capsule.D11
 
 	var val uint64
 	switch offset {
@@ -144,8 +143,9 @@ func (c *Controller) handlePropertyGet(req *Request) error {
 
 // handlePropertySet handles controller register writes.
 func (c *Controller) handlePropertySet(req *Request) error {
-	offset := req.capsule.D10
-	value := uint64(req.capsule.D14) | uint64(req.capsule.D15)<<32
+	// Per NVMe-oF spec: CDW10 = ATTRIB (size), CDW11 = OFST (offset), CDW12-CDW13 = VALUE
+	offset := req.capsule.D11
+	value := uint64(req.capsule.D12) | uint64(req.capsule.D13)<<32
 
 	switch offset {
 	case propCC:
@@ -236,20 +236,19 @@ func connectKATO(capsule *CapsuleCommand) uint32 {
 	return capsule.D12
 }
 
-// PropertySet value extraction: the go-nvme reference puts value in D12/D13,
-// but NVMe spec actually uses CDW14/CDW15 for PropertySet. We handle both.
+// propertySetValue extracts the value from a PropertySet capsule (CDW12-CDW13).
 func propertySetValue(capsule *CapsuleCommand) uint64 {
-	return uint64(capsule.D14) | uint64(capsule.D15)<<32
+	return uint64(capsule.D12) | uint64(capsule.D13)<<32
 }
 
 // propertyGetSize returns true if the PropertyGet requests an 8-byte value.
 func propertyGetSize8(capsule *CapsuleCommand) bool {
-	return (capsule.D11 & 1) != 0
+	return (capsule.D10 & 1) != 0
 }
 
 // propertyGetOffset returns the register offset for PropertyGet.
 func propertyGetOffset(capsule *CapsuleCommand) uint32 {
-	return capsule.D10
+	return capsule.D11
 }
 
 // ---------- ConnectData marshal helpers for tests ----------
@@ -271,26 +270,28 @@ func makeConnectCapsule(queueID, queueSize uint16, kato uint32, fcType uint8) Ca
 }
 
 // makePropertyGetCapsule creates a PropertyGet capsule for the given register offset.
+// Per NVMe-oF spec: CDW10 = ATTRIB (size), CDW11 = OFST (offset).
 func makePropertyGetCapsule(offset uint32, size8 bool) CapsuleCommand {
 	c := CapsuleCommand{
 		OpCode: adminFabric,
 		FCType: fcPropertyGet,
-		D10:    offset,
+		D11:    offset,
 	}
 	if size8 {
-		c.D11 = 1
+		c.D10 = 1
 	}
 	return c
 }
 
 // makePropertySetCapsule creates a PropertySet capsule.
+// Per NVMe-oF spec: CDW10 = ATTRIB (size), CDW11 = OFST (offset), CDW12-13 = VALUE.
 func makePropertySetCapsule(offset uint32, value uint64) CapsuleCommand {
 	return CapsuleCommand{
 		OpCode: adminFabric,
 		FCType: fcPropertySet,
-		D10:    offset,
-		D14:    uint32(value),
-		D15:    uint32(value >> 32),
+		D11:    offset,
+		D12:    uint32(value),
+		D13:    uint32(value >> 32),
 	}
 }
 
diff --git a/weed/storage/blockvol/nvme/identify.go b/weed/storage/blockvol/nvme/identify.go
index d245ea9c1..cbe0f0950 100644
--- a/weed/storage/blockvol/nvme/identify.go
+++ b/weed/storage/blockvol/nvme/identify.go
@@ -86,6 +86,20 @@ func (c *Controller) identifyController(req *Request) error {
 	// ELPE (Error Log Page Entries) - offset 262
 	buf[262] = 0 // 1 entry (0-based)
 
+	// KAS (Keep Alive Support) - offset 320-321
+	// Granularity in 100ms units. Non-zero is mandatory for fabrics controllers.
+	binary.LittleEndian.PutUint16(buf[320:], 10) // 1 second granularity
+
+	// ANACAP (ANA Capabilities) - offset 341
+	// bit 3: reports Optimized state
+	buf[341] = 0x08
+
+	// ANAGRPMAX (Max ANA Group ID) - offset 344-347
+	binary.LittleEndian.PutUint32(buf[344:], 1)
+
+	// NANAGRPID (Number of ANA Group IDs) - offset 348-351
+	binary.LittleEndian.PutUint32(buf[348:], 1)
+
 	// SQES (Submission Queue Entry Size) - offset 512
 	// min=6 (2^6=64 bytes), max=6
 	buf[512] = 0x66
@@ -104,16 +118,6 @@ func (c *Controller) identifyController(req *Request) error {
 	// bit 3: WriteZeros, bit 2: DatasetMgmt (Trim)
 	binary.LittleEndian.PutUint16(buf[520:], 0x0C)
 
-	// ANACAP (ANA Capabilities) - offset 522
-	// bit 3: reports Optimized state
-	buf[522] = 0x08
-
-	// ANAGRPMAX - offset 524-527
-	binary.LittleEndian.PutUint32(buf[524:], 1)
-
-	// NANAGRPID - offset 528-531
-	binary.LittleEndian.PutUint32(buf[528:], 1)
-
 	// VWC (Volatile Write Cache) - offset 525
 	// bit 0: volatile write cache present → Flush required
 	buf[525] = 0x01
@@ -122,8 +126,13 @@ func (c *Controller) identifyController(req *Request) error {
 	// bit 0: SGLs supported (required for NVMe/TCP)
 	binary.LittleEndian.PutUint32(buf[536:], 0x01)
 
-	// SubNQN (Subsystem NQN) - offset 768, 256 bytes
-	copyPadded(buf[768:1024], sub.NQN)
+	// MNAN (Maximum Number of Allowed Namespaces) - offset 540-543
+	// Must be non-zero for NVMe 1.4+ controllers; kernel validates this.
+	binary.LittleEndian.PutUint32(buf[540:], 1)
+
+	// SubNQN (Subsystem NQN) - offset 768, 256 bytes, NUL-terminated
+	// Must NOT be space-padded — kernel uses strcmp() to match against Connect NQN.
+	copy(buf[768:1024], sub.NQN) // buf is already zeroed → NUL-terminated
 
 	// IOCCSZ (I/O Queue Command Capsule Supported Size) - offset 1792-1795
 	// In 16-byte units: 64/16 = 4
diff --git a/weed/storage/blockvol/nvme/io.go b/weed/storage/blockvol/nvme/io.go
index 32b7b8988..abb38e182 100644
--- a/weed/storage/blockvol/nvme/io.go
+++ b/weed/storage/blockvol/nvme/io.go
@@ -31,7 +31,7 @@ func (c *Controller) handleRead(req *Request) error {
 	return c.sendC2HDataAndResponse(req)
 }
 
-// handleWrite processes an NVMe Write command with inline data.
+// handleWrite processes an NVMe Write command with inline or R2T data.
 func (c *Controller) handleWrite(req *Request) error {
 	sub := c.subsystem
 	if sub == nil {
@@ -45,17 +45,11 @@ func (c *Controller) handleWrite(req *Request) error {
 		return c.sendResponse(req)
 	}
 
-	// Inline data must be present (DataOffset != 0 in the received PDU).
-	// If DataOffset == 0 for a Write, the host expects R2T flow — reject.
-	if len(req.payload) == 0 {
-		req.resp.Status = uint16(StatusInvalidField)
-		return c.sendResponse(req)
-	}
-
 	dev := sub.Dev
 	lba := req.capsule.Lba()
 	nlb := req.capsule.LbaLength()
 	blockSize := dev.BlockSize()
+	expectedBytes := uint32(nlb) * blockSize
 
 	// Bounds check
 	nsze := dev.VolumeSize() / uint64(blockSize)
@@ -64,14 +58,30 @@ func (c *Controller) handleWrite(req *Request) error {
 		return c.sendResponse(req)
 	}
 
-	// Validate payload size matches NLB*blockSize.
-	expectedBytes := uint32(nlb) * blockSize
-	if uint32(len(req.payload)) != expectedBytes {
-		req.resp.Status = uint16(StatusInvalidField)
-		return c.sendResponse(req)
+	var writeData []byte
+
+	if len(req.payload) > 0 {
+		// Inline data path: data was in the CapsuleCmd PDU.
+		if uint32(len(req.payload)) != expectedBytes {
+			req.resp.Status = uint16(StatusInvalidField)
+			return c.sendResponse(req)
+		}
+		writeData = req.payload
+	} else {
+		// R2T flow: send Ready-to-Transfer, then receive H2C Data PDUs.
+		if err := c.sendR2T(req.capsule.CID, 0, 0, expectedBytes); err != nil {
+			return err
+		}
+		data, err := c.recvH2CData(expectedBytes)
+		if err != nil {
+			return err
+		}
+		writeData = data
+		defer putBuffer(data)
 	}
 
-	if err := dev.WriteAt(lba, req.payload); err != nil {
+	throttleOnWALPressure(dev)
+	if err := writeWithRetry(dev, lba, writeData); err != nil {
 		req.resp.Status = uint16(mapBlockError(err))
 		return c.sendResponse(req)
 	}
@@ -133,8 +143,14 @@ func (c *Controller) handleWriteZeros(req *Request) error {
 			return c.sendResponse(req)
 		}
 	} else {
-		zeroBuf := make([]byte, totalBytes)
-		if err := dev.WriteAt(lba, zeroBuf); err != nil {
+		zeroBuf := getBuffer(int(totalBytes))
+		for i := range zeroBuf {
+			zeroBuf[i] = 0
+		}
+		throttleOnWALPressure(dev)
+		err := writeWithRetry(dev, lba, zeroBuf)
+		putBuffer(zeroBuf)
+		if err != nil {
 			req.resp.Status = uint16(mapBlockError(err))
 			return c.sendResponse(req)
 		}
diff --git a/weed/storage/blockvol/nvme/nvme_qa_test.go b/weed/storage/blockvol/nvme/nvme_qa_test.go
index b034f4c3e..999632aae 100644
--- a/weed/storage/blockvol/nvme/nvme_qa_test.go
+++ b/weed/storage/blockvol/nvme/nvme_qa_test.go
@@ -9,11 +9,14 @@ import (
 	"bytes"
 	"encoding/binary"
 	"errors"
+	"fmt"
 	"io"
 	"net"
 	"sync"
 	"testing"
 	"time"
+
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
 )
 
 // ============================================================
@@ -348,7 +351,7 @@ func TestQA_PropertyGetUnknownOffset(t *testing.T) {
 		OpCode: adminFabric,
 		FCType: fcPropertyGet,
 		CID:    801,
-		D10:    0xDEAD, // invalid register offset
+		D11:    0xDEAD, // invalid register offset (CDW11=OFST)
 	}
 	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
 	resp := recvCapsuleResp(t, r)
@@ -1539,3 +1542,2174 @@ func TestQA_Identify_ControllerModelSerial(t *testing.T) {
 
 	recvCapsuleResp(t, r)
 }
+
+// ============================================================
+// QA-17: mapBlockError heuristic string matching
+// ============================================================
+
+// TestQA_MapBlockError_WriteHeuristic: error with "write" → MediaWriteFault.
+func TestQA_MapBlockError_WriteHeuristic(t *testing.T) {
+	err := errors.New("disk write failed at sector 42")
+	got := mapBlockError(err)
+	if got != StatusMediaWriteFault {
+		t.Fatalf("error with 'write': got 0x%04x, want MediaWriteFault 0x%04x", got, StatusMediaWriteFault)
+	}
+}
+
+// TestQA_MapBlockError_WriteHeuristicCapital: "Write" (capital W) also matches.
+func TestQA_MapBlockError_WriteHeuristicCapital(t *testing.T) {
+	err := errors.New("Write operation timed out")
+	got := mapBlockError(err)
+	if got != StatusMediaWriteFault {
+		t.Fatalf("error with 'Write': got 0x%04x, want MediaWriteFault 0x%04x", got, StatusMediaWriteFault)
+	}
+}
+
+// TestQA_MapBlockError_ReadHeuristic: error with "read" → MediaReadError.
+func TestQA_MapBlockError_ReadHeuristic(t *testing.T) {
+	err := errors.New("read I/O error on extent 7")
+	got := mapBlockError(err)
+	if got != StatusMediaReadError {
+		t.Fatalf("error with 'read': got 0x%04x, want MediaReadError 0x%04x", got, StatusMediaReadError)
+	}
+}
+
+// TestQA_MapBlockError_ReadHeuristicCapital: "Read" also matches.
+func TestQA_MapBlockError_ReadHeuristicCapital(t *testing.T) {
+	err := errors.New("Read from backend failed")
+	got := mapBlockError(err)
+	if got != StatusMediaReadError {
+		t.Fatalf("error with 'Read': got 0x%04x, want MediaReadError 0x%04x", got, StatusMediaReadError)
+	}
+}
+
+// TestQA_MapBlockError_UnknownError: no write/read keyword → InternalError.
+func TestQA_MapBlockError_UnknownError(t *testing.T) {
+	err := errors.New("something completely unexpected happened")
+	got := mapBlockError(err)
+	if got != StatusInternalError {
+		t.Fatalf("unknown error: got 0x%04x, want InternalError 0x%04x", got, StatusInternalError)
+	}
+}
+
+// TestQA_MapBlockError_Nil: nil → StatusSuccess.
+func TestQA_MapBlockError_Nil(t *testing.T) {
+	got := mapBlockError(nil)
+	if got != StatusSuccess {
+		t.Fatalf("nil error: got 0x%04x, want StatusSuccess", got)
+	}
+}
+
+// ============================================================
+// QA-18: PropertySet 8-byte values (D14:D15 merge)
+// ============================================================
+
+func TestQA_PropertySet_8ByteValue(t *testing.T) {
+	nqn := "nqn.test:qa-propset8"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	// PropertySet CC with a value that spans both D14 and D15.
+	// CC is 32-bit, but the PropertySet wire format uses D14:D15 (64-bit).
+	// Set CC.EN=1 (bit 0) via 8-byte value with high bits nonzero to test merge.
+	cmd := makePropertySetCapsule(propCC, 0x0000000100000001) // D15=1, D14=1
+	cmd.CID = 300
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("PropertySet 8-byte: 0x%04x", resp.Status)
+	}
+
+	// Verify CC was set by reading it back via PropertyGet.
+	getCmd := makePropertyGetCapsule(propCC, false)
+	getCmd.CID = 301
+	w.SendWithData(pduCapsuleCmd, 0, &getCmd, capsuleCmdSize, nil)
+
+	resp = recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("PropertyGet CC: 0x%04x", resp.Status)
+	}
+	// CC is 32-bit, only D14 (low 32 bits) matters → expect 1
+	if resp.DW0 != 1 {
+		t.Fatalf("CC = 0x%08x, want 0x00000001", resp.DW0)
+	}
+}
+
+// ============================================================
+// QA-19: KATO=0 (KeepAlive disabled) — no timer armed
+// ============================================================
+
+func TestQA_KATO_Zero_NoTimer(t *testing.T) {
+	nqn := "nqn.test:qa-kato0"
+	dev := newMockDevice(256, 512)
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	recvICResp(t, r)
+
+	// Connect with KATO=0 (disabled)
+	sendConnect(w, 0, 64, 0, nqn, "host", 0xFFFF)
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("connect failed: 0x%04x", resp.Status)
+	}
+
+	// Enable controller (CC.EN=1) — this triggers startKATO()
+	ccCmd := makePropertySetCapsule(propCC, 1)
+	ccCmd.CID = 400
+	w.SendWithData(pduCapsuleCmd, 0, &ccCmd, capsuleCmdSize, nil)
+	resp = recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("CC.EN set failed: 0x%04x", resp.Status)
+	}
+
+	// With KATO=0, no timer should fire. Wait 200ms and verify session alive.
+	time.Sleep(200 * time.Millisecond)
+
+	kaCmd := CapsuleCommand{OpCode: adminKeepAlive, CID: 401}
+	w.SendWithData(pduCapsuleCmd, 0, &kaCmd, capsuleCmdSize, nil)
+	resp = recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("KeepAlive after 200ms with KATO=0 should succeed: 0x%04x", resp.Status)
+	}
+}
+
+// ============================================================
+// QA-20: NUMD log page length capping
+// ============================================================
+
+// TestQA_LogPage_ErrorLog_LargeNUMD: request > 64 bytes → capped to 64.
+func TestQA_LogPage_ErrorLog_LargeNUMD(t *testing.T) {
+	nqn := "nqn.test:qa-numd"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	// Request 4096 bytes (NUMD=1023, 0-based dwords)
+	cmd := CapsuleCommand{
+		OpCode: adminGetLogPage,
+		CID:    500,
+		D10:    uint32(logPageError) | (1023 << 16), // NUMDL=1023
+		D11:    0,                                    // NUMDU=0
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+
+	// Should get C2HData with at most 64 bytes (error log cap)
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Type != pduC2HData {
+		t.Fatalf("expected C2HData, got 0x%x", hdr.Type)
+	}
+	var c2h C2HDataHeader
+	r.Receive(&c2h)
+	dataLen := r.Length()
+	data := make([]byte, dataLen)
+	r.ReceiveData(data)
+
+	if c2h.DATAL > 64 {
+		t.Fatalf("error log data length %d > 64 (not capped)", c2h.DATAL)
+	}
+
+	recvCapsuleResp(t, r)
+}
+
+// TestQA_LogPage_SMART_LargeNUMD: request > 512 bytes → capped to 512.
+func TestQA_LogPage_SMART_LargeNUMD(t *testing.T) {
+	nqn := "nqn.test:qa-numd-smart"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	// Request 8192 bytes (NUMD=2047)
+	cmd := CapsuleCommand{
+		OpCode: adminGetLogPage,
+		CID:    501,
+		D10:    uint32(logPageSMART) | (2047 << 16),
+		D11:    0,
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+
+	var total uint32
+	for {
+		hdr, err := r.Dequeue()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if hdr.Type == pduCapsuleResp {
+			var resp CapsuleResponse
+			r.Receive(&resp)
+			if StatusWord(resp.Status).IsError() {
+				t.Fatalf("SMART log failed: 0x%04x", resp.Status)
+			}
+			break
+		}
+		if hdr.Type == pduC2HData {
+			var c2h C2HDataHeader
+			r.Receive(&c2h)
+			chunk := make([]byte, r.Length())
+			r.ReceiveData(chunk)
+			total += uint32(len(chunk))
+		}
+	}
+
+	if total > 512 {
+		t.Fatalf("SMART log data %d > 512 (not capped)", total)
+	}
+	if total == 0 {
+		t.Fatal("SMART log data empty")
+	}
+}
+
+// TestQA_LogPage_ANA_LargeNUMD: request > 40 bytes → capped to 40.
+func TestQA_LogPage_ANA_LargeNUMD(t *testing.T) {
+	nqn := "nqn.test:qa-numd-ana"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	cmd := CapsuleCommand{
+		OpCode: adminGetLogPage,
+		CID:    502,
+		D10:    uint32(logPageANA) | (4095 << 16),
+		D11:    0,
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+
+	var total uint32
+	for {
+		hdr, err := r.Dequeue()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if hdr.Type == pduCapsuleResp {
+			var resp CapsuleResponse
+			r.Receive(&resp)
+			break
+		}
+		if hdr.Type == pduC2HData {
+			var c2h C2HDataHeader
+			r.Receive(&c2h)
+			chunk := make([]byte, r.Length())
+			r.ReceiveData(chunk)
+			total += uint32(len(chunk))
+		}
+	}
+
+	if total > 40 {
+		t.Fatalf("ANA log data %d > 40 (not capped)", total)
+	}
+}
+
+// ============================================================
+// QA-21: Multiple SetFeatures on same session
+// ============================================================
+
+func TestQA_SetFeatures_MultipleCallsSameSession(t *testing.T) {
+	nqn := "nqn.test:qa-multiset"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	// First SetFeatures: request 8 queues
+	cmd1 := CapsuleCommand{
+		OpCode: adminSetFeatures,
+		CID:    600,
+		D10:    uint32(fidNumberOfQueues),
+		D11:    7 | (7 << 16), // NCQR=7, NSQR=7 (0-based)
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd1, capsuleCmdSize, nil)
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("first SetFeatures: 0x%04x", resp.Status)
+	}
+	// maxIOQueues=4, so clamped: NCQR=3, NSQR=3 (0-based)
+	ncqr1 := resp.DW0 & 0xFFFF
+	if ncqr1 != 3 { // 4-1=3 (0-based)
+		t.Fatalf("first NCQR = %d, want 3", ncqr1)
+	}
+
+	// Second SetFeatures: request 2 queues (NCQR=2, raw value in D11)
+	cmd2 := CapsuleCommand{
+		OpCode: adminSetFeatures,
+		CID:    601,
+		D10:    uint32(fidNumberOfQueues),
+		D11:    2 | (2 << 16), // NCQR=2, NSQR=2
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd2, capsuleCmdSize, nil)
+	resp = recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("second SetFeatures: 0x%04x", resp.Status)
+	}
+	// Granted 2, response is (2-1)=1 (0-based)
+	ncqr2 := resp.DW0 & 0xFFFF
+	if ncqr2 != 1 {
+		t.Fatalf("second NCQR = %d, want 1", ncqr2)
+	}
+
+	// GetFeatures should reflect last SetFeatures (grantedQueues=2)
+	cmd3 := CapsuleCommand{
+		OpCode: adminGetFeatures,
+		CID:    602,
+		D10:    uint32(fidNumberOfQueues),
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd3, capsuleCmdSize, nil)
+	resp = recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("GetFeatures: 0x%04x", resp.Status)
+	}
+	// grantedQueues=2, response is (2-1)=1 (0-based)
+	ncqr3 := resp.DW0 & 0xFFFF
+	if ncqr3 != 1 {
+		t.Fatalf("GetFeatures NCQR = %d, want 1 (reflecting second SetFeatures)", ncqr3)
+	}
+}
+
+// TestQA_SetFeatures_KATOOverwrite: second KATO SetFeatures overwrites first.
+func TestQA_SetFeatures_KATOOverwrite(t *testing.T) {
+	nqn := "nqn.test:qa-kato-overwrite"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	// Set KATO to 5000ms
+	cmd1 := CapsuleCommand{
+		OpCode: adminSetFeatures,
+		CID:    610,
+		D10:    uint32(fidKeepAliveTimer),
+		D11:    5000,
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd1, capsuleCmdSize, nil)
+	recvCapsuleResp(t, r)
+
+	// Overwrite KATO to 30000ms
+	cmd2 := CapsuleCommand{
+		OpCode: adminSetFeatures,
+		CID:    611,
+		D10:    uint32(fidKeepAliveTimer),
+		D11:    30000,
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd2, capsuleCmdSize, nil)
+	recvCapsuleResp(t, r)
+
+	// GetFeatures should return 30000
+	cmd3 := CapsuleCommand{
+		OpCode: adminGetFeatures,
+		CID:    612,
+		D10:    uint32(fidKeepAliveTimer),
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd3, capsuleCmdSize, nil)
+	resp := recvCapsuleResp(t, r)
+	if resp.DW0 != 30000 {
+		t.Fatalf("KATO = %d, want 30000", resp.DW0)
+	}
+}
+
+// ============================================================
+// QA-22: CNTLID allocation monotonic
+// ============================================================
+
+func TestQA_CNTLID_MonotonicallyIncreasing(t *testing.T) {
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+
+	ids := make([]uint16, 100)
+	for i := 0; i < 100; i++ {
+		ids[i] = srv.allocCNTLID()
+	}
+
+	for i := 1; i < len(ids); i++ {
+		if ids[i] <= ids[i-1] {
+			t.Fatalf("CNTLID[%d]=%d <= CNTLID[%d]=%d (not monotonic)", i, ids[i], i-1, ids[i-1])
+		}
+	}
+}
+
+// ============================================================
+// QA-23: Connection drop mid-PDU
+// ============================================================
+
+func TestQA_Wire_ConnectionDropMidReceive(t *testing.T) {
+	pr, pw := io.Pipe()
+
+	r := NewReader(pr)
+
+	go func() {
+		// Write valid CommonHeader for capsule with 64-byte body
+		hdr := CommonHeader{
+			Type:         pduCapsuleCmd,
+			HeaderLength: commonHeaderSize + capsuleCmdSize,
+			DataOffset:   0,
+			DataLength:   uint32(commonHeaderSize + capsuleCmdSize),
+		}
+		buf := make([]byte, commonHeaderSize)
+		hdr.Marshal(buf)
+		pw.Write(buf)
+
+		// Write only 10 of 64 body bytes, then close
+		pw.Write(make([]byte, 10))
+		pw.Close()
+	}()
+
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatalf("Dequeue should succeed: %v", err)
+	}
+	if hdr.Type != pduCapsuleCmd {
+		t.Fatalf("wrong type: 0x%x", hdr.Type)
+	}
+
+	// Receive should fail — only 10 of 64 body bytes available
+	var capsule CapsuleCommand
+	err = r.Receive(&capsule)
+	if err == nil {
+		t.Fatal("expected error from Receive on truncated body")
+	}
+}
+
+func TestQA_Wire_ConnectionDropMidPayload(t *testing.T) {
+	pr, pw := io.Pipe()
+	r := NewReader(pr)
+
+	go func() {
+		hdr := CommonHeader{
+			Type:         pduCapsuleCmd,
+			HeaderLength: commonHeaderSize + capsuleCmdSize,
+			DataOffset:   commonHeaderSize + capsuleCmdSize,
+			DataLength:   uint32(commonHeaderSize+capsuleCmdSize) + 512,
+		}
+		buf := make([]byte, commonHeaderSize)
+		hdr.Marshal(buf)
+		pw.Write(buf)
+
+		// Full 64-byte capsule body
+		pw.Write(make([]byte, capsuleCmdSize))
+
+		// Only 100 of 512 payload bytes, then close
+		pw.Write(make([]byte, 100))
+		pw.Close()
+	}()
+
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatalf("Dequeue: %v", err)
+	}
+	_ = hdr
+
+	var capsule CapsuleCommand
+	if err := r.Receive(&capsule); err != nil {
+		t.Fatalf("Receive should succeed (header complete): %v", err)
+	}
+
+	if r.Length() != 512 {
+		t.Fatalf("Length = %d, want 512", r.Length())
+	}
+
+	// ReceiveData should fail — only 100 bytes available
+	payload := make([]byte, 512)
+	err = r.ReceiveData(payload)
+	if err == nil {
+		t.Fatal("expected error from ReceiveData on truncated payload")
+	}
+}
+
+// ============================================================
+// QA-24: WriteZeros with DEALLOC bit + errors
+// ============================================================
+
+func TestQA_IO_WriteZeros_DEALLOC_TrimError(t *testing.T) {
+	dev := newMockDevice(256, 512)
+	dev.trimErr = errors.New("trim failed: disk error")
+	client, r, w := setupQAIOQueue(t, "nqn.test:qa-dealloc-err", dev)
+	defer client.Close()
+
+	cmd := CapsuleCommand{
+		OpCode: ioWriteZeros,
+		CID:    700,
+		D10:    0,
+		D12:    0 | commandBitDeallocate, // 1 block + DEALLOC
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+	resp := recvCapsuleResp(t, r)
+	if !StatusWord(resp.Status).IsError() {
+		t.Fatal("WriteZeros DEALLOC with trim error should fail")
+	}
+}
+
+func TestQA_IO_WriteZeros_NoDEALLOC_WriteError(t *testing.T) {
+	dev := newMockDevice(256, 512)
+	dev.writeErr = errors.New("Write failed: disk full")
+	client, r, w := setupQAIOQueue(t, "nqn.test:qa-wz-noalloc-err", dev)
+	defer client.Close()
+
+	cmd := CapsuleCommand{
+		OpCode: ioWriteZeros,
+		CID:    701,
+		D10:    0,
+		D12:    0, // 1 block, no DEALLOC
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+	resp := recvCapsuleResp(t, r)
+	if !StatusWord(resp.Status).IsError() {
+		t.Fatal("WriteZeros without DEALLOC with write error should fail")
+	}
+	// Heuristic: "Write" in error → MediaWriteFault
+	if StatusWord(resp.Status) != StatusMediaWriteFault {
+		t.Fatalf("got 0x%04x, want MediaWriteFault", resp.Status)
+	}
+}
+
+// ============================================================
+// QA-25: PropertyGet with 8-byte size (CAP is 64-bit)
+// ============================================================
+
+func TestQA_PropertyGet_CAP_8Byte(t *testing.T) {
+	nqn := "nqn.test:qa-propget8"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	cmd := makePropertyGetCapsule(propCAP, true)
+	cmd.CID = 800
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("PropertyGet CAP 8byte: 0x%04x", resp.Status)
+	}
+
+	val := uint64(resp.DW0) | (uint64(resp.DW1) << 32)
+	if val&0xFFFF != 63 {
+		t.Fatalf("CAP MQES = %d, want 63", val&0xFFFF)
+	}
+	if val&(1<<16) == 0 {
+		t.Fatal("CAP CQR bit not set")
+	}
+}
+
+func TestQA_PropertyGet_CC_4Byte(t *testing.T) {
+	nqn := "nqn.test:qa-propget4"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	cmd := makePropertyGetCapsule(propCC, false)
+	cmd.CID = 801
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("PropertyGet CC 4byte: 0x%04x", resp.Status)
+	}
+	if resp.DW0 != 0 {
+		t.Fatalf("CC = 0x%08x, want 0", resp.DW0)
+	}
+}
+
+// ============================================================
+// QA-26: QueueSize 0-based conversion
+// ============================================================
+
+func TestQA_Connect_QueueSizeConversion(t *testing.T) {
+	nqn := "nqn.test:qa-qsize"
+	dev := newMockDevice(256, 512)
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	recvICResp(t, r)
+
+	// Connect with SQSIZE=7 (0-based → queueSize=8)
+	cmd := CapsuleCommand{
+		OpCode: adminFabric,
+		FCType: fcConnect,
+		CID:    0,
+		D10:    0, // QID=0
+		D11:    7, // SQSIZE=7 (0-based)
+		D12:    0,
+	}
+	cd := ConnectData{CNTLID: 0xFFFF, SubNQN: nqn, HostNQN: "host"}
+	payload := make([]byte, connectDataSize)
+	cd.Marshal(payload)
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, payload)
+
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("connect: 0x%04x", resp.Status)
+	}
+
+	// Verify SQHD wraps at queueSize=8: send 9 commands
+	for i := uint16(0); i < 9; i++ {
+		kaCmd := CapsuleCommand{OpCode: adminKeepAlive, CID: 900 + i}
+		w.SendWithData(pduCapsuleCmd, 0, &kaCmd, capsuleCmdSize, nil)
+		resp = recvCapsuleResp(t, r)
+	}
+	// After Connect (SQHD=1) + 9 KeepAlives, SQHD = (1+9) % 8 = 2
+	if resp.SQHD != 2 {
+		t.Fatalf("SQHD after 9 commands (qsize=8) = %d, want 2", resp.SQHD)
+	}
+}
+
+// ============================================================
+// QA-27: Non-ANAProvider device (IsHealthy fallback)
+// ============================================================
+
+type nonANADevice struct {
+	healthy bool
+}
+
+func (d *nonANADevice) ReadAt(lba uint64, length uint32) ([]byte, error) {
+	return make([]byte, length), nil
+}
+func (d *nonANADevice) WriteAt(lba uint64, data []byte) error { return nil }
+func (d *nonANADevice) Trim(lba uint64, length uint32) error  { return nil }
+func (d *nonANADevice) SyncCache() error                      { return nil }
+func (d *nonANADevice) BlockSize() uint32                     { return 512 }
+func (d *nonANADevice) VolumeSize() uint64                    { return 128 * 1024 }
+func (d *nonANADevice) IsHealthy() bool                       { return d.healthy }
+
+func TestQA_ANA_NonANAProvider_Healthy(t *testing.T) {
+	dev := &nonANADevice{healthy: true}
+	nqn := "nqn.test:qa-nonana-h"
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, [16]byte{0x60, 1, 2, 3})
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	recvICResp(t, r)
+
+	// Write should succeed — IsHealthy() returns true
+	writeCmd := CapsuleCommand{OpCode: ioWrite, CID: 1000, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &writeCmd, capsuleCmdSize, make([]byte, 512))
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("write on healthy non-ANA device: 0x%04x", resp.Status)
+	}
+}
+
+func TestQA_ANA_NonANAProvider_Unhealthy(t *testing.T) {
+	dev := &nonANADevice{healthy: false}
+	nqn := "nqn.test:qa-nonana-u"
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, [16]byte{0x60, 1, 2, 3})
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	recvICResp(t, r)
+
+	// Write rejected — IsHealthy() returns false
+	writeCmd := CapsuleCommand{OpCode: ioWrite, CID: 1001, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &writeCmd, capsuleCmdSize, make([]byte, 512))
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status) != StatusNSNotReady {
+		t.Fatalf("write on unhealthy non-ANA: got 0x%04x, want NSNotReady", resp.Status)
+	}
+
+	// Read still works (not gated by isWriteAllowed)
+	readCmd := CapsuleCommand{OpCode: ioRead, CID: 1002, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+	for {
+		hdr, err := r.Dequeue()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if hdr.Type == pduCapsuleResp {
+			var rr CapsuleResponse
+			r.Receive(&rr)
+			if StatusWord(rr.Status).IsError() {
+				t.Fatalf("read on unhealthy non-ANA should succeed: 0x%04x", rr.Status)
+			}
+			break
+		}
+		if hdr.Type == pduC2HData {
+			var c2h C2HDataHeader
+			r.Receive(&c2h)
+			chunk := make([]byte, r.Length())
+			r.ReceiveData(chunk)
+		}
+	}
+}
+
+// ============================================================
+// QA-28: Lba() and LbaLength() edge cases
+// ============================================================
+
+func TestQA_Capsule_Lba64Bit(t *testing.T) {
+	c := CapsuleCommand{D10: 0xDEADBEEF, D11: 0x00000001}
+	lba := c.Lba()
+	want := uint64(0x00000001DEADBEEF)
+	if lba != want {
+		t.Fatalf("Lba() = 0x%016x, want 0x%016x", lba, want)
+	}
+}
+
+func TestQA_Capsule_LbaLengthZeroBased(t *testing.T) {
+	c := CapsuleCommand{D12: 0}
+	if c.LbaLength() != 1 {
+		t.Fatalf("LbaLength(D12=0) = %d, want 1", c.LbaLength())
+	}
+
+	c.D12 = 0xFFFF
+	if c.LbaLength() != 0x10000 {
+		t.Fatalf("LbaLength(D12=0xFFFF) = %d, want 65536", c.LbaLength())
+	}
+
+	c.D12 = 99
+	if c.LbaLength() != 100 {
+		t.Fatalf("LbaLength(D12=99) = %d, want 100", c.LbaLength())
+	}
+}
+
+// ============================================================
+// QA-29: Admin AsyncEvent stub
+// ============================================================
+
+func TestQA_Admin_AsyncEvent_Stub(t *testing.T) {
+	nqn := "nqn.test:qa-async"
+	client, r, w, _, _ := setupAdminSession(t, nqn)
+	defer client.Close()
+
+	cmd := CapsuleCommand{OpCode: adminAsyncEvent, CID: 1100}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
+
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("AsyncEvent stub should succeed: 0x%04x", resp.Status)
+	}
+}
+
+// ============================================================
+// QA-30: H2CTermReq from host closes session
+// ============================================================
+
+func TestQA_H2CTermReq_ClosesSession(t *testing.T) {
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	ctrl := newController(serverConn, srv)
+
+	done := make(chan error, 1)
+	go func() { done <- ctrl.Serve() }()
+
+	w := NewWriter(clientConn)
+	r := NewReader(clientConn)
+
+	sendICReq(w)
+	recvICResp(t, r)
+
+	// H2CTermReq — controller should exit cleanly
+	termReq := ICRequest{}
+	w.SendHeaderOnly(pduH2CTermReq, &termReq, icBodySize)
+
+	select {
+	case err := <-done:
+		if err != nil {
+			t.Fatalf("Serve should return nil on H2CTermReq: %v", err)
+		}
+	case <-time.After(2 * time.Second):
+		t.Fatal("timeout waiting for Serve to exit after H2CTermReq")
+	}
+}
+
+// ============================================================
+// QA-31: CP10-3 Tier 1 — Padding, Buffer Pool, Batching, Config
+// ============================================================
+
+// --- 31a: Padding skip adversarial (Finding 1 fix) ---
+
+// TestQA_Padding_MaxDataOffset255 crafts DataOffset=255 (uint8 max) with
+// HeaderLength=8, yielding 247 bytes of padding — the worst case.
+func TestQA_Padding_MaxDataOffset255(t *testing.T) {
+	dataOffset := uint8(255)
+	payload := []byte{0xCA, 0xFE}
+	dataLength := uint32(dataOffset) + uint32(len(payload))
+
+	var hdr [commonHeaderSize]byte
+	ch := CommonHeader{
+		Type:         pduCapsuleCmd,
+		HeaderLength: commonHeaderSize,
+		DataOffset:   dataOffset,
+		DataLength:   dataLength,
+	}
+	ch.Marshal(hdr[:])
+
+	var buf bytes.Buffer
+	buf.Write(hdr[:])
+	buf.Write(make([]byte, int(dataOffset)-commonHeaderSize)) // 247 bytes padding
+	buf.Write(payload)
+
+	r := NewReader(&buf)
+	if _, err := r.Dequeue(); err != nil {
+		t.Fatalf("Dequeue: %v", err)
+	}
+	var capsule CapsuleCommand
+	if err := r.Receive(&capsule); err != nil {
+		t.Fatalf("Receive: %v", err)
+	}
+	if r.Length() != uint32(len(payload)) {
+		t.Fatalf("Length = %d, want %d", r.Length(), len(payload))
+	}
+	data := make([]byte, r.Length())
+	if err := r.ReceiveData(data); err != nil {
+		t.Fatalf("ReceiveData: %v", err)
+	}
+	if data[0] != 0xCA || data[1] != 0xFE {
+		t.Fatalf("payload = %x, want CAFE", data)
+	}
+}
+
+// TestQA_Padding_ExactlyPadBufBoundary tests DataOffset that creates padding
+// of exactly maxHeaderSize (128) bytes — the boundary where chunked loop
+// does exactly one iteration.
+func TestQA_Padding_ExactlyPadBufBoundary(t *testing.T) {
+	// HeaderLength=8, DataOffset=136 → pad=128 = exactly len(padBuf)
+	dataOffset := uint8(commonHeaderSize + maxHeaderSize) // 136
+	payload := []byte{0x42}
+	dataLength := uint32(dataOffset) + uint32(len(payload))
+
+	var hdr [commonHeaderSize]byte
+	ch := CommonHeader{
+		Type:         pduCapsuleCmd,
+		HeaderLength: commonHeaderSize,
+		DataOffset:   dataOffset,
+		DataLength:   dataLength,
+	}
+	ch.Marshal(hdr[:])
+
+	var buf bytes.Buffer
+	buf.Write(hdr[:])
+	buf.Write(make([]byte, maxHeaderSize)) // exactly 128 bytes padding
+	buf.Write(payload)
+
+	r := NewReader(&buf)
+	if _, err := r.Dequeue(); err != nil {
+		t.Fatalf("Dequeue: %v", err)
+	}
+	var capsule CapsuleCommand
+	if err := r.Receive(&capsule); err != nil {
+		t.Fatalf("Receive with boundary padding: %v", err)
+	}
+	data := make([]byte, r.Length())
+	if err := r.ReceiveData(data); err != nil {
+		t.Fatal(err)
+	}
+	if data[0] != 0x42 {
+		t.Fatalf("payload = 0x%x, want 0x42", data[0])
+	}
+}
+
+// TestQA_Padding_OneBeyondPadBuf tests padding of maxHeaderSize+1 (129) bytes
+// to confirm the chunked loop handles the two-iteration case.
+func TestQA_Padding_OneBeyondPadBuf(t *testing.T) {
+	padSize := maxHeaderSize + 1 // 129
+	dataOffset := uint8(commonHeaderSize + padSize) // 137
+	payload := []byte{0xBB}
+	dataLength := uint32(dataOffset) + uint32(len(payload))
+
+	var hdr [commonHeaderSize]byte
+	ch := CommonHeader{
+		Type:         pduCapsuleCmd,
+		HeaderLength: commonHeaderSize,
+		DataOffset:   dataOffset,
+		DataLength:   dataLength,
+	}
+	ch.Marshal(hdr[:])
+
+	var buf bytes.Buffer
+	buf.Write(hdr[:])
+	buf.Write(make([]byte, padSize))
+	buf.Write(payload)
+
+	r := NewReader(&buf)
+	if _, err := r.Dequeue(); err != nil {
+		t.Fatalf("Dequeue: %v", err)
+	}
+	var capsule CapsuleCommand
+	if err := r.Receive(&capsule); err != nil {
+		t.Fatalf("Receive: %v", err)
+	}
+	data := make([]byte, r.Length())
+	if err := r.ReceiveData(data); err != nil {
+		t.Fatal(err)
+	}
+	if data[0] != 0xBB {
+		t.Fatalf("payload = 0x%x, want 0xBB", data[0])
+	}
+}
+
+// TestQA_Padding_ZeroPad verifies DataOffset == HeaderLength (no padding).
+func TestQA_Padding_ZeroPad(t *testing.T) {
+	payload := []byte{0xAA, 0xBB, 0xCC, 0xDD}
+	headerLen := uint8(commonHeaderSize + capsuleCmdSize) // 72
+	dataLength := uint32(headerLen) + uint32(len(payload))
+
+	var hdr [commonHeaderSize]byte
+	ch := CommonHeader{
+		Type:         pduCapsuleCmd,
+		HeaderLength: headerLen,
+		DataOffset:   headerLen, // == HeaderLength, so pad=0
+		DataLength:   dataLength,
+	}
+	ch.Marshal(hdr[:])
+
+	var buf bytes.Buffer
+	buf.Write(hdr[:])
+	buf.Write(make([]byte, capsuleCmdSize)) // specific header
+	// no padding
+	buf.Write(payload)
+
+	r := NewReader(&buf)
+	if _, err := r.Dequeue(); err != nil {
+		t.Fatalf("Dequeue: %v", err)
+	}
+	var capsule CapsuleCommand
+	if err := r.Receive(&capsule); err != nil {
+		t.Fatalf("Receive: %v", err)
+	}
+	if r.Length() != uint32(len(payload)) {
+		t.Fatalf("Length = %d, want %d", r.Length(), len(payload))
+	}
+	data := make([]byte, r.Length())
+	if err := r.ReceiveData(data); err != nil {
+		t.Fatal(err)
+	}
+	if data[0] != 0xAA {
+		t.Fatalf("data[0] = 0x%x, want 0xAA", data[0])
+	}
+}
+
+// TestQA_Padding_StreamEOFMidPad verifies EOF during padding skip is returned,
+// not silently swallowed.
+func TestQA_Padding_StreamEOFMidPad(t *testing.T) {
+	dataOffset := uint8(200)
+	dataLength := uint32(dataOffset) + 4
+
+	var hdr [commonHeaderSize]byte
+	ch := CommonHeader{
+		Type:         pduCapsuleCmd,
+		HeaderLength: commonHeaderSize,
+		DataOffset:   dataOffset,
+		DataLength:   dataLength,
+	}
+	ch.Marshal(hdr[:])
+
+	// Only provide 50 bytes of padding instead of 192.
+	var buf bytes.Buffer
+	buf.Write(hdr[:])
+	buf.Write(make([]byte, 50)) // truncated
+
+	r := NewReader(&buf)
+	if _, err := r.Dequeue(); err != nil {
+		t.Fatal(err)
+	}
+	var capsule CapsuleCommand
+	err := r.Receive(&capsule)
+	if err == nil {
+		t.Fatal("expected error for truncated padding")
+	}
+}
+
+// TestQA_Padding_TwoConsecutivePDUs verifies padding skip doesn't consume
+// bytes from the next PDU.
+func TestQA_Padding_TwoConsecutivePDUs(t *testing.T) {
+	// PDU 1: large padding (200 bytes), 2-byte payload
+	do1 := uint8(200)
+	pay1 := []byte{0x11, 0x22}
+	dl1 := uint32(do1) + uint32(len(pay1))
+	var h1 [commonHeaderSize]byte
+	ch1 := CommonHeader{
+		Type: pduCapsuleCmd, HeaderLength: commonHeaderSize,
+		DataOffset: do1, DataLength: dl1,
+	}
+	ch1.Marshal(h1[:])
+
+	// PDU 2: no padding, 2-byte payload
+	hl2 := uint8(commonHeaderSize)
+	do2 := hl2
+	pay2 := []byte{0x33, 0x44}
+	dl2 := uint32(do2) + uint32(len(pay2))
+	var h2 [commonHeaderSize]byte
+	ch2 := CommonHeader{
+		Type: pduCapsuleCmd, HeaderLength: hl2,
+		DataOffset: do2, DataLength: dl2,
+	}
+	ch2.Marshal(h2[:])
+
+	var buf bytes.Buffer
+	// PDU 1
+	buf.Write(h1[:])
+	buf.Write(make([]byte, int(do1)-commonHeaderSize)) // 192 bytes padding
+	buf.Write(pay1)
+	// PDU 2
+	buf.Write(h2[:])
+	buf.Write(pay2)
+
+	r := NewReader(&buf)
+
+	// Read PDU 1
+	if _, err := r.Dequeue(); err != nil {
+		t.Fatalf("PDU1 Dequeue: %v", err)
+	}
+	var c1 CapsuleCommand
+	if err := r.Receive(&c1); err != nil {
+		t.Fatalf("PDU1 Receive: %v", err)
+	}
+	d1 := make([]byte, r.Length())
+	if err := r.ReceiveData(d1); err != nil {
+		t.Fatalf("PDU1 ReceiveData: %v", err)
+	}
+	if d1[0] != 0x11 || d1[1] != 0x22 {
+		t.Fatalf("PDU1 payload = %x, want 1122", d1)
+	}
+
+	// Read PDU 2 — must not be corrupted by PDU 1's padding
+	if _, err := r.Dequeue(); err != nil {
+		t.Fatalf("PDU2 Dequeue: %v", err)
+	}
+	var c2 CapsuleCommand
+	if err := r.Receive(&c2); err != nil {
+		t.Fatalf("PDU2 Receive: %v", err)
+	}
+	d2 := make([]byte, r.Length())
+	if err := r.ReceiveData(d2); err != nil {
+		t.Fatalf("PDU2 ReceiveData: %v", err)
+	}
+	if d2[0] != 0x33 || d2[1] != 0x44 {
+		t.Fatalf("PDU2 payload = %x, want 3344 (stream desync?)", d2)
+	}
+}
+
+// --- 31b: Buffer pool adversarial ---
+
+// TestQA_BufPool_StaleDataNotLeaked verifies that a buffer returned from
+// getBuffer after putBuffer doesn't leak data across requests.
+func TestQA_BufPool_StaleDataNotLeaked(t *testing.T) {
+	// Write secret pattern into a 4KB buffer, return it.
+	secret := getBuffer(4096)
+	for i := range secret {
+		secret[i] = 0xFF
+	}
+	putBuffer(secret)
+
+	// Get another 4KB buffer (likely the same one from pool).
+	// In real usage, the caller must fill/zero before use.
+	// This test verifies the pool doesn't memset — callers
+	// must be aware.
+	reused := getBuffer(4096)
+	defer putBuffer(reused)
+
+	// The buffer MAY contain stale 0xFF data — that's expected.
+	// What matters is that the pool mechanics work correctly:
+	// correct length, correct capacity, no panic.
+	if len(reused) != 4096 {
+		t.Fatalf("len = %d, want 4096", len(reused))
+	}
+	if cap(reused) != 4096 {
+		t.Fatalf("cap = %d, want 4096", cap(reused))
+	}
+}
+
+// TestQA_BufPool_ConcurrentGetPut hammers the pool from many goroutines
+// to verify no data races or panics.
+func TestQA_BufPool_ConcurrentGetPut(t *testing.T) {
+	var wg sync.WaitGroup
+	for i := 0; i < 32; i++ {
+		wg.Add(1)
+		go func(id int) {
+			defer wg.Done()
+			for j := 0; j < 100; j++ {
+				// Vary sizes across all tiers + oversized.
+				sizes := []int{512, 4096, 8192, 65536, 100000, 262144, 500000}
+				buf := getBuffer(sizes[j%len(sizes)])
+				// Write to detect races.
+				buf[0] = byte(id)
+				buf[len(buf)-1] = byte(j)
+				putBuffer(buf)
+			}
+		}(i)
+	}
+	wg.Wait()
+}
+
+// TestQA_BufPool_ZeroSize verifies getBuffer(0) doesn't panic.
+func TestQA_BufPool_ZeroSize(t *testing.T) {
+	buf := getBuffer(0)
+	if len(buf) != 0 {
+		t.Fatalf("len = %d, want 0", len(buf))
+	}
+	// cap should be 4096 (small pool bucket)
+	if cap(buf) != 4096 {
+		t.Fatalf("cap = %d, want 4096", cap(buf))
+	}
+	putBuffer(buf) // must not panic
+}
+
+// TestQA_BufPool_PutWrongCap verifies putBuffer with a non-tier-sized buffer
+// doesn't panic (just doesn't return to any pool).
+func TestQA_BufPool_PutWrongCap(t *testing.T) {
+	buf := make([]byte, 1000) // cap=1000, not a pool tier
+	putBuffer(buf)            // should be silently ignored, no panic
+}
+
+// TestQA_BufPool_WriteZerosPooled verifies WriteZeros handler
+// correctly zeros pooled buffers before writing.
+func TestQA_BufPool_WriteZerosPooled(t *testing.T) {
+	nqn := "nqn.test:qa-pool-wz"
+	dev := newMockDevice(64, 512)
+
+	// Pre-fill device with non-zero data.
+	for i := range dev.data {
+		dev.data[i] = 0xAB
+	}
+
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	recvICResp(t, r)
+
+	// Poison the pool: get a 4KB buffer, fill with 0xFF, return it.
+	poison := getBuffer(4096)
+	for i := range poison {
+		poison[i] = 0xFF
+	}
+	putBuffer(poison)
+
+	// WriteZeros on 8 blocks (4KB) — must zero the buffer despite pool reuse.
+	wzCmd := CapsuleCommand{
+		OpCode: ioWriteZeros,
+		CID:    1,
+		D10:    0, // LBA 0
+		D12:    7, // 8 blocks (0-based)
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &wzCmd, capsuleCmdSize, nil)
+
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("WriteZeros failed: 0x%04x", resp.Status)
+	}
+
+	// Verify device data is actually zero, not stale 0xFF from pool.
+	for i := 0; i < 4096; i++ {
+		if dev.data[i] != 0 {
+			t.Fatalf("dev.data[%d] = 0x%x, want 0 (stale pool data leaked)", i, dev.data[i])
+		}
+	}
+}
+
+// --- 31c: Response batching adversarial ---
+
+// TestQA_Batch_MultiChunkC2H_InterleavedVerify verifies C2H batched
+// response has correct DATAO offsets and LAST flag only on final chunk.
+func TestQA_Batch_MultiChunkC2H_InterleavedVerify(t *testing.T) {
+	nqn := "nqn.test:qa-batch-c2h"
+	dev := newMockDevice(512, 512) // 256KB
+
+	// Write a known pattern: LBA i → byte i.
+	for i := 0; i < len(dev.data); i++ {
+		dev.data[i] = byte(i / 512)
+	}
+
+	srv := NewServer(Config{
+		Enabled:          true,
+		ListenAddr:       "127.0.0.1:0",
+		MaxIOQueues:      4,
+		MaxH2CDataLength: 8192, // 8KB chunks → 4 chunks for 32KB read
+	})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+
+	// Custom IC receive (non-default MaxH2CDataLength).
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Type != pduICResp {
+		t.Fatalf("expected ICResp")
+	}
+	var ic ICResponse
+	r.Receive(&ic)
+
+	// Read 32KB = 64 blocks
+	readCmd := CapsuleCommand{
+		OpCode: ioRead,
+		CID:    1,
+		D10:    0,
+		D12:    63, // 64 blocks
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+	// Expect 4 C2HData (32KB / 8KB) + 1 CapsuleResp
+	var allData []byte
+	chunkCount := 0
+	lastFlagCount := 0
+	prevOffset := uint32(0)
+
+	for {
+		hdr, err := r.Dequeue()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if hdr.Type == pduCapsuleResp {
+			var capsResp CapsuleResponse
+			r.Receive(&capsResp)
+			if StatusWord(capsResp.Status).IsError() {
+				t.Fatalf("read error: 0x%04x", capsResp.Status)
+			}
+			break
+		}
+		if hdr.Type != pduC2HData {
+			t.Fatalf("unexpected PDU 0x%x", hdr.Type)
+		}
+
+		chunkCount++
+		var c2h C2HDataHeader
+		r.Receive(&c2h)
+		dataBuf := make([]byte, r.Length())
+		r.ReceiveData(dataBuf)
+		allData = append(allData, dataBuf...)
+
+		// Verify DATAO is monotonically increasing.
+		if chunkCount > 1 && c2h.DATAO <= prevOffset {
+			t.Fatalf("chunk %d: DATAO=%d <= prev=%d", chunkCount, c2h.DATAO, prevOffset)
+		}
+		if chunkCount > 1 {
+			prevOffset = c2h.DATAO
+		}
+
+		// LAST flag only on final chunk.
+		if hdr.Flags&c2hFlagLast != 0 {
+			lastFlagCount++
+		}
+	}
+
+	if chunkCount != 4 {
+		t.Fatalf("expected 4 chunks, got %d", chunkCount)
+	}
+	if lastFlagCount != 1 {
+		t.Fatalf("expected LAST flag on exactly 1 chunk, got %d", lastFlagCount)
+	}
+	if len(allData) != 32768 {
+		t.Fatalf("total data = %d, want 32768", len(allData))
+	}
+	// Verify data content.
+	for i := 0; i < 64; i++ {
+		if allData[i*512] != byte(i) {
+			t.Fatalf("block %d: first byte = 0x%x, want 0x%x", i, allData[i*512], byte(i))
+		}
+	}
+}
+
+// TestQA_Batch_SingleBlockNoChunking verifies a 1-block read (512B)
+// with default 32KB maxDataLen produces exactly 1 C2H chunk + 1 response.
+func TestQA_Batch_SingleBlockNoChunking(t *testing.T) {
+	nqn := "nqn.test:qa-batch-1blk"
+	dev := newMockDevice(64, 512)
+	dev.data[0] = 0xEE
+
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	recvICResp(t, r)
+
+	readCmd := CapsuleCommand{OpCode: ioRead, CID: 1, D10: 0, D12: 0} // 1 block
+	w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+	// Expect exactly 1 C2HData with LAST flag + 1 CapsuleResp
+	hdr, _ := r.Dequeue()
+	if hdr.Type != pduC2HData {
+		t.Fatalf("expected C2HData, got 0x%x", hdr.Type)
+	}
+	if hdr.Flags&c2hFlagLast == 0 {
+		t.Fatal("expected LAST flag on single-chunk read")
+	}
+	var c2h C2HDataHeader
+	r.Receive(&c2h)
+	dataBuf := make([]byte, r.Length())
+	r.ReceiveData(dataBuf)
+
+	if len(dataBuf) != 512 {
+		t.Fatalf("data = %d bytes, want 512", len(dataBuf))
+	}
+	if dataBuf[0] != 0xEE {
+		t.Fatalf("data[0] = 0x%x, want 0xEE", dataBuf[0])
+	}
+
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("status error: 0x%04x", resp.Status)
+	}
+}
+
+// TestQA_Batch_WriteReadCycle_PooledBuffers exercises write+read in a tight
+// loop to verify pooled buffer lifecycle doesn't corrupt data across
+// request boundaries.
+func TestQA_Batch_WriteReadCycle_PooledBuffers(t *testing.T) {
+	nqn := "nqn.test:qa-batch-cycle"
+	dev := newMockDevice(256, 512)
+
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	recvICResp(t, r)
+
+	// 20 write+read cycles with different patterns.
+	for i := 0; i < 20; i++ {
+		pattern := byte(i + 1)
+		writeData := make([]byte, 4096) // 8 blocks
+		for j := range writeData {
+			writeData[j] = pattern
+		}
+
+		writeCmd := CapsuleCommand{
+			OpCode: ioWrite, CID: uint16(i * 2), D10: 0, D12: 7,
+		}
+		w.SendWithData(pduCapsuleCmd, 0, &writeCmd, capsuleCmdSize, writeData)
+		resp := recvCapsuleResp(t, r)
+		if StatusWord(resp.Status).IsError() {
+			t.Fatalf("cycle %d write: 0x%04x", i, resp.Status)
+		}
+
+		readCmd := CapsuleCommand{
+			OpCode: ioRead, CID: uint16(i*2 + 1), D10: 0, D12: 7,
+		}
+		w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+		// Consume C2H data chunks.
+		var readBuf []byte
+		for {
+			hdr, err := r.Dequeue()
+			if err != nil {
+				t.Fatalf("cycle %d read dequeue: %v", i, err)
+			}
+			if hdr.Type == pduCapsuleResp {
+				var rsp CapsuleResponse
+				r.Receive(&rsp)
+				if StatusWord(rsp.Status).IsError() {
+					t.Fatalf("cycle %d read: 0x%04x", i, rsp.Status)
+				}
+				break
+			}
+			var c2h C2HDataHeader
+			r.Receive(&c2h)
+			d := make([]byte, r.Length())
+			r.ReceiveData(d)
+			readBuf = append(readBuf, d...)
+		}
+
+		if len(readBuf) != 4096 {
+			t.Fatalf("cycle %d: read %d bytes, want 4096", i, len(readBuf))
+		}
+		for j, b := range readBuf {
+			if b != pattern {
+				t.Fatalf("cycle %d: byte[%d] = 0x%x, want 0x%x", i, j, b, pattern)
+			}
+		}
+	}
+}
+
+// --- 31d: MaxH2CDataLength adversarial ---
+
+// TestQA_MaxDataLen_VerySmallChunk verifies chunking with maxDataLen
+// smaller than one block (512B > 256B chunk → 2 chunks per block).
+func TestQA_MaxDataLen_VerySmallChunk(t *testing.T) {
+	nqn := "nqn.test:qa-tiny-chunk"
+	dev := newMockDevice(64, 512)
+	for i := range dev.data {
+		dev.data[i] = 0x77
+	}
+
+	srv := NewServer(Config{
+		Enabled:          true,
+		ListenAddr:       "127.0.0.1:0",
+		MaxIOQueues:      4,
+		MaxH2CDataLength: 256, // very small: 2 chunks per 512B block
+	})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	// Custom IC recv
+	hdr, _ := r.Dequeue()
+	if hdr.Type != pduICResp {
+		t.Fatal("expected ICResp")
+	}
+	var ic ICResponse
+	r.Receive(&ic)
+	if ic.MaxH2CDataLength != 256 {
+		t.Fatalf("MaxH2CDataLength = %d, want 256", ic.MaxH2CDataLength)
+	}
+
+	// Read 1 block = 512B → expect 2 C2H chunks (256B each)
+	readCmd := CapsuleCommand{OpCode: ioRead, CID: 1, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+	chunkCount := 0
+	totalData := 0
+	for {
+		hdr, _ := r.Dequeue()
+		if hdr.Type == pduCapsuleResp {
+			var resp CapsuleResponse
+			r.Receive(&resp)
+			if StatusWord(resp.Status).IsError() {
+				t.Fatalf("read error: 0x%04x", resp.Status)
+			}
+			break
+		}
+		chunkCount++
+		var c2h C2HDataHeader
+		r.Receive(&c2h)
+		d := make([]byte, r.Length())
+		r.ReceiveData(d)
+		totalData += len(d)
+	}
+
+	if chunkCount != 2 {
+		t.Fatalf("expected 2 chunks (512B / 256B), got %d", chunkCount)
+	}
+	if totalData != 512 {
+		t.Fatalf("total = %d, want 512", totalData)
+	}
+}
+
+// TestQA_MaxDataLen_ExactMultiple verifies chunking when read size
+// is an exact multiple of maxDataLen (no remainder chunk).
+func TestQA_MaxDataLen_ExactMultiple(t *testing.T) {
+	nqn := "nqn.test:qa-exact-mul"
+	dev := newMockDevice(128, 512) // 64KB
+
+	srv := NewServer(Config{
+		Enabled:          true,
+		ListenAddr:       "127.0.0.1:0",
+		MaxIOQueues:      4,
+		MaxH2CDataLength: 4096, // 4KB
+	})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	hdr, _ := r.Dequeue()
+	if hdr.Type != pduICResp {
+		t.Fatal("expected ICResp")
+	}
+	r.Receive(&ICResponse{})
+
+	// Read 16KB (32 blocks) / 4KB chunks = exactly 4 chunks
+	readCmd := CapsuleCommand{OpCode: ioRead, CID: 1, D10: 0, D12: 31}
+	w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+	chunkCount := 0
+	for {
+		hdr, _ := r.Dequeue()
+		if hdr.Type == pduCapsuleResp {
+			r.Receive(&CapsuleResponse{})
+			break
+		}
+		chunkCount++
+		var c2h C2HDataHeader
+		r.Receive(&c2h)
+		d := make([]byte, r.Length())
+		r.ReceiveData(d)
+		if len(d) != 4096 {
+			t.Fatalf("chunk %d: len=%d, want 4096", chunkCount, len(d))
+		}
+	}
+
+	if chunkCount != 4 {
+		t.Fatalf("expected 4 chunks (16KB / 4KB), got %d", chunkCount)
+	}
+}
+
+// TestQA_MaxDataLen_NonMultiple verifies chunking when read size
+// is NOT an exact multiple (last chunk is smaller).
+func TestQA_MaxDataLen_NonMultiple(t *testing.T) {
+	nqn := "nqn.test:qa-nonmul"
+	dev := newMockDevice(128, 512)
+
+	srv := NewServer(Config{
+		Enabled:          true,
+		ListenAddr:       "127.0.0.1:0",
+		MaxIOQueues:      4,
+		MaxH2CDataLength: 3072, // 3KB — doesn't divide 512 evenly into chunks
+	})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+	sendICReq(w)
+	hdr, _ := r.Dequeue()
+	if hdr.Type != pduICResp {
+		t.Fatal("expected ICResp")
+	}
+	r.Receive(&ICResponse{})
+
+	// Read 10KB (20 blocks) / 3KB chunks → 4 chunks (3+3+3+1 KB)
+	readCmd := CapsuleCommand{OpCode: ioRead, CID: 1, D10: 0, D12: 19}
+	w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+	var chunkSizes []int
+	for {
+		hdr, _ := r.Dequeue()
+		if hdr.Type == pduCapsuleResp {
+			r.Receive(&CapsuleResponse{})
+			break
+		}
+		var c2h C2HDataHeader
+		r.Receive(&c2h)
+		d := make([]byte, r.Length())
+		r.ReceiveData(d)
+		chunkSizes = append(chunkSizes, len(d))
+	}
+
+	if len(chunkSizes) != 4 {
+		t.Fatalf("expected 4 chunks, got %d: %v", len(chunkSizes), chunkSizes)
+	}
+	// First 3 chunks should be 3072, last should be 1024 (10240 - 3*3072)
+	for i := 0; i < 3; i++ {
+		if chunkSizes[i] != 3072 {
+			t.Fatalf("chunk[%d] = %d, want 3072", i, chunkSizes[i])
+		}
+	}
+	if chunkSizes[3] != 1024 {
+		t.Fatalf("chunk[3] = %d, want 1024", chunkSizes[3])
+	}
+}
+
+// --- 31e: NQN sanitization adversarial ---
+
+// TestQA_NQN_SpecialChars verifies NQN construction sanitizes
+// characters that are invalid in NVMe NQN format.
+func TestQA_NQN_SpecialChars(t *testing.T) {
+	srv := NewServer(Config{NQNPrefix: "nqn.2024-01.com.seaweedfs:vol."})
+	tests := []struct {
+		input string
+		want  string
+	}{
+		{"simple-vol", "nqn.2024-01.com.seaweedfs:vol.simple-vol"},
+		{"UPPER", "nqn.2024-01.com.seaweedfs:vol.upper"},
+		{"has_underscore", "nqn.2024-01.com.seaweedfs:vol.has-underscore"},
+		{"has spaces", "nqn.2024-01.com.seaweedfs:vol.has-spaces"},
+		{"pvc-abc123", "nqn.2024-01.com.seaweedfs:vol.pvc-abc123"},
+		{"a/b\\c:d", "nqn.2024-01.com.seaweedfs:vol.a-b-c-d"},
+	}
+	for _, tt := range tests {
+		got := srv.NQN(tt.input)
+		if got != tt.want {
+			t.Errorf("NQN(%q) = %q, want %q", tt.input, got, tt.want)
+		}
+	}
+}
+
+// TestQA_NQN_LongName verifies NQN truncation with hash suffix
+// for names exceeding 64 characters.
+func TestQA_NQN_LongName(t *testing.T) {
+	srv := NewServer(Config{NQNPrefix: "nqn.2024-01.com.seaweedfs:vol."})
+	longName := "pvc-" + string(make([]byte, 80)) // 84 chars, way over 64
+	// Replace zero bytes with 'a' for valid input.
+	input := "pvc-"
+	for i := 0; i < 80; i++ {
+		input += "a"
+	}
+
+	nqn := srv.NQN(input)
+	prefix := "nqn.2024-01.com.seaweedfs:vol."
+	suffix := nqn[len(prefix):]
+
+	// Suffix should be at most 64 chars (SanitizeIQN contract).
+	if len(suffix) > 64 {
+		t.Fatalf("suffix len = %d, want <= 64: %s", len(suffix), suffix)
+	}
+
+	// Two different long names should produce different NQNs.
+	input2 := "pvc-"
+	for i := 0; i < 80; i++ {
+		input2 += "b"
+	}
+	nqn2 := srv.NQN(input2)
+	if nqn == nqn2 {
+		t.Fatal("two different long names produced same NQN")
+	}
+	_ = longName
+}
+
+// --- 31f: TCP tuning adversarial ---
+
+// TestQA_TuneConn_RapidAcceptClose verifies tuneConn doesn't panic
+// when the connection is closed immediately after accept.
+func TestQA_TuneConn_RapidAcceptClose(t *testing.T) {
+	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ln.Close()
+
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		for i := 0; i < 10; i++ {
+			conn, err := ln.Accept()
+			if err != nil {
+				return
+			}
+			tuneConn(conn)
+			conn.Close() // close immediately after tuning
+		}
+	}()
+
+	for i := 0; i < 10; i++ {
+		conn, err := net.Dial("tcp", ln.Addr().String())
+		if err != nil {
+			break
+		}
+		conn.Close()
+	}
+	ln.Close()
+	<-done
+}
+
+// --- 31g: Writer batching edge cases ---
+
+// TestQA_Batch_FlushBufWithoutWrite verifies FlushBuf on an empty
+// buffer doesn't error (no-op flush).
+func TestQA_Batch_FlushBufWithoutWrite(t *testing.T) {
+	var buf bytes.Buffer
+	w := NewWriter(&buf)
+	if err := w.FlushBuf(); err != nil {
+		t.Fatalf("FlushBuf on empty: %v", err)
+	}
+	if buf.Len() != 0 {
+		t.Fatalf("expected empty buffer, got %d bytes", buf.Len())
+	}
+}
+
+// TestQA_Batch_MultipleFlushBuf verifies calling FlushBuf multiple times
+// after writeHeaderAndData is idempotent.
+func TestQA_Batch_MultipleFlushBuf(t *testing.T) {
+	var buf bytes.Buffer
+	w := NewWriter(&buf)
+
+	resp := CapsuleResponse{CID: 1, Status: uint16(StatusSuccess)}
+	if err := w.writeHeaderAndData(pduCapsuleResp, 0, &resp, capsuleRespSize, nil); err != nil {
+		t.Fatal(err)
+	}
+	if err := w.FlushBuf(); err != nil {
+		t.Fatal(err)
+	}
+	first := buf.Len()
+
+	// Second FlushBuf should be no-op.
+	if err := w.FlushBuf(); err != nil {
+		t.Fatal(err)
+	}
+	if buf.Len() != first {
+		t.Fatalf("second FlushBuf changed buffer: %d → %d", first, buf.Len())
+	}
+}
+
+// ============================================================
+// QA-WAL: BUG-CP103-1 Adversarial WAL Pressure Tests
+// ============================================================
+// These tests exercise the full NVMe/TCP protocol path under WAL pressure
+// through the server, verifying that write backpressure never produces
+// permanent error status codes and that reads remain unaffected.
+
+// TestQA_WAL_ConcurrentWritesUnderPressure sends multiple writes through the
+// NVMe/TCP protocol stack under persistent WAL pressure. Verifies every
+// response is StatusNSNotReady with DNR=0 (retryable), never a permanent
+// error like MediaWriteFault or InternalErrorDNR.
+func TestQA_WAL_ConcurrentWritesUnderPressure(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	nqn := "nqn.test:qa-wal-concurrent"
+	dev := newMockDevice(256, 512)
+	dev.writeErr = blockvol.ErrWALFull
+	dev.walPressure = 1.0
+
+	const numWrites = 8
+	client, r, w := setupQAIOQueue(t, nqn, dev)
+	defer client.Close()
+
+	// Send and receive sequentially (net.Pipe is synchronous).
+	for i := 0; i < numWrites; i++ {
+		cmd := CapsuleCommand{
+			OpCode: ioWrite,
+			CID:    uint16(500 + i),
+			D10:    uint32(i), // LBA
+			D12:    0,         // NLB 0 = 1 block
+		}
+		w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, make([]byte, 512))
+		resp := recvCapsuleResp(t, r)
+		status := StatusWord(resp.Status)
+
+		if status.DNR() {
+			t.Fatalf("write CID=%d: got DNR=1 (permanent failure) under WAL pressure — must be retryable", resp.CID)
+		}
+		if status == StatusMediaWriteFault {
+			t.Fatalf("write CID=%d: WAL pressure must not map to MediaWriteFault", resp.CID)
+		}
+		if status != StatusNSNotReady {
+			t.Fatalf("write CID=%d: expected StatusNSNotReady (0x%04x), got 0x%04x", resp.CID, StatusNSNotReady, status)
+		}
+	}
+}
+
+// TestQA_WAL_ReadsDuringWritePressure verifies that read commands succeed
+// normally while the write path is under WAL pressure. WAL pressure must
+// not affect the read path.
+func TestQA_WAL_ReadsDuringWritePressure(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	nqn := "nqn.test:qa-wal-read-ok"
+	dev := newMockDevice(256, 512)
+	dev.writeErr = blockvol.ErrWALFull
+	dev.walPressure = 1.0
+
+	// Pre-fill LBA 0 with known data.
+	pattern := make([]byte, 512)
+	for i := range pattern {
+		pattern[i] = 0xAB
+	}
+	dev.writeErr = nil
+	dev.WriteAt(0, pattern)
+	dev.writeErr = blockvol.ErrWALFull
+
+	client, r, w := setupQAIOQueue(t, nqn, dev)
+	defer client.Close()
+
+	// Write should fail with retryable status.
+	wCmd := CapsuleCommand{OpCode: ioWrite, CID: 600, D10: 1, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &wCmd, capsuleCmdSize, make([]byte, 512))
+	wResp := recvCapsuleResp(t, r)
+	if StatusWord(wResp.Status) != StatusNSNotReady {
+		t.Fatalf("write should fail with NSNotReady, got 0x%04x", wResp.Status)
+	}
+
+	// Read should succeed despite write pressure.
+	rCmd := CapsuleCommand{OpCode: ioRead, CID: 601, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &rCmd, capsuleCmdSize, nil)
+
+	// Read returns C2HData PDU (data transfer) followed by CapsuleResponse.
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatalf("read response dequeue: %v", err)
+	}
+	if hdr.Type != pduC2HData {
+		t.Fatalf("expected C2HData (0x7), got 0x%x", hdr.Type)
+	}
+	var c2h C2HDataHeader
+	r.Receive(&c2h)
+	readData := make([]byte, r.Length())
+	r.ReceiveData(readData)
+
+	// Now read the CapsuleResponse.
+	rResp := recvCapsuleResp(t, r)
+	if StatusWord(rResp.Status) != StatusSuccess {
+		t.Fatalf("read should succeed during write pressure, got 0x%04x", rResp.Status)
+	}
+}
+
+// TestQA_WAL_WriteZerosUnderPressure verifies WriteZeros (without DEALLOC)
+// also goes through the WAL pressure retry path and returns retryable status.
+func TestQA_WAL_WriteZerosUnderPressure(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	nqn := "nqn.test:qa-wal-wz"
+	dev := newMockDevice(256, 512)
+	dev.writeErr = blockvol.ErrWALFull
+	dev.walPressure = 1.0
+
+	client, r, w := setupQAIOQueue(t, nqn, dev)
+	defer client.Close()
+
+	// WriteZeros without DEALLOC bit — goes through write path.
+	wzCmd := CapsuleCommand{
+		OpCode: ioWriteZeros,
+		CID:    700,
+		D10:    0,  // LBA
+		D12:    3,  // NLB=3 → 4 blocks
+		D14:    0,  // no DEALLOC
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &wzCmd, capsuleCmdSize, nil)
+	resp := recvCapsuleResp(t, r)
+	status := StatusWord(resp.Status)
+
+	if status.DNR() {
+		t.Fatal("WriteZeros under WAL pressure must not return DNR=1")
+	}
+	if status == StatusMediaWriteFault {
+		t.Fatal("WriteZeros WAL pressure must not map to MediaWriteFault")
+	}
+	if status != StatusNSNotReady {
+		t.Fatalf("WriteZeros: expected StatusNSNotReady, got 0x%04x", status)
+	}
+}
+
+// TestQA_WAL_PressureTransition verifies correct behavior when WAL pressure
+// transitions: first write fails under pressure, pressure clears, second
+// write succeeds. Tests the real protocol path through the server.
+func TestQA_WAL_PressureTransition(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	nqn := "nqn.test:qa-wal-transition"
+	dev := newMockDevice(256, 512)
+	dev.writeErr = blockvol.ErrWALFull
+	dev.walPressure = 1.0
+
+	client, r, w := setupQAIOQueue(t, nqn, dev)
+	defer client.Close()
+
+	// First write: should fail with retryable status.
+	cmd1 := CapsuleCommand{OpCode: ioWrite, CID: 800, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd1, capsuleCmdSize, make([]byte, 512))
+	resp1 := recvCapsuleResp(t, r)
+	if StatusWord(resp1.Status) != StatusNSNotReady {
+		t.Fatalf("write under pressure: expected NSNotReady, got 0x%04x", resp1.Status)
+	}
+
+	// Clear WAL pressure.
+	dev.mu.Lock()
+	dev.writeErr = nil
+	dev.walPressure = 0.1
+	dev.mu.Unlock()
+
+	// Second write: should succeed.
+	cmd2 := CapsuleCommand{OpCode: ioWrite, CID: 801, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd2, capsuleCmdSize, make([]byte, 512))
+	resp2 := recvCapsuleResp(t, r)
+	if StatusWord(resp2.Status) != StatusSuccess {
+		t.Fatalf("write after pressure cleared: expected Success, got 0x%04x", resp2.Status)
+	}
+}
+
+// TestQA_WAL_ErrorEscalationPrevention verifies that different error types
+// are never confused: WAL pressure returns NSNotReady (DNR=0), while
+// permanent errors like ErrLeaseExpired return DNR=1. This prevents
+// error escalation where transient pressure is treated as permanent.
+func TestQA_WAL_ErrorEscalationPrevention(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	nqn := "nqn.test:qa-wal-escalation"
+	dev := newMockDevice(256, 512)
+
+	client, r, w := setupQAIOQueue(t, nqn, dev)
+	defer client.Close()
+
+	// Phase 1: WAL pressure → retryable (DNR=0).
+	dev.mu.Lock()
+	dev.writeErr = blockvol.ErrWALFull
+	dev.walPressure = 1.0
+	dev.mu.Unlock()
+
+	cmd1 := CapsuleCommand{OpCode: ioWrite, CID: 900, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd1, capsuleCmdSize, make([]byte, 512))
+	resp1 := recvCapsuleResp(t, r)
+	s1 := StatusWord(resp1.Status)
+	if s1.DNR() {
+		t.Fatal("WAL pressure must produce DNR=0 (retryable)")
+	}
+	if s1 != StatusNSNotReady {
+		t.Fatalf("WAL pressure: expected NSNotReady, got 0x%04x", s1)
+	}
+
+	// Phase 2: Lease expired → permanent (DNR=1).
+	dev.mu.Lock()
+	dev.writeErr = blockvol.ErrLeaseExpired
+	dev.walPressure = 0.0
+	dev.mu.Unlock()
+
+	cmd2 := CapsuleCommand{OpCode: ioWrite, CID: 901, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd2, capsuleCmdSize, make([]byte, 512))
+	resp2 := recvCapsuleResp(t, r)
+	s2 := StatusWord(resp2.Status)
+	if !s2.DNR() {
+		t.Fatal("ErrLeaseExpired must produce DNR=1 (permanent)")
+	}
+
+	// Phase 3: Back to WAL pressure → still retryable (DNR=0).
+	dev.mu.Lock()
+	dev.writeErr = blockvol.ErrWALFull
+	dev.walPressure = 1.0
+	dev.mu.Unlock()
+
+	cmd3 := CapsuleCommand{OpCode: ioWrite, CID: 902, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &cmd3, capsuleCmdSize, make([]byte, 512))
+	resp3 := recvCapsuleResp(t, r)
+	s3 := StatusWord(resp3.Status)
+	if s3.DNR() {
+		t.Fatal("WAL pressure after lease error must still produce DNR=0")
+	}
+	if s3 != StatusNSNotReady {
+		t.Fatalf("WAL pressure after lease error: expected NSNotReady, got 0x%04x", s3)
+	}
+}
+
+// TestQA_WAL_ThrottleDoesNotBlockReads verifies that the proactive throttle
+// on high WAL pressure does not affect read or flush commands.
+func TestQA_WAL_ThrottleDoesNotBlockReads(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+
+	var throttleSleeps int
+	sleepFn = func(d time.Duration) { throttleSleeps++ }
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	dev := newMockDevice(256, 512)
+	dev.walPressure = 1.0
+
+	// Throttle should only trigger for write paths.
+	throttleSleeps = 0
+	throttleOnWALPressure(dev)
+	if throttleSleeps != 1 {
+		t.Fatalf("expected throttle to fire at pressure=1.0, got %d sleeps", throttleSleeps)
+	}
+
+	// Read path should not call throttleOnWALPressure — verify by checking
+	// that a read through the protocol succeeds without extra delays.
+	nqn := "nqn.test:qa-wal-throttle-read"
+	client, r, w := setupQAIOQueue(t, nqn, dev)
+	defer client.Close()
+
+	throttleSleeps = 0
+	rCmd := CapsuleCommand{OpCode: ioRead, CID: 1000, D10: 0, D12: 0}
+	w.SendWithData(pduCapsuleCmd, 0, &rCmd, capsuleCmdSize, nil)
+
+	// Read returns C2HData + CapsuleResponse.
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatalf("read dequeue: %v", err)
+	}
+	if hdr.Type != pduC2HData {
+		t.Fatalf("expected C2HData (0x7), got 0x%x", hdr.Type)
+	}
+	var c2h C2HDataHeader
+	r.Receive(&c2h)
+	readBuf := make([]byte, r.Length())
+	r.ReceiveData(readBuf)
+
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status) != StatusSuccess {
+		t.Fatalf("read should succeed at any WAL pressure, got 0x%04x", resp.Status)
+	}
+}
+
+// TestQA_WAL_WrappedErrorProtocolPath verifies that wrapped ErrWALFull
+// (e.g., from appendWithRetry → fmt.Errorf) still maps correctly through
+// the full protocol stack.
+func TestQA_WAL_WrappedErrorProtocolPath(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	// Verify mapBlockError handles wrapped errors.
+	wrapped := fmt.Errorf("blockvol: WAL full timeout after 5s: %w", blockvol.ErrWALFull)
+	status := mapBlockError(wrapped)
+	if status != StatusNSNotReady {
+		t.Fatalf("wrapped ErrWALFull: expected NSNotReady (0x%04x), got 0x%04x", StatusNSNotReady, status)
+	}
+	if status.DNR() {
+		t.Fatal("wrapped ErrWALFull must have DNR=0")
+	}
+
+	// Double-wrapped.
+	doubleWrapped := fmt.Errorf("io handler: %w", wrapped)
+	status2 := mapBlockError(doubleWrapped)
+	if status2 != StatusNSNotReady {
+		t.Fatalf("double-wrapped ErrWALFull: expected NSNotReady, got 0x%04x", status2)
+	}
+}
+
+// TestQA_WAL_FlushDuringPressure verifies that a Flush (sync cache) command
+// succeeds even when write pressure is high, as long as syncErr is nil.
+func TestQA_WAL_FlushDuringPressure(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	nqn := "nqn.test:qa-wal-flush"
+	dev := newMockDevice(256, 512)
+	dev.writeErr = blockvol.ErrWALFull
+	dev.walPressure = 1.0
+
+	client, r, w := setupQAIOQueue(t, nqn, dev)
+	defer client.Close()
+
+	// Flush should succeed — it does not go through the write retry path.
+	flushCmd := CapsuleCommand{
+		OpCode: ioFlush,
+		CID:    1100,
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &flushCmd, capsuleCmdSize, nil)
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status) != StatusSuccess {
+		t.Fatalf("Flush should succeed during write pressure, got 0x%04x", resp.Status)
+	}
+}
+
+// TestQA_Batch_BackToBack_HeaderOnly verifies two consecutive header-only
+// PDUs batched with writeHeaderAndData + single FlushBuf.
+func TestQA_Batch_BackToBack_HeaderOnly(t *testing.T) {
+	var buf bytes.Buffer
+	w := NewWriter(&buf)
+
+	r1 := CapsuleResponse{CID: 1, Status: uint16(StatusSuccess)}
+	r2 := CapsuleResponse{CID: 2, Status: uint16(StatusSuccess)}
+	w.writeHeaderAndData(pduCapsuleResp, 0, &r1, capsuleRespSize, nil)
+	w.writeHeaderAndData(pduCapsuleResp, 0, &r2, capsuleRespSize, nil)
+	w.FlushBuf()
+
+	// Should be able to read both PDUs back.
+	r := NewReader(&buf)
+	hdr1, _ := r.Dequeue()
+	if hdr1.Type != pduCapsuleResp {
+		t.Fatalf("PDU1: type 0x%x", hdr1.Type)
+	}
+	var got1 CapsuleResponse
+	r.Receive(&got1)
+	if got1.CID != 1 {
+		t.Fatalf("PDU1: CID=%d", got1.CID)
+	}
+
+	hdr2, _ := r.Dequeue()
+	if hdr2.Type != pduCapsuleResp {
+		t.Fatalf("PDU2: type 0x%x", hdr2.Type)
+	}
+	var got2 CapsuleResponse
+	r.Receive(&got2)
+	if got2.CID != 2 {
+		t.Fatalf("PDU2: CID=%d", got2.CID)
+	}
+}
diff --git a/weed/storage/blockvol/nvme/nvme_test.go b/weed/storage/blockvol/nvme/nvme_test.go
index 4e1c8f16b..75493819f 100644
--- a/weed/storage/blockvol/nvme/nvme_test.go
+++ b/weed/storage/blockvol/nvme/nvme_test.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"encoding/binary"
 	"errors"
+	"fmt"
 	"io"
 	"net"
 	"sync"
@@ -19,15 +20,16 @@ import (
 // ============================================================
 
 type mockBlockDevice struct {
-	mu        sync.Mutex
-	data      []byte
-	blockSize uint32
-	healthy   bool
-	anaState  uint8
-	readErr   error
-	writeErr  error
-	syncErr   error
-	trimErr   error
+	mu          sync.Mutex
+	data        []byte
+	blockSize   uint32
+	healthy     bool
+	anaState    uint8
+	readErr     error
+	writeErr    error
+	syncErr     error
+	trimErr     error
+	walPressure float64
 }
 
 func newMockDevice(blocks int, blockSize uint32) *mockBlockDevice {
@@ -96,6 +98,11 @@ func (m *mockBlockDevice) IsHealthy() bool      { return m.healthy }
 func (m *mockBlockDevice) ANAState() uint8      { return m.anaState }
 func (m *mockBlockDevice) ANAGroupID() uint16   { return 1 }
 func (m *mockBlockDevice) DeviceNGUID() [16]byte { return [16]byte{0x60, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15} }
+func (m *mockBlockDevice) WALPressure() float64 {
+	m.mu.Lock()
+	defer m.mu.Unlock()
+	return m.walPressure
+}
 
 // ============================================================
 // Protocol Marshal/Unmarshal Tests
@@ -616,13 +623,13 @@ func TestController_PropertyGetCAP(t *testing.T) {
 	client, r, w, _, _ := setupAdminSession(t, nqn)
 	defer client.Close()
 
-	// PropertyGet CAP (8 bytes)
+	// PropertyGet CAP (8 bytes) — CDW10=ATTRIB(size8), CDW11=OFST
 	cmd := CapsuleCommand{
 		OpCode: adminFabric,
 		FCType: fcPropertyGet,
 		CID:    1,
-		D10:    propCAP,
-		D11:    1, // 8-byte
+		D10:    1, // ATTRIB: 8-byte
+		D11:    propCAP,
 	}
 	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
 	resp := recvCapsuleResp(t, r)
@@ -643,13 +650,13 @@ func TestController_PropertySetCC_EN(t *testing.T) {
 	client, r, w, ctrl, _ := setupAdminSession(t, nqn)
 	defer client.Close()
 
-	// PropertySet CC.EN=1
+	// PropertySet CC.EN=1 — CDW11=OFST, CDW12=VALUE
 	cmd := CapsuleCommand{
 		OpCode: adminFabric,
 		FCType: fcPropertySet,
 		CID:    2,
-		D10:    propCC,
-		D14:    1, // CC.EN=1
+		D11:    propCC,
+		D12:    1, // CC.EN=1
 	}
 	w.SendWithData(pduCapsuleCmd, 0, &cmd, capsuleCmdSize, nil)
 	resp := recvCapsuleResp(t, r)
@@ -657,12 +664,12 @@ func TestController_PropertySetCC_EN(t *testing.T) {
 		t.Fatalf("PropertySet CC failed: 0x%04x", resp.Status)
 	}
 
-	// Verify CSTS.RDY via PropertyGet
+	// Verify CSTS.RDY via PropertyGet — CDW11=OFST
 	cmd2 := CapsuleCommand{
 		OpCode: adminFabric,
 		FCType: fcPropertyGet,
 		CID:    3,
-		D10:    propCSTS,
+		D11:    propCSTS,
 	}
 	w.SendWithData(pduCapsuleCmd, 0, &cmd2, capsuleCmdSize, nil)
 	resp2 := recvCapsuleResp(t, r)
@@ -724,8 +731,8 @@ func TestIdentify_Controller(t *testing.T) {
 	if data[77] != 3 {
 		t.Fatalf("MDTS = %d, want 3", data[77])
 	}
-	// SubNQN check
-	subNQN := string(bytes.TrimRight(data[768:1024], " "))
+	// SubNQN check (NUL-terminated, not space-padded)
+	subNQN := string(bytes.TrimRight(data[768:1024], "\x00"))
 	if subNQN != nqn {
 		t.Fatalf("SubNQN = %q, want %q", subNQN, nqn)
 	}
@@ -1339,8 +1346,8 @@ func TestIO_ReadOutOfBounds(t *testing.T) {
 	clientConn.Close()
 }
 
-func TestIO_WriteNoInlineData(t *testing.T) {
-	nqn := "nqn.test:io-noinline"
+func TestIO_WriteR2TFlow(t *testing.T) {
+	nqn := "nqn.test:io-r2t"
 	dev := newMockDevice(256, 512)
 
 	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
@@ -1361,23 +1368,59 @@ func TestIO_WriteNoInlineData(t *testing.T) {
 	sendICReq(w)
 	recvICResp(t, r)
 
-	// Write with no inline data (DataOffset=0)
+	// Write 1 block (512 bytes) with no inline data → triggers R2T flow
 	writeCmd := CapsuleCommand{
 		OpCode: ioWrite,
 		CID:    205,
-		D10:    0,
-		D12:    0,
+		NSID:   1,
+		D10:    0, // LBA 0
+		D12:    0, // NLB = 0 means 1 block
 	}
-	// Send header-only (no data)
 	w.SendWithData(pduCapsuleCmd, 0, &writeCmd, capsuleCmdSize, nil)
 
-	resp := recvCapsuleResp(t, r)
-	status := StatusWord(resp.Status)
-	if status != StatusInvalidField {
-		t.Fatalf("expected InvalidField for R2T write, got 0x%04x", resp.Status)
+	// Expect R2T from controller
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatal(err)
 	}
-	if !status.DNR() {
-		t.Fatal("InvalidField should have DNR=1")
+	if hdr.Type != pduR2T {
+		t.Fatalf("expected R2T (0x9), got 0x%x", hdr.Type)
+	}
+	var r2t R2THeader
+	r.Receive(&r2t)
+	if r2t.CCCID != 205 {
+		t.Fatalf("R2T CCCID = %d, want 205", r2t.CCCID)
+	}
+	if r2t.DATAL != 512 {
+		t.Fatalf("R2T DATAL = %d, want 512", r2t.DATAL)
+	}
+
+	// Send H2C Data with the write payload
+	writeData := make([]byte, 512)
+	for i := range writeData {
+		writeData[i] = 0xAB
+	}
+	h2c := H2CDataHeader{
+		CCCID: 205,
+		TAG:   r2t.TAG,
+		DATAO: 0,
+		DATAL: 512,
+	}
+	w.SendWithData(pduH2CData, 0x04, &h2c, h2cDataHdrSize, writeData) // flag 0x04 = LAST
+
+	// Expect CapsuleResp (success)
+	resp := recvCapsuleResp(t, r)
+	if StatusWord(resp.Status).IsError() {
+		t.Fatalf("write via R2T failed: 0x%04x", resp.Status)
+	}
+
+	// Verify data was written by reading it back
+	readBack, err := dev.ReadAt(0, 512)
+	if err != nil {
+		t.Fatalf("ReadAt: %v", err)
+	}
+	if readBack[0] != 0xAB {
+		t.Fatalf("data not written: got 0x%02x, want 0xAB", readBack[0])
 	}
 
 	clientConn.Close()
@@ -1729,13 +1772,13 @@ func TestController_KATOTimeout(t *testing.T) {
 	sendConnect(w, 0, 64, 100, nqn, "host", 0xFFFF)
 	recvCapsuleResp(t, r)
 
-	// Enable controller (which starts KATO timer)
+	// Enable controller (which starts KATO timer) — CDW11=OFST, CDW12=VALUE
 	propSet := CapsuleCommand{
 		OpCode: adminFabric,
 		FCType: fcPropertySet,
 		CID:    1,
-		D10:    propCC,
-		D14:    1, // CC.EN=1
+		D11:    propCC,
+		D12:    1, // CC.EN=1
 	}
 	w.SendWithData(pduCapsuleCmd, 0, &propSet, capsuleCmdSize, nil)
 	recvCapsuleResp(t, r)
@@ -1796,13 +1839,13 @@ func TestFullSequence_ICConnectIdentifyReadWrite(t *testing.T) {
 		t.Fatalf("SetFeatures NumQueues failed: 0x%04x", resp.Status)
 	}
 
-	// 4. PropertySet CC.EN=1
+	// 4. PropertySet CC.EN=1 — CDW11=OFST, CDW12=VALUE
 	propCmd := CapsuleCommand{
 		OpCode: adminFabric,
 		FCType: fcPropertySet,
 		CID:    6,
-		D10:    propCC,
-		D14:    1,
+		D11:    propCC,
+		D12:    1,
 	}
 	w.SendWithData(pduCapsuleCmd, 0, &propCmd, capsuleCmdSize, nil)
 	resp = recvCapsuleResp(t, r)
@@ -2375,3 +2418,1024 @@ func TestDisconnect_NoError(t *testing.T) {
 
 	client.Close()
 }
+
+// TestReader_LargePadding verifies that padding > maxHeaderSize (128) is handled
+// without panic. DataOffset is uint8 (max 255), HeaderLength for CapsuleCmd is 72,
+// so pad can be up to 183.
+func TestReader_LargePadding(t *testing.T) {
+	// Build a PDU with HeaderLength=72 (CapsuleCmd), DataOffset=250 → pad=178 > 128
+	headerLen := uint8(capsuleCmdHdrLen) // 72
+	dataOffset := uint8(250)
+	pad := int(dataOffset) - int(headerLen) // 178
+	dataPayload := []byte{0xDE, 0xAD}
+	totalDataLen := uint32(dataOffset) + uint32(len(dataPayload))
+
+	var wireBuf bytes.Buffer
+
+	ch := CommonHeader{
+		Type:         pduCapsuleCmd,
+		HeaderLength: headerLen,
+		DataOffset:   dataOffset,
+		DataLength:   totalDataLen,
+	}
+	chBytes := make([]byte, commonHeaderSize)
+	ch.Marshal(chBytes)
+	wireBuf.Write(chBytes)
+
+	// Specific header (72 - 8 = 64 bytes for CapsuleCommand)
+	specificBuf := make([]byte, int(headerLen)-commonHeaderSize)
+	wireBuf.Write(specificBuf)
+
+	// Padding (178 bytes)
+	padBytes := make([]byte, pad)
+	wireBuf.Write(padBytes)
+
+	// Payload
+	wireBuf.Write(dataPayload)
+
+	r := NewReader(&wireBuf)
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatalf("Dequeue: %v", err)
+	}
+	if hdr.DataOffset != dataOffset {
+		t.Fatalf("DataOffset = %d, want %d", hdr.DataOffset, dataOffset)
+	}
+
+	var capsule CapsuleCommand
+	if err := r.Receive(&capsule); err != nil {
+		t.Fatalf("Receive with large padding (%d bytes) should not panic: %v", pad, err)
+	}
+
+	// Verify payload is readable after padding skip
+	dataLen := r.Length()
+	if dataLen != uint32(len(dataPayload)) {
+		t.Fatalf("Length() = %d, want %d", dataLen, len(dataPayload))
+	}
+	got := make([]byte, dataLen)
+	if err := r.ReceiveData(got); err != nil {
+		t.Fatal(err)
+	}
+	if got[0] != 0xDE || got[1] != 0xAD {
+		t.Fatalf("payload = %x, want DEAD", got)
+	}
+}
+
+// ============================================================
+// CP10-3: Performance Optimization Tests
+// ============================================================
+
+// TestTuneConn_NoError verifies tuneConn does not error on a real TCP connection.
+func TestTuneConn_NoError(t *testing.T) {
+	ln, err := net.Listen("tcp", "127.0.0.1:0")
+	if err != nil {
+		t.Fatal(err)
+	}
+	defer ln.Close()
+
+	done := make(chan struct{})
+	go func() {
+		conn, err := ln.Accept()
+		if err == nil {
+			tuneConn(conn) // must not panic or error
+			conn.Close()
+		}
+		close(done)
+	}()
+
+	conn, err := net.Dial("tcp", ln.Addr().String())
+	if err != nil {
+		t.Fatal(err)
+	}
+	conn.Close()
+	<-done
+}
+
+// TestTuneConn_NonTCP verifies tuneConn is a no-op for non-TCP connections.
+func TestTuneConn_NonTCP(t *testing.T) {
+	c, _ := pipeConn()
+	defer c.Close()
+	tuneConn(c) // must not panic on net.Pipe (not *net.TCPConn)
+}
+
+// TestWriterBatchedFlush verifies writeHeaderAndData + FlushBuf produces
+// identical wire bytes as SendWithData.
+func TestWriterBatchedFlush(t *testing.T) {
+	payload := make([]byte, 4096)
+	for i := range payload {
+		payload[i] = byte(i)
+	}
+
+	// Reference: SendWithData
+	var ref bytes.Buffer
+	w1 := NewWriter(&ref)
+	c2h := C2HDataHeader{CCCID: 10, DATAO: 0, DATAL: 4096}
+	if err := w1.SendWithData(pduC2HData, c2hFlagLast, &c2h, c2hDataHdrSize, payload); err != nil {
+		t.Fatal(err)
+	}
+
+	// Batched: writeHeaderAndData + FlushBuf
+	var batched bytes.Buffer
+	w2 := NewWriter(&batched)
+	c2h2 := C2HDataHeader{CCCID: 10, DATAO: 0, DATAL: 4096}
+	if err := w2.writeHeaderAndData(pduC2HData, c2hFlagLast, &c2h2, c2hDataHdrSize, payload); err != nil {
+		t.Fatal(err)
+	}
+	if err := w2.FlushBuf(); err != nil {
+		t.Fatal(err)
+	}
+
+	if !bytes.Equal(ref.Bytes(), batched.Bytes()) {
+		t.Fatalf("batched output (%d bytes) differs from reference (%d bytes)",
+			batched.Len(), ref.Len())
+	}
+}
+
+// TestSendWithData_UsesSharedEncode ensures SendWithData/SendHeaderOnly produce
+// correct wire output after the refactor (regression test).
+func TestSendWithData_UsesSharedEncode(t *testing.T) {
+	// HeaderOnly
+	var buf1 bytes.Buffer
+	w := NewWriter(&buf1)
+	resp := CapsuleResponse{CID: 42, SQHD: 5, Status: uint16(StatusSuccess)}
+	if err := w.SendHeaderOnly(pduCapsuleResp, &resp, capsuleRespSize); err != nil {
+		t.Fatal(err)
+	}
+	r := NewReader(&buf1)
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Type != pduCapsuleResp {
+		t.Fatalf("type = 0x%x, want 0x%x", hdr.Type, pduCapsuleResp)
+	}
+	if hdr.DataOffset != 0 {
+		t.Fatalf("DataOffset = %d, want 0 for header-only", hdr.DataOffset)
+	}
+
+	// WithData
+	var buf2 bytes.Buffer
+	w2 := NewWriter(&buf2)
+	c2h := C2HDataHeader{CCCID: 1, DATAO: 0, DATAL: 512}
+	data := make([]byte, 512)
+	data[0] = 0xAB
+	if err := w2.SendWithData(pduC2HData, c2hFlagLast, &c2h, c2hDataHdrSize, data); err != nil {
+		t.Fatal(err)
+	}
+	r2 := NewReader(&buf2)
+	hdr2, err := r2.Dequeue()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr2.Type != pduC2HData {
+		t.Fatalf("type = 0x%x", hdr2.Type)
+	}
+	if hdr2.Flags != c2hFlagLast {
+		t.Fatalf("flags = 0x%x", hdr2.Flags)
+	}
+	var gotHdr C2HDataHeader
+	if err := r2.Receive(&gotHdr); err != nil {
+		t.Fatal(err)
+	}
+	gotData := make([]byte, r2.Length())
+	if err := r2.ReceiveData(gotData); err != nil {
+		t.Fatal(err)
+	}
+	if gotData[0] != 0xAB {
+		t.Fatalf("data[0] = 0x%x, want 0xAB", gotData[0])
+	}
+}
+
+// TestNewWriterSize verifies NewWriterSize creates a writer with larger buffer.
+func TestNewWriterSize(t *testing.T) {
+	var buf bytes.Buffer
+	w := NewWriterSize(&buf, 65536)
+	resp := ICResponse{MaxH2CDataLength: 65536}
+	if err := w.SendHeaderOnly(pduICResp, &resp, icBodySize); err != nil {
+		t.Fatal(err)
+	}
+	r := NewReader(&buf)
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Type != pduICResp {
+		t.Fatalf("type = 0x%x", hdr.Type)
+	}
+}
+
+// TestBufPool_GetPut tests buffer pool get/put cycle.
+func TestBufPool_GetPut(t *testing.T) {
+	tests := []struct {
+		size    int
+		wantCap int
+	}{
+		{512, 4096},
+		{4096, 4096},
+		{4097, 65536},
+		{65536, 65536},
+		{65537, 262144},
+		{262144, 262144},
+		{262145, 262145}, // oversized: exact allocation
+	}
+	for _, tt := range tests {
+		buf := getBuffer(tt.size)
+		if len(buf) != tt.size {
+			t.Errorf("getBuffer(%d): len = %d, want %d", tt.size, len(buf), tt.size)
+		}
+		if cap(buf) != tt.wantCap {
+			t.Errorf("getBuffer(%d): cap = %d, want %d", tt.size, cap(buf), tt.wantCap)
+		}
+		putBuffer(buf) // must not panic
+	}
+}
+
+// TestBufPool_WriteReuse verifies write correctness across pool reuse cycles.
+func TestBufPool_WriteReuse(t *testing.T) {
+	nqn := "nqn.test:pool-reuse"
+	dev := newMockDevice(256, 512)
+
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+
+	sendICReq(w)
+	recvICResp(t, r)
+
+	// Do multiple write+read cycles to exercise pool reuse
+	for cycle := 0; cycle < 5; cycle++ {
+		pattern := byte(0xA0 + cycle)
+		writeData := make([]byte, 512)
+		for i := range writeData {
+			writeData[i] = pattern
+		}
+
+		writeCmd := CapsuleCommand{
+			OpCode: ioWrite,
+			CID:    uint16(100 + cycle),
+			D10:    0, // LBA 0
+			D12:    0, // 1 block
+		}
+		w.SendWithData(pduCapsuleCmd, 0, &writeCmd, capsuleCmdSize, writeData)
+
+		resp2 := recvCapsuleResp(t, r)
+		if StatusWord(resp2.Status).IsError() {
+			t.Fatalf("cycle %d: write failed: 0x%04x", cycle, resp2.Status)
+		}
+
+		// Read back
+		readCmd := CapsuleCommand{
+			OpCode: ioRead,
+			CID:    uint16(200 + cycle),
+			D10:    0,
+			D12:    0,
+		}
+		w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+		// Expect C2HData + CapsuleResp
+		hdr, err := r.Dequeue()
+		if err != nil {
+			t.Fatalf("cycle %d: read dequeue: %v", cycle, err)
+		}
+		if hdr.Type != pduC2HData {
+			t.Fatalf("cycle %d: expected C2HData, got 0x%x", cycle, hdr.Type)
+		}
+		var c2h C2HDataHeader
+		if err := r.Receive(&c2h); err != nil {
+			t.Fatal(err)
+		}
+		readBuf := make([]byte, r.Length())
+		if err := r.ReceiveData(readBuf); err != nil {
+			t.Fatal(err)
+		}
+		for i, b := range readBuf {
+			if b != pattern {
+				t.Fatalf("cycle %d: byte[%d] = 0x%x, want 0x%x", cycle, i, b, pattern)
+			}
+		}
+
+		// Consume CapsuleResp
+		recvCapsuleResp(t, r)
+	}
+
+	clientConn.Close()
+}
+
+// TestMaxH2CDataLen_Config verifies IC response uses Config value.
+func TestMaxH2CDataLen_Config(t *testing.T) {
+	customLen := uint32(65536)
+	srv := NewServer(Config{
+		Enabled:          true,
+		ListenAddr:       "127.0.0.1:0",
+		MaxH2CDataLength: customLen,
+	})
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+
+	sendICReq(w)
+
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Type != pduICResp {
+		t.Fatalf("type = 0x%x", hdr.Type)
+	}
+	var icResp ICResponse
+	if err := r.Receive(&icResp); err != nil {
+		t.Fatal(err)
+	}
+	if icResp.MaxH2CDataLength != customLen {
+		t.Fatalf("MaxH2CDataLength = %d, want %d", icResp.MaxH2CDataLength, customLen)
+	}
+
+	clientConn.Close()
+}
+
+// TestMaxH2CDataLen_Default verifies default IC response uses the standard constant.
+func TestMaxH2CDataLen_Default(t *testing.T) {
+	srv := NewServer(DefaultConfig())
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+
+	sendICReq(w)
+
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Type != pduICResp {
+		t.Fatalf("type = 0x%x", hdr.Type)
+	}
+	var icResp ICResponse
+	if err := r.Receive(&icResp); err != nil {
+		t.Fatal(err)
+	}
+	if icResp.MaxH2CDataLength != maxH2CDataLen {
+		t.Fatalf("MaxH2CDataLength = %d, want %d", icResp.MaxH2CDataLength, maxH2CDataLen)
+	}
+
+	clientConn.Close()
+}
+
+// TestC2HChunking_ConfigurableMaxDataLen verifies configurable MaxH2CDataLen
+// controls the chunk count in C2H responses.
+func TestC2HChunking_ConfigurableMaxDataLen(t *testing.T) {
+	customChunk := uint32(16384) // 16KB
+	nqn := "nqn.test:chunking"
+	dev := newMockDevice(256, 512)
+
+	for i := range dev.data {
+		dev.data[i] = 0xCC
+	}
+
+	srv := NewServer(Config{
+		Enabled:          true,
+		ListenAddr:       "127.0.0.1:0",
+		MaxIOQueues:      4,
+		MaxH2CDataLength: customChunk,
+	})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+
+	// Manual IC exchange (custom MaxH2CDataLength != default)
+	sendICReq(w)
+	hdr, err := r.Dequeue()
+	if err != nil {
+		t.Fatal(err)
+	}
+	if hdr.Type != pduICResp {
+		t.Fatalf("expected ICResp, got 0x%x", hdr.Type)
+	}
+	var icResp ICResponse
+	if err := r.Receive(&icResp); err != nil {
+		t.Fatal(err)
+	}
+	if icResp.MaxH2CDataLength != customChunk {
+		t.Fatalf("MaxH2CDataLength = %d, want %d", icResp.MaxH2CDataLength, customChunk)
+	}
+
+	// Read 64KB = 128 blocks of 512B
+	readCmd := CapsuleCommand{
+		OpCode: ioRead,
+		CID:    1,
+		D10:    0,
+		D12:    127, // 128 blocks (0-based)
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &readCmd, capsuleCmdSize, nil)
+
+	// Expect 4 C2HData chunks (64KB / 16KB) + 1 CapsuleResp
+	chunkCount := 0
+	totalData := 0
+	for {
+		hdr, err := r.Dequeue()
+		if err != nil {
+			t.Fatal(err)
+		}
+		if hdr.Type == pduCapsuleResp {
+			var capsResp CapsuleResponse
+			r.Receive(&capsResp)
+			if StatusWord(capsResp.Status).IsError() {
+				t.Fatalf("read failed: 0x%04x", capsResp.Status)
+			}
+			break
+		}
+		if hdr.Type == pduC2HData {
+			chunkCount++
+			var c2h C2HDataHeader
+			r.Receive(&c2h)
+			dataBuf := make([]byte, r.Length())
+			r.ReceiveData(dataBuf)
+			totalData += len(dataBuf)
+		}
+	}
+
+	if chunkCount != 4 {
+		t.Fatalf("expected 4 chunks (64KB/16KB), got %d", chunkCount)
+	}
+	if totalData != 65536 {
+		t.Fatalf("total data = %d, want 65536", totalData)
+	}
+
+	clientConn.Close()
+}
+
+// TestDataOffset_LargePadding verifies that a PDU with DataOffset > maxHeaderSize
+// is handled safely via chunked discard (no padBuf overflow).
+func TestDataOffset_LargePadding(t *testing.T) {
+	// Craft a PDU with DataOffset=200, HeaderLength=8.
+	// Padding = 192 bytes, which exceeds padBuf (128).
+	// The chunked discard in Receive() should handle this safely.
+	dataOffset := uint8(200)
+	totalPad := int(dataOffset) - commonHeaderSize // 192
+	payloadSize := 4
+	dataLength := uint32(dataOffset) + uint32(payloadSize) // 204
+
+	var hdr [commonHeaderSize]byte
+	ch := CommonHeader{
+		Type:         pduCapsuleCmd,
+		HeaderLength: commonHeaderSize,
+		DataOffset:   dataOffset,
+		DataLength:   dataLength,
+	}
+	ch.Marshal(hdr[:])
+
+	// Build full PDU: 8-byte header + 192-byte padding + 4-byte payload
+	var buf bytes.Buffer
+	buf.Write(hdr[:])
+	buf.Write(make([]byte, totalPad))                      // padding
+	buf.Write([]byte{0xDE, 0xAD, 0xBE, 0xEF})             // payload
+
+	r := NewReader(&buf)
+	_, err := r.Dequeue()
+	if err != nil {
+		t.Fatalf("Dequeue: %v", err)
+	}
+
+	// Receive should skip 192 bytes of padding without panic
+	var capsule CapsuleCommand
+	if err := r.Receive(&capsule); err != nil {
+		t.Fatalf("Receive: %v", err)
+	}
+
+	// Payload should be readable
+	if r.Length() != uint32(payloadSize) {
+		t.Fatalf("Length = %d, want %d", r.Length(), payloadSize)
+	}
+	data := make([]byte, r.Length())
+	if err := r.ReceiveData(data); err != nil {
+		t.Fatalf("ReceiveData: %v", err)
+	}
+	if data[0] != 0xDE || data[1] != 0xAD {
+		t.Fatalf("payload = %x, want DEADBEEF", data)
+	}
+}
+
+// TestNQN_Sanitization verifies Server.NQN() sanitizes volume names
+// using the shared BuildNQN helper.
+func TestNQN_Sanitization(t *testing.T) {
+	srv := NewServer(Config{NQNPrefix: "nqn.2024-01.com.seaweedfs:vol."})
+
+	// Uppercase should be lowered, underscores replaced with hyphens.
+	got := srv.NQN("My_Volume")
+	want := "nqn.2024-01.com.seaweedfs:vol.my-volume"
+	if got != want {
+		t.Fatalf("NQN(%q) = %q, want %q", "My_Volume", got, want)
+	}
+}
+
+// ============================================================
+// BUG-CP103-1: WAL Pressure Retry / Throttle Tests
+// ============================================================
+
+// TestIsRetryableWALPressure_Classification verifies the error classifier
+// for WAL-pressure retry decisions.
+func TestIsRetryableWALPressure_Classification(t *testing.T) {
+	t.Run("nil_error", func(t *testing.T) {
+		if isRetryableWALPressure(nil) {
+			t.Fatal("nil error should not be retryable")
+		}
+	})
+	t.Run("ErrWALFull_direct", func(t *testing.T) {
+		if !isRetryableWALPressure(blockvol.ErrWALFull) {
+			t.Fatal("ErrWALFull should be retryable")
+		}
+	})
+	t.Run("ErrWALFull_wrapped", func(t *testing.T) {
+		wrapped := fmt.Errorf("blockvol: WAL full timeout: %w", blockvol.ErrWALFull)
+		if !isRetryableWALPressure(wrapped) {
+			t.Fatal("wrapped ErrWALFull should be retryable")
+		}
+	})
+	t.Run("non_WAL_error", func(t *testing.T) {
+		if isRetryableWALPressure(errors.New("disk full")) {
+			t.Fatal("non-WAL error should not be retryable")
+		}
+	})
+	t.Run("ErrLeaseExpired", func(t *testing.T) {
+		if isRetryableWALPressure(blockvol.ErrLeaseExpired) {
+			t.Fatal("ErrLeaseExpired should not be retryable WAL pressure")
+		}
+	})
+	t.Run("ErrDurabilityBarrierFailed", func(t *testing.T) {
+		if isRetryableWALPressure(blockerr.ErrDurabilityBarrierFailed) {
+			t.Fatal("ErrDurabilityBarrierFailed should not be retryable WAL pressure")
+		}
+	})
+}
+
+// TestWriteWithRetry_TransientSuccess verifies that writeWithRetry succeeds
+// when WAL pressure clears within the retry budget.
+func TestWriteWithRetry_TransientSuccess(t *testing.T) {
+	// Replace sleep/jitter hooks for deterministic behavior.
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+
+	var sleepCalls []time.Duration
+	sleepFn = func(d time.Duration) { sleepCalls = append(sleepCalls, d) }
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	dev := newMockDevice(10, 512)
+	callCount := 0
+	dev.writeErr = blockvol.ErrWALFull
+
+	// Override WriteAt to clear error after 2 failures.
+	origWriteAt := dev.WriteAt
+	_ = origWriteAt
+	dev2 := &countingWriteDevice{
+		mockBlockDevice: dev,
+		writeFunc: func(lba uint64, data []byte) error {
+			callCount++
+			if callCount <= 2 {
+				return blockvol.ErrWALFull
+			}
+			dev.mu.Lock()
+			dev.writeErr = nil
+			dev.mu.Unlock()
+			return dev.WriteAt(lba, data)
+		},
+	}
+
+	payload := []byte{1, 2, 3, 4}
+	err := writeWithRetry(dev2, 0, payload)
+	if err != nil {
+		t.Fatalf("expected success after transient WAL pressure, got: %v", err)
+	}
+	// First call fails, then 2 retries (first retry fails, second succeeds).
+	// So we should have 2 sleep calls (for the 2 backoffs before retry 1 and 2).
+	if len(sleepCalls) != 2 {
+		t.Fatalf("expected 2 sleep calls, got %d: %v", len(sleepCalls), sleepCalls)
+	}
+	if sleepCalls[0] != 50*time.Millisecond {
+		t.Fatalf("first backoff = %v, want 50ms", sleepCalls[0])
+	}
+	if sleepCalls[1] != 200*time.Millisecond {
+		t.Fatalf("second backoff = %v, want 200ms", sleepCalls[1])
+	}
+}
+
+// countingWriteDevice wraps mockBlockDevice with a custom WriteAt.
+type countingWriteDevice struct {
+	*mockBlockDevice
+	writeFunc func(lba uint64, data []byte) error
+}
+
+func (d *countingWriteDevice) WriteAt(lba uint64, data []byte) error {
+	return d.writeFunc(lba, data)
+}
+
+// TestWriteWithRetry_PersistentFailure verifies that writeWithRetry exhausts
+// its retry budget and returns the last retryable error unchanged.
+func TestWriteWithRetry_PersistentFailure(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+
+	var sleepCalls []time.Duration
+	sleepFn = func(d time.Duration) { sleepCalls = append(sleepCalls, d) }
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	dev := newMockDevice(10, 512)
+	dev.writeErr = blockvol.ErrWALFull
+
+	err := writeWithRetry(dev, 0, []byte{1, 2, 3, 4})
+	if err == nil {
+		t.Fatal("expected error after exhausting retries")
+	}
+	if !errors.Is(err, blockvol.ErrWALFull) {
+		t.Fatalf("expected ErrWALFull, got: %v", err)
+	}
+	// 1 initial + 3 retries = 4 total calls, 3 sleeps.
+	if len(sleepCalls) != 3 {
+		t.Fatalf("expected 3 sleep calls (full retry budget), got %d", len(sleepCalls))
+	}
+}
+
+// TestWriteWithRetry_NonWALError verifies that writeWithRetry does NOT retry
+// non-WAL errors.
+func TestWriteWithRetry_NonWALError(t *testing.T) {
+	origSleep := sleepFn
+	defer func() { sleepFn = origSleep }()
+
+	sleepCalled := false
+	sleepFn = func(d time.Duration) { sleepCalled = true }
+
+	dev := newMockDevice(10, 512)
+	dev.writeErr = errors.New("disk I/O error")
+
+	err := writeWithRetry(dev, 0, []byte{1, 2, 3, 4})
+	if err == nil {
+		t.Fatal("expected error")
+	}
+	if sleepCalled {
+		t.Fatal("should not sleep/retry on non-WAL errors")
+	}
+}
+
+// TestWriteWithRetry_ImmediateSuccess verifies no retry on success.
+func TestWriteWithRetry_ImmediateSuccess(t *testing.T) {
+	origSleep := sleepFn
+	defer func() { sleepFn = origSleep }()
+
+	sleepCalled := false
+	sleepFn = func(d time.Duration) { sleepCalled = true }
+
+	dev := newMockDevice(10, 512)
+	err := writeWithRetry(dev, 0, []byte{1, 2, 3, 4})
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+	if sleepCalled {
+		t.Fatal("should not sleep on immediate success")
+	}
+}
+
+// TestThrottleOnWALPressure_Deterministic verifies throttle behavior using
+// injected sleep hooks (no wall-clock timing).
+func TestThrottleOnWALPressure_Deterministic(t *testing.T) {
+	origSleep := sleepFn
+	defer func() { sleepFn = origSleep }()
+
+	var sleptDuration time.Duration
+	sleepFn = func(d time.Duration) { sleptDuration = d }
+
+	t.Run("no_provider", func(t *testing.T) {
+		sleptDuration = 0
+		plain := &plainDevice{}
+		throttleOnWALPressure(plain)
+		if sleptDuration != 0 {
+			t.Fatal("should not throttle when device has no WALPressureProvider")
+		}
+	})
+
+	t.Run("low_pressure", func(t *testing.T) {
+		sleptDuration = 0
+		dev := newMockDevice(10, 512)
+		dev.walPressure = 0.5
+		throttleOnWALPressure(dev)
+		if sleptDuration != 0 {
+			t.Fatalf("should not throttle at pressure 0.5, got sleep %v", sleptDuration)
+		}
+	})
+
+	t.Run("threshold_pressure_0.9", func(t *testing.T) {
+		sleptDuration = 0
+		dev := newMockDevice(10, 512)
+		dev.walPressure = 0.9
+		throttleOnWALPressure(dev)
+		// (0.9 - 0.9) * 50 = 0 → no sleep
+		if sleptDuration != 0 {
+			t.Fatalf("should not throttle at exactly 0.9, got sleep %v", sleptDuration)
+		}
+	})
+
+	t.Run("high_pressure_0.95", func(t *testing.T) {
+		sleptDuration = 0
+		dev := newMockDevice(10, 512)
+		dev.walPressure = 0.95
+		throttleOnWALPressure(dev)
+		// (0.95 - 0.9) * 50 ≈ 2.5ms (float precision)
+		if sleptDuration < 2*time.Millisecond || sleptDuration > 3*time.Millisecond {
+			t.Fatalf("pressure 0.95: sleep = %v, want ~2.5ms", sleptDuration)
+		}
+	})
+
+	t.Run("full_pressure_1.0", func(t *testing.T) {
+		sleptDuration = 0
+		dev := newMockDevice(10, 512)
+		dev.walPressure = 1.0
+		throttleOnWALPressure(dev)
+		// (1.0 - 0.9) * 50 ≈ 5ms (float precision)
+		if sleptDuration < 4*time.Millisecond || sleptDuration > 6*time.Millisecond {
+			t.Fatalf("pressure 1.0: sleep = %v, want ~5ms", sleptDuration)
+		}
+	})
+}
+
+// plainDevice implements BlockDevice but NOT WALPressureProvider.
+type plainDevice struct{}
+
+func (p *plainDevice) ReadAt(lba uint64, length uint32) ([]byte, error) { return make([]byte, length), nil }
+func (p *plainDevice) WriteAt(lba uint64, data []byte) error            { return nil }
+func (p *plainDevice) Trim(lba uint64, length uint32) error             { return nil }
+func (p *plainDevice) SyncCache() error                                 { return nil }
+func (p *plainDevice) BlockSize() uint32                                { return 512 }
+func (p *plainDevice) VolumeSize() uint64                               { return 512 * 100 }
+func (p *plainDevice) IsHealthy() bool                                  { return true }
+
+// TestWriteWithRetry_ConcurrentPressure verifies that concurrent writes
+// under WAL pressure do not hang or deadlock and return retryable errors.
+func TestWriteWithRetry_ConcurrentPressure(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+
+	// No-op sleep for speed.
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	dev := newMockDevice(100, 512)
+	dev.writeErr = blockvol.ErrWALFull
+
+	const goroutines = 16
+	var wg sync.WaitGroup
+	errs := make([]error, goroutines)
+
+	wg.Add(goroutines)
+	for i := 0; i < goroutines; i++ {
+		go func(idx int) {
+			defer wg.Done()
+			errs[idx] = writeWithRetry(dev, uint64(idx), make([]byte, 512))
+		}(i)
+	}
+	wg.Wait()
+
+	for i, err := range errs {
+		if err == nil {
+			t.Fatalf("goroutine %d: expected error, got nil", i)
+		}
+		if !errors.Is(err, blockvol.ErrWALFull) {
+			t.Fatalf("goroutine %d: expected ErrWALFull, got: %v", i, err)
+		}
+	}
+}
+
+// TestWriteWithRetry_ConcurrentTransient verifies concurrent writes
+// succeed after transient WAL pressure clears.
+func TestWriteWithRetry_ConcurrentTransient(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	dev := newMockDevice(100, 512)
+
+	// Per-goroutine failure tracking: each goroutine fails once then succeeds.
+	var perGoroutineFailed sync.Map
+
+	wrapped := &countingWriteDevice{
+		mockBlockDevice: dev,
+		writeFunc: func(lba uint64, data []byte) error {
+			if _, loaded := perGoroutineFailed.LoadOrStore(lba, true); !loaded {
+				// First call per LBA fails with WAL pressure.
+				return blockvol.ErrWALFull
+			}
+			return dev.WriteAt(lba, data)
+		},
+	}
+
+	const goroutines = 4
+	var wg sync.WaitGroup
+	errs := make([]error, goroutines)
+
+	wg.Add(goroutines)
+	for i := 0; i < goroutines; i++ {
+		go func(idx int) {
+			defer wg.Done()
+			errs[idx] = writeWithRetry(wrapped, uint64(idx), make([]byte, 512))
+		}(i)
+	}
+	wg.Wait()
+
+	for i, err := range errs {
+		if err != nil {
+			t.Fatalf("goroutine %d: expected success after transient pressure, got: %v", i, err)
+		}
+	}
+}
+
+// TestWriteWithRetry_WrappedWALError verifies retry works with wrapped ErrWALFull.
+func TestWriteWithRetry_WrappedWALError(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	dev := newMockDevice(10, 512)
+	dev.writeErr = fmt.Errorf("blockvol: WAL full timeout: %w", blockvol.ErrWALFull)
+
+	err := writeWithRetry(dev, 0, []byte{1, 2, 3, 4})
+	if err == nil {
+		t.Fatal("expected error")
+	}
+	if !errors.Is(err, blockvol.ErrWALFull) {
+		t.Fatalf("expected ErrWALFull in chain, got: %v", err)
+	}
+}
+
+// TestMockDevice_WALPressureProvider verifies the mock implements the interface.
+func TestMockDevice_WALPressureProvider(t *testing.T) {
+	dev := newMockDevice(10, 512)
+	dev.walPressure = 0.75
+
+	var bd BlockDevice = dev
+	prov, ok := bd.(WALPressureProvider)
+	if !ok {
+		t.Fatal("mockBlockDevice should implement WALPressureProvider")
+	}
+	if got := prov.WALPressure(); got != 0.75 {
+		t.Fatalf("WALPressure() = %v, want 0.75", got)
+	}
+}
+
+// TestIO_WriteWALPressure_ProtocolResponse verifies the full protocol path:
+// persistent WAL pressure → writeWithRetry exhausts → mapBlockError → NVMe
+// response is StatusNSNotReady with DNR=0 (no permanent failure).
+func TestIO_WriteWALPressure_ProtocolResponse(t *testing.T) {
+	// Replace sleep/jitter to avoid real delays.
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	nqn := "nqn.test:wal-pressure"
+	dev := newMockDevice(256, 512)
+	dev.writeErr = blockvol.ErrWALFull
+
+	srv := NewServer(Config{Enabled: true, ListenAddr: "127.0.0.1:0", MaxIOQueues: 4})
+	srv.AddVolume(nqn, dev, dev.DeviceNGUID())
+
+	clientConn, serverConn := pipeConn()
+	defer clientConn.Close()
+
+	ctrl := newController(serverConn, srv)
+	ctrl.subsystem = srv.findSubsystem(nqn)
+	ctrl.queueID = 1
+	ctrl.queueSize = 64
+	go ctrl.Serve()
+
+	r := NewReader(clientConn)
+	w := NewWriter(clientConn)
+
+	sendICReq(w)
+	recvICResp(t, r)
+
+	writeData := make([]byte, 512)
+	writeCmd := CapsuleCommand{
+		OpCode: ioWrite,
+		CID:    300,
+		D10:    0, // LBA 0
+		D12:    0, // NLB 0 = 1 block
+	}
+	w.SendWithData(pduCapsuleCmd, 0, &writeCmd, capsuleCmdSize, writeData)
+
+	resp := recvCapsuleResp(t, r)
+	status := StatusWord(resp.Status)
+
+	// Must be StatusNSNotReady (retryable, not permanent failure).
+	if status != StatusNSNotReady {
+		t.Fatalf("expected StatusNSNotReady (0x%04x), got 0x%04x", StatusNSNotReady, status)
+	}
+	// DNR must be 0 (retryable).
+	if status.DNR() {
+		t.Fatal("DNR must be 0 for transient WAL pressure — host should retry")
+	}
+	// Must NOT be a permanent write fault.
+	if status == StatusMediaWriteFault {
+		t.Fatal("WAL pressure must not map to permanent MediaWriteFault")
+	}
+
+	clientConn.Close()
+}
+
+// TestWriteWithRetry_SharedTransientConcurrency verifies the benchmark failure
+// mode: multiple writers hit a shared transient pressure window, pressure clears,
+// and all writes complete successfully without surfacing permanent failure.
+func TestWriteWithRetry_SharedTransientConcurrency(t *testing.T) {
+	origSleep := sleepFn
+	origJitter := jitterFn
+	defer func() { sleepFn = origSleep; jitterFn = origJitter }()
+
+	sleepFn = func(d time.Duration) {}
+	jitterFn = func(max time.Duration) time.Duration { return 0 }
+
+	dev := newMockDevice(100, 512)
+
+	// Shared atomic counter: first N total calls across all goroutines fail.
+	// This simulates the real thundering-herd case where all writers hit the
+	// same WAL-full window simultaneously.
+	// Shared global counter: first N total calls fail across all goroutines.
+	// This simulates real thundering-herd behavior where all writers hit the
+	// same WAL-full window. With no-op sleep, goroutines may be scheduled
+	// sequentially, so the failure budget must be < retry budget per goroutine
+	// (4 attempts = 1 initial + 3 retries) to guarantee success.
+	var globalCallCount int64
+	var mu sync.Mutex
+	const failForFirstN = 2 // conservative: even if 1 goroutine gets all failures, it still has retries
+
+	wrapped := &countingWriteDevice{
+		mockBlockDevice: dev,
+		writeFunc: func(lba uint64, data []byte) error {
+			mu.Lock()
+			globalCallCount++
+			n := globalCallCount
+			mu.Unlock()
+			if n <= failForFirstN {
+				return blockvol.ErrWALFull
+			}
+			return dev.WriteAt(lba, data)
+		},
+	}
+
+	const goroutines = 8
+	var wg sync.WaitGroup
+	errs := make([]error, goroutines)
+
+	wg.Add(goroutines)
+	for i := 0; i < goroutines; i++ {
+		go func(idx int) {
+			defer wg.Done()
+			errs[idx] = writeWithRetry(wrapped, uint64(idx), make([]byte, 512))
+		}(i)
+	}
+	wg.Wait()
+
+	// All goroutines must succeed. The shared pressure window (first 2 calls)
+	// is absorbed by the retry budget regardless of scheduling order.
+	for i, err := range errs {
+		if err != nil {
+			t.Fatalf("goroutine %d: expected success after shared transient pressure, got: %v", i, err)
+		}
+	}
+}
diff --git a/weed/storage/blockvol/nvme/protocol.go b/weed/storage/blockvol/nvme/protocol.go
index a5eb803e9..ce272b90d 100644
--- a/weed/storage/blockvol/nvme/protocol.go
+++ b/weed/storage/blockvol/nvme/protocol.go
@@ -19,6 +19,7 @@ const (
 	pduC2HTermReq  uint8 = 0x3 // Controller-to-Host Termination Request
 	pduCapsuleCmd  uint8 = 0x4 // NVMe Capsule Command
 	pduCapsuleResp uint8 = 0x5 // NVMe Capsule Response
+	pduH2CData     uint8 = 0x6 // Host-to-Controller Data Transfer
 	pduC2HData     uint8 = 0x7 // Controller-to-Host Data Transfer
 	pduR2T         uint8 = 0x9 // Ready-to-Transfer
 )
@@ -109,6 +110,8 @@ const (
 	capsuleCmdSize  = 64 // CapsuleCommand specific header size (after CommonHeader)
 	capsuleRespSize = 16 // CapsuleResponse specific header size
 	c2hDataHdrSize  = 16 // C2HDataHeader specific header size
+	h2cDataHdrSize  = 16 // H2CDataHeader specific header size
+	r2tHdrSize      = 16 // R2THeader specific header size
 	icBodySize      = 120 // ICReq/ICResp body size (after CommonHeader)
 	connectDataSize = 1024
 
@@ -354,6 +357,62 @@ func (h *C2HDataHeader) Unmarshal(buf []byte) {
 	h.DATAL = binary.LittleEndian.Uint32(buf[8:])
 }
 
+// ---------- R2THeader (16-byte specific header) ----------
+
+// R2THeader is the Ready-to-Transfer PDU specific header.
+type R2THeader struct {
+	CCCID uint16 // Command Capsule CID
+	TAG   uint16 // R2T Tag (echoed by host in H2CData)
+	DATAO uint32 // Data offset
+	DATAL uint32 // Data length requested
+	_pad  uint32
+}
+
+func (h *R2THeader) Marshal(buf []byte) {
+	for i := range buf[:r2tHdrSize] {
+		buf[i] = 0
+	}
+	binary.LittleEndian.PutUint16(buf[0:], h.CCCID)
+	binary.LittleEndian.PutUint16(buf[2:], h.TAG)
+	binary.LittleEndian.PutUint32(buf[4:], h.DATAO)
+	binary.LittleEndian.PutUint32(buf[8:], h.DATAL)
+}
+
+func (h *R2THeader) Unmarshal(buf []byte) {
+	h.CCCID = binary.LittleEndian.Uint16(buf[0:])
+	h.TAG = binary.LittleEndian.Uint16(buf[2:])
+	h.DATAO = binary.LittleEndian.Uint32(buf[4:])
+	h.DATAL = binary.LittleEndian.Uint32(buf[8:])
+}
+
+// ---------- H2CDataHeader (16-byte specific header) ----------
+
+// H2CDataHeader is the host-to-controller data transfer header.
+type H2CDataHeader struct {
+	CCCID uint16 // Command Capsule CID
+	TAG   uint16 // Matches R2T Tag
+	DATAO uint32 // Data offset
+	DATAL uint32 // Data length in this PDU
+	_pad  uint32
+}
+
+func (h *H2CDataHeader) Marshal(buf []byte) {
+	for i := range buf[:h2cDataHdrSize] {
+		buf[i] = 0
+	}
+	binary.LittleEndian.PutUint16(buf[0:], h.CCCID)
+	binary.LittleEndian.PutUint16(buf[2:], h.TAG)
+	binary.LittleEndian.PutUint32(buf[4:], h.DATAO)
+	binary.LittleEndian.PutUint32(buf[8:], h.DATAL)
+}
+
+func (h *H2CDataHeader) Unmarshal(buf []byte) {
+	h.CCCID = binary.LittleEndian.Uint16(buf[0:])
+	h.TAG = binary.LittleEndian.Uint16(buf[2:])
+	h.DATAO = binary.LittleEndian.Uint32(buf[4:])
+	h.DATAL = binary.LittleEndian.Uint32(buf[8:])
+}
+
 // ---------- ConnectData (1024 bytes, payload of Fabric Connect) ----------
 
 // ConnectData is the 1024-byte payload sent with a Fabric Connect command.
diff --git a/weed/storage/blockvol/nvme/server.go b/weed/storage/blockvol/nvme/server.go
index a60626d27..a2ca9437f 100644
--- a/weed/storage/blockvol/nvme/server.go
+++ b/weed/storage/blockvol/nvme/server.go
@@ -7,6 +7,8 @@ import (
 	"sync"
 	"sync/atomic"
 	"time"
+
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
 )
 
 // Config holds NVMe/TCP target configuration.
@@ -118,6 +120,7 @@ func (s *Server) acceptLoop() {
 			continue
 		}
 
+		tuneConn(conn)
 		ctrl := newController(conn, s)
 		s.addSession(ctrl)
 
@@ -204,7 +207,18 @@ func (s *Server) Close() error {
 	return nil
 }
 
-// NQN returns the full NQN for a volume name.
-func (s *Server) NQN(volName string) string {
-	return s.cfg.NQNPrefix + volName
+// tuneConn applies TCP optimizations to accepted connections.
+func tuneConn(conn net.Conn) {
+	tc, ok := conn.(*net.TCPConn)
+	if !ok {
+		return
+	}
+	tc.SetNoDelay(true)          // TCP_NODELAY — disable Nagle
+	tc.SetReadBuffer(262144)     // SO_RCVBUF 256KB
+	tc.SetWriteBuffer(262144)    // SO_SNDBUF 256KB
+}
+
+// NQN returns the full NQN for a volume name using the shared builder.
+func (s *Server) NQN(volName string) string {
+	return blockvol.BuildNQN(s.cfg.NQNPrefix, volName)
 }
diff --git a/weed/storage/blockvol/nvme/wire.go b/weed/storage/blockvol/nvme/wire.go
index b8ac979b6..222dd42a2 100644
--- a/weed/storage/blockvol/nvme/wire.go
+++ b/weed/storage/blockvol/nvme/wire.go
@@ -23,6 +23,7 @@ type Reader struct {
 	rd     io.Reader
 	CH     CommonHeader
 	header [maxHeaderSize]byte
+	padBuf [maxHeaderSize]byte // reuse for padding skip
 }
 
 // NewReader wraps an io.Reader for NVMe/TCP PDU decoding.
@@ -67,20 +68,26 @@ func (r *Reader) Dequeue() (*CommonHeader, error) {
 // data (DataOffset - HeaderLength bytes).
 func (r *Reader) Receive(pdu PDU) error {
 	remain := int(r.CH.HeaderLength) - commonHeaderSize
-	if remain <= 0 {
-		return nil
-	}
-	if _, err := io.ReadFull(r.rd, r.header[commonHeaderSize:r.CH.HeaderLength]); err != nil {
-		return err
-	}
-	pdu.Unmarshal(r.header[commonHeaderSize:r.CH.HeaderLength])
-
-	// Skip padding between header and data.
-	pad := int(r.CH.DataOffset) - int(r.CH.HeaderLength)
-	if pad > 0 {
-		if _, err := io.ReadFull(r.rd, make([]byte, pad)); err != nil {
+	if remain > 0 {
+		if _, err := io.ReadFull(r.rd, r.header[commonHeaderSize:r.CH.HeaderLength]); err != nil {
 			return err
 		}
+		pdu.Unmarshal(r.header[commonHeaderSize:r.CH.HeaderLength])
+	}
+
+	// Skip padding between header and data.
+	// DataOffset can be up to 255 (uint8), so pad may exceed padBuf size.
+	// Use chunked discard to handle any valid padding length.
+	pad := int(r.CH.DataOffset) - int(r.CH.HeaderLength)
+	for pad > 0 {
+		n := pad
+		if n > len(r.padBuf) {
+			n = len(r.padBuf)
+		}
+		if _, err := io.ReadFull(r.rd, r.padBuf[:n]); err != nil {
+			return err
+		}
+		pad -= n
 	}
 	return nil
 }
@@ -113,6 +120,11 @@ func NewWriter(w io.Writer) *Writer {
 	return &Writer{wr: bufio.NewWriter(w)}
 }
 
+// NewWriterSize wraps an io.Writer with a specified buffer size.
+func NewWriterSize(w io.Writer, size int) *Writer {
+	return &Writer{wr: bufio.NewWriterSize(w, size)}
+}
+
 // PrepareHeaderOnly sets up a header-only PDU (no payload).
 // Call Flush() to write it to the wire.
 func (w *Writer) PrepareHeaderOnly(pduType uint8, pdu PDU, specificLen uint8) {
@@ -140,8 +152,8 @@ func (w *Writer) PrepareWithData(pduType, flags uint8, pdu PDU, specificLen uint
 	pdu.Marshal(w.header[commonHeaderSize:])
 }
 
-// Flush writes the prepared CommonHeader + specific header to the wire.
-// If there was payload data (from PrepareWithData), call FlushData after.
+// Flush writes the prepared CommonHeader + specific header to the bufio buffer.
+// Does NOT flush the underlying writer — call FlushBuf() for that.
 func (w *Writer) Flush() error {
 	w.CH.Marshal(w.header[:commonHeaderSize])
 	if _, err := w.wr.Write(w.header[:w.CH.HeaderLength]); err != nil {
@@ -150,32 +162,43 @@ func (w *Writer) Flush() error {
 	return nil
 }
 
-// FlushData writes payload data and flushes the underlying buffered writer.
-func (w *Writer) FlushData(data []byte) error {
+// FlushBuf flushes the underlying buffered writer to the wire.
+func (w *Writer) FlushBuf() error {
+	return w.wr.Flush()
+}
+
+// writeHeaderAndData encodes header (+optional data) into bufio. Does NOT flush.
+func (w *Writer) writeHeaderAndData(pduType, flags uint8, pdu PDU, specificLen uint8, data []byte) error {
+	if data != nil {
+		w.PrepareWithData(pduType, flags, pdu, specificLen, data)
+	} else {
+		w.PrepareHeaderOnly(pduType, pdu, specificLen)
+	}
+	if err := w.Flush(); err != nil {
+		return err
+	}
 	if len(data) > 0 {
 		if _, err := w.wr.Write(data); err != nil {
 			return err
 		}
 	}
-	return w.wr.Flush()
+	return nil
 }
 
-// SendHeaderOnly writes a complete header-only PDU (prepare + flush).
+// SendHeaderOnly writes a complete header-only PDU (prepare + flush to wire).
 func (w *Writer) SendHeaderOnly(pduType uint8, pdu PDU, specificLen uint8) error {
-	w.PrepareHeaderOnly(pduType, pdu, specificLen)
-	if err := w.Flush(); err != nil {
+	if err := w.writeHeaderAndData(pduType, 0, pdu, specificLen, nil); err != nil {
 		return err
 	}
-	return w.wr.Flush()
+	return w.FlushBuf()
 }
 
-// SendWithData writes a complete PDU with payload data.
+// SendWithData writes a complete PDU with payload data (prepare + flush to wire).
 func (w *Writer) SendWithData(pduType, flags uint8, pdu PDU, specificLen uint8, data []byte) error {
-	w.PrepareWithData(pduType, flags, pdu, specificLen, data)
-	if err := w.Flush(); err != nil {
+	if err := w.writeHeaderAndData(pduType, flags, pdu, specificLen, data); err != nil {
 		return err
 	}
-	return w.FlushData(data)
+	return w.FlushBuf()
 }
 
 // writeRaw writes raw bytes directly (used for ConnectData inline in capsule).
@@ -184,11 +207,6 @@ func (w *Writer) writeRaw(data []byte) error {
 	return err
 }
 
-// flushBuf flushes the underlying buffered writer.
-func (w *Writer) flushBuf() error {
-	return w.wr.Flush()
-}
-
 // ---------- Helpers ----------
 
 // putLE32 writes a uint32 in little-endian.
diff --git a/weed/storage/blockvol/nvme/write_retry.go b/weed/storage/blockvol/nvme/write_retry.go
new file mode 100644
index 000000000..f37892817
--- /dev/null
+++ b/weed/storage/blockvol/nvme/write_retry.go
@@ -0,0 +1,80 @@
+package nvme
+
+import (
+	"errors"
+	"math/rand"
+	"time"
+
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
+)
+
+// WALPressureProvider extends BlockDevice with WAL pressure reporting.
+type WALPressureProvider interface {
+	WALPressure() float64 // 0.0 = empty, 1.0 = full
+}
+
+// isRetryableWALPressure returns true if the error represents transient
+// WAL pressure that may clear with a short retry.
+func isRetryableWALPressure(err error) bool {
+	return err != nil && errors.Is(err, blockvol.ErrWALFull)
+}
+
+// writeRetryBackoffs defines the backoff schedule for writeWithRetry.
+var writeRetryBackoffs = [3]time.Duration{
+	50 * time.Millisecond,
+	200 * time.Millisecond,
+	800 * time.Millisecond,
+}
+
+// sleepFn is the sleep function used by retry/throttle helpers.
+// Replaced in tests for deterministic behavior.
+var sleepFn = time.Sleep
+
+// jitterFn returns a jitter duration given a max value.
+// Replaced in tests for deterministic behavior.
+var jitterFn = func(max time.Duration) time.Duration {
+	if max <= 0 {
+		return 0
+	}
+	return time.Duration(rand.Int63n(int64(max)))
+}
+
+// writeWithRetry wraps dev.WriteAt with target-side retry on WAL pressure.
+// Non-WAL errors return immediately. On WAL pressure, retries with jittered
+// backoff before giving up. Returns the last error unchanged so mapBlockError
+// preserves DNR=0 semantics.
+func writeWithRetry(dev BlockDevice, lba uint64, data []byte) error {
+	err := dev.WriteAt(lba, data)
+	if err == nil || !isRetryableWALPressure(err) {
+		return err
+	}
+
+	for _, backoff := range writeRetryBackoffs {
+		jitter := jitterFn(backoff / 4)
+		sleepFn(backoff + jitter)
+		err = dev.WriteAt(lba, data)
+		if err == nil || !isRetryableWALPressure(err) {
+			return err
+		}
+	}
+	return err
+}
+
+// throttleOnWALPressure inserts a small delay when WAL pressure is high,
+// desynchronizing concurrent writers to reduce thundering-herd retry storms.
+// No-op if the device does not implement WALPressureProvider.
+func throttleOnWALPressure(dev BlockDevice) {
+	prov, ok := dev.(WALPressureProvider)
+	if !ok {
+		return
+	}
+	p := prov.WALPressure()
+	if p < 0.9 {
+		return
+	}
+	// Scale: 0.9→1ms, 0.95→3ms, 1.0→5ms
+	ms := (p - 0.9) * 50
+	if ms > 0 {
+		sleepFn(time.Duration(ms * float64(time.Millisecond)))
+	}
+}
diff --git a/weed/storage/blockvol/operator/internal/controller/qa_reconciler_test.go b/weed/storage/blockvol/operator/internal/controller/qa_reconciler_test.go
index 80520a89c..81d41c17f 100644
--- a/weed/storage/blockvol/operator/internal/controller/qa_reconciler_test.go
+++ b/weed/storage/blockvol/operator/internal/controller/qa_reconciler_test.go
@@ -10,6 +10,7 @@ import (
 
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
+	rbacv1 "k8s.io/api/rbac/v1"
 	storagev1 "k8s.io/api/storage/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -811,3 +812,543 @@ func TestQA_RotationTimestamp_ExactSame_NoRotation(t *testing.T) {
 	}
 }
 
+// =============================================================================
+// 9B Track A: Spec Mutation Tests
+//
+// Verify that the reconciler correctly handles spec field changes between
+// reconcile cycles (image bump, address change, port change).
+// =============================================================================
+
+// 9B-M1: Image update propagates to CSI controller Deployment.
+func Test9B_SpecMutation_ImageUpdate_PropagatedToCSIController(t *testing.T) {
+	cluster := csiOnlyCluster()
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-block", "default") // finalizer
+	reconcile(t, r, "test-block", "default") // create resources
+
+	ctx := context.Background()
+
+	// Verify initial image
+	var dep appsv1.Deployment
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "kube-system"}, &dep); err != nil {
+		t.Fatal(err)
+	}
+	initialImage := dep.Spec.Template.Spec.Containers[0].Image
+
+	// Update image in CR spec
+	var latest blockv1alpha1.SeaweedBlockCluster
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+	latest.Spec.CSIImage = "sw-block-csi:v2.0"
+	if err := c.Update(ctx, &latest); err != nil {
+		t.Fatal(err)
+	}
+
+	// Reconcile with updated spec
+	reconcile(t, r, "test-block", "default")
+
+	// Image should be updated
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "kube-system"}, &dep); err != nil {
+		t.Fatal(err)
+	}
+	newImage := dep.Spec.Template.Spec.Containers[0].Image
+	if newImage == initialImage {
+		t.Errorf("CSI controller image not updated: still %q after spec change to sw-block-csi:v2.0", newImage)
+	}
+	if newImage != "sw-block-csi:v2.0" {
+		t.Errorf("CSI controller image = %q, want %q", newImage, "sw-block-csi:v2.0")
+	}
+}
+
+// 9B-M2: MasterRef address change propagates to CSI controller args.
+func Test9B_SpecMutation_MasterRefAddressChange(t *testing.T) {
+	cluster := csiOnlyCluster()
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-block", "default")
+	reconcile(t, r, "test-block", "default")
+
+	ctx := context.Background()
+
+	// Change master address
+	var latest blockv1alpha1.SeaweedBlockCluster
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+	latest.Spec.MasterRef.Address = "new-master.prod:9333"
+	if err := c.Update(ctx, &latest); err != nil {
+		t.Fatal(err)
+	}
+
+	reconcile(t, r, "test-block", "default")
+
+	// Status should reflect new master address
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+	if latest.Status.MasterAddress != "new-master.prod:9333" {
+		t.Errorf("masterAddress = %q, want %q", latest.Status.MasterAddress, "new-master.prod:9333")
+	}
+}
+
+// 9B-M3: StorageClassName change propagates — old SC retained, new SC created.
+func Test9B_SpecMutation_StorageClassNameChange(t *testing.T) {
+	cluster := csiOnlyCluster()
+	cluster.Spec.StorageClassName = "sc-v1"
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-block", "default")
+	reconcile(t, r, "test-block", "default")
+
+	ctx := context.Background()
+
+	// Old SC should exist
+	var oldSC storagev1.StorageClass
+	if err := c.Get(ctx, types.NamespacedName{Name: "sc-v1"}, &oldSC); err != nil {
+		t.Fatalf("initial SC should exist: %v", err)
+	}
+
+	// Change StorageClassName
+	var latest blockv1alpha1.SeaweedBlockCluster
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+	latest.Spec.StorageClassName = "sc-v2"
+	if err := c.Update(ctx, &latest); err != nil {
+		t.Fatal(err)
+	}
+
+	reconcile(t, r, "test-block", "default")
+
+	// New SC should exist
+	var newSC storagev1.StorageClass
+	if err := c.Get(ctx, types.NamespacedName{Name: "sc-v2"}, &newSC); err != nil {
+		t.Errorf("new SC should exist after name change: %v", err)
+	}
+
+	// Old SC still exists (operator doesn't garbage-collect renamed SCs mid-lifecycle)
+	// This is expected behavior — cleanup happens on CR deletion
+}
+
+// =============================================================================
+// 9B Track A: Resource Drift Correction Tests
+//
+// Verify that if someone externally modifies operator-managed resources,
+// the next reconcile restores them to desired state.
+// =============================================================================
+
+// 9B-D1: External image change on CSI controller is corrected by reconciler.
+func Test9B_DriftCorrection_CSIControllerImage(t *testing.T) {
+	cluster := csiOnlyCluster()
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-block", "default")
+	reconcile(t, r, "test-block", "default")
+
+	ctx := context.Background()
+
+	// Tamper: change CSI controller image externally
+	var dep appsv1.Deployment
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "kube-system"}, &dep); err != nil {
+		t.Fatal(err)
+	}
+	dep.Spec.Template.Spec.Containers[0].Image = "evil-image:latest"
+	if err := c.Update(ctx, &dep); err != nil {
+		t.Fatal(err)
+	}
+
+	// Reconcile should restore
+	reconcile(t, r, "test-block", "default")
+
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "kube-system"}, &dep); err != nil {
+		t.Fatal(err)
+	}
+	if dep.Spec.Template.Spec.Containers[0].Image == "evil-image:latest" {
+		t.Error("BUG: reconciler did not correct externally-tampered CSI controller image")
+	}
+}
+
+// 9B-D2: External label removal on cluster-scoped resource is corrected.
+func Test9B_DriftCorrection_ClusterRoleLabels(t *testing.T) {
+	cluster := csiOnlyCluster()
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-block", "default")
+	reconcile(t, r, "test-block", "default")
+
+	ctx := context.Background()
+
+	// Tamper: remove owner labels from ClusterRole
+	var cr rbacv1.ClusterRole
+	if err := c.Get(ctx, types.NamespacedName{Name: resources.ClusterRoleName()}, &cr); err != nil {
+		t.Fatal(err)
+	}
+	cr.Labels = map[string]string{"random": "label"} // wipe ownership
+	if err := c.Update(ctx, &cr); err != nil {
+		t.Fatal(err)
+	}
+
+	// Reconcile — since owner labels are gone, this is now an orphan.
+	// Reconciler should detect conflict (orphan without adopt = conflict).
+	reconcile(t, r, "test-block", "default")
+
+	var latest blockv1alpha1.SeaweedBlockCluster
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+
+	// The reconciler should fail because the ClusterRole is now an orphan
+	// (has labels but not the right owner labels)
+	if latest.Status.Phase != blockv1alpha1.PhaseFailed {
+		t.Errorf("phase = %q after label tampering; want Failed (orphan ClusterRole)", latest.Status.Phase)
+	}
+}
+
+// 9B-D3: Master StatefulSet replica count externally scaled → reconciler restores.
+func Test9B_DriftCorrection_MasterReplicaCount(t *testing.T) {
+	cluster := fullStackClusterWithVolume()
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-full", "default")
+	reconcile(t, r, "test-full", "default")
+
+	ctx := context.Background()
+
+	// Tamper: externally scale master to 3
+	var sts appsv1.StatefulSet
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-full-master", Namespace: "default"}, &sts); err != nil {
+		t.Fatal(err)
+	}
+	scaled := int32(3)
+	sts.Spec.Replicas = &scaled
+	if err := c.Update(ctx, &sts); err != nil {
+		t.Fatal(err)
+	}
+
+	// Reconcile should restore to spec value (1)
+	reconcile(t, r, "test-full", "default")
+
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-full-master", Namespace: "default"}, &sts); err != nil {
+		t.Fatal(err)
+	}
+	if sts.Spec.Replicas != nil && *sts.Spec.Replicas != 1 {
+		t.Errorf("master replicas = %d after drift correction, want 1", *sts.Spec.Replicas)
+	}
+}
+
+// =============================================================================
+// 9B Track A: Cleanup Edge Cases
+//
+// Verify cleanup handles: full-stack resources, custom namespaces,
+// partial resource sets (some already deleted).
+// =============================================================================
+
+// 9B-C1: Full-stack cleanup deletes master + volume StatefulSets + Services.
+func Test9B_Cleanup_FullStack_AllResources(t *testing.T) {
+	cluster := fullStackClusterWithVolume()
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-full", "default")
+	reconcile(t, r, "test-full", "default")
+
+	ctx := context.Background()
+
+	// Verify resources exist before cleanup
+	var masterSts appsv1.StatefulSet
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-full-master", Namespace: "default"}, &masterSts); err != nil {
+		t.Fatalf("master STS should exist: %v", err)
+	}
+	var volSts appsv1.StatefulSet
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-full-volume", Namespace: "default"}, &volSts); err != nil {
+		t.Fatalf("volume STS should exist: %v", err)
+	}
+
+	// Run cleanup
+	var latest blockv1alpha1.SeaweedBlockCluster
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-full", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+	if err := r.cleanupOwnedResources(ctx, &latest); err != nil {
+		t.Fatal(err)
+	}
+
+	// CSI cross-namespace resources should be cleaned
+	var dep appsv1.Deployment
+	err := c.Get(ctx, types.NamespacedName{Name: "test-full-csi-controller", Namespace: "kube-system"}, &dep)
+	if !apierrors.IsNotFound(err) {
+		t.Error("CSI controller should be deleted in full-stack cleanup")
+	}
+
+	var csiDriver storagev1.CSIDriver
+	err = c.Get(ctx, types.NamespacedName{Name: blockv1alpha1.CSIDriverName}, &csiDriver)
+	if !apierrors.IsNotFound(err) {
+		t.Error("CSIDriver should be deleted in full-stack cleanup")
+	}
+
+	// Note: master/volume StatefulSets are same-namespace with ownerRef,
+	// so K8s GC handles them (not the cleanup function). We verify the
+	// cleanup function doesn't error when they exist.
+}
+
+// 9B-C2: Cleanup with custom CSI namespace (non-default).
+func Test9B_Cleanup_CustomCSINamespace(t *testing.T) {
+	cluster := csiOnlyCluster()
+	cluster.Spec.CSINamespace = "custom-csi"
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-block", "default")
+	reconcile(t, r, "test-block", "default")
+
+	ctx := context.Background()
+
+	// Verify CSI resources are in custom namespace
+	var dep appsv1.Deployment
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "custom-csi"}, &dep); err != nil {
+		t.Fatalf("CSI controller should be in custom-csi: %v", err)
+	}
+
+	// Cleanup
+	var latest blockv1alpha1.SeaweedBlockCluster
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+	if err := r.cleanupOwnedResources(ctx, &latest); err != nil {
+		t.Fatal(err)
+	}
+
+	// Resources in custom namespace should be cleaned
+	err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "custom-csi"}, &dep)
+	if !apierrors.IsNotFound(err) {
+		t.Error("CSI controller in custom namespace should be deleted during cleanup")
+	}
+
+	var sa corev1.ServiceAccount
+	err = c.Get(ctx, types.NamespacedName{Name: resources.ServiceAccountName(), Namespace: "custom-csi"}, &sa)
+	if !apierrors.IsNotFound(err) {
+		t.Error("ServiceAccount in custom namespace should be deleted during cleanup")
+	}
+}
+
+// 9B-C3: Cleanup with partially-deleted resources (some already gone).
+func Test9B_Cleanup_PartialResources_NoError(t *testing.T) {
+	cluster := csiOnlyCluster()
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-block", "default")
+	reconcile(t, r, "test-block", "default")
+
+	ctx := context.Background()
+
+	// Manually delete some resources (simulating partial manual cleanup)
+	var dep appsv1.Deployment
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "kube-system"}, &dep); err == nil {
+		_ = c.Delete(ctx, &dep)
+	}
+	var csiDriver storagev1.CSIDriver
+	if err := c.Get(ctx, types.NamespacedName{Name: blockv1alpha1.CSIDriverName}, &csiDriver); err == nil {
+		_ = c.Delete(ctx, &csiDriver)
+	}
+
+	// Cleanup should still succeed (remaining resources cleaned, missing ones skipped)
+	var latest blockv1alpha1.SeaweedBlockCluster
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+	if err := r.cleanupOwnedResources(ctx, &latest); err != nil {
+		t.Errorf("cleanup with partially-deleted resources should succeed: %v", err)
+	}
+
+	// Remaining resources should still be cleaned
+	var sc storagev1.StorageClass
+	err := c.Get(ctx, types.NamespacedName{Name: "sw-block"}, &sc)
+	if !apierrors.IsNotFound(err) {
+		t.Error("StorageClass should be deleted even though other resources were already gone")
+	}
+}
+
+// =============================================================================
+// 9B Track A: CSINamespace Mutation Rejection
+//
+// Per 9B plan: reject namespace migration to avoid resource leak/partial
+// migration risk. Changing csiNamespace after initial reconcile should fail.
+// =============================================================================
+
+// 9B-N1: CSINamespace change after resources exist should be detected.
+// Note: This test documents the current behavior. If the reconciler doesn't
+// reject namespace changes yet, this test reveals the gap.
+func Test9B_CSINamespace_ChangeAfterCreation(t *testing.T) {
+	cluster := csiOnlyCluster()
+	cluster.Spec.CSINamespace = "ns-v1"
+	scheme := testScheme()
+	c := fake.NewClientBuilder().
+		WithScheme(scheme).
+		WithObjects(cluster).
+		WithStatusSubresource(cluster).
+		Build()
+
+	r := &Reconciler{Client: c, Scheme: scheme}
+	reconcile(t, r, "test-block", "default")
+	reconcile(t, r, "test-block", "default")
+
+	ctx := context.Background()
+
+	// Verify resources exist in ns-v1
+	var dep appsv1.Deployment
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "ns-v1"}, &dep); err != nil {
+		t.Fatalf("CSI controller should be in ns-v1: %v", err)
+	}
+
+	// Change CSI namespace
+	var latest blockv1alpha1.SeaweedBlockCluster
+	if err := c.Get(ctx, types.NamespacedName{Name: "test-block", Namespace: "default"}, &latest); err != nil {
+		t.Fatal(err)
+	}
+	latest.Spec.CSINamespace = "ns-v2"
+	if err := c.Update(ctx, &latest); err != nil {
+		t.Fatal(err)
+	}
+
+	// Reconcile — resources in ns-v1 are now orphaned, ns-v2 gets new resources.
+	// This is the dangerous behavior we want to detect.
+	reconcile(t, r, "test-block", "default")
+
+	// Check: old resources in ns-v1 should ideally be cleaned up OR the change rejected.
+	// Current behavior: ns-v1 resources are leaked (no cleanup for old namespace).
+	var oldDep appsv1.Deployment
+	err := c.Get(ctx, types.NamespacedName{Name: "test-block-csi-controller", Namespace: "ns-v1"}, &oldDep)
+	if err == nil {
+		// Resources leaked in old namespace — this is the known gap.
+		// The 9B plan says to REJECT namespace changes. This test documents the issue
+		// until validation is added.
+		t.Log("KNOWN GAP: CSI resources leaked in old namespace ns-v1 after namespace change. " +
+			"TODO: Add validation to reject csiNamespace mutation after initial reconcile.")
+	}
+}
+
+// =============================================================================
+// 9B Track A: Validation Completeness
+//
+// Additional validation edge cases not covered by existing QA tests.
+// =============================================================================
+
+// 9B-V1: ExtraArgs with spaces around flag should still be caught.
+func Test9B_Validation_ExtraArgs_SpacedFlag(t *testing.T) {
+	cluster := fullStackClusterWithVolume()
+	// Try with spaces — some users might format flags with spaces
+	cluster.Spec.Volume.ExtraArgs = []string{"-block.listen=0.0.0.0:4444"}
+
+	err := validate(&cluster.Spec)
+	if err == nil {
+		t.Error("ExtraArgs with -block.listen= should be rejected")
+	}
+}
+
+// 9B-V2: Multiple ExtraArgs, one valid one invalid.
+func Test9B_Validation_ExtraArgs_MixedValidInvalid(t *testing.T) {
+	cluster := fullStackClusterWithVolume()
+	cluster.Spec.Volume.ExtraArgs = []string{"-custom.flag=ok", "-port=9999", "-another=fine"}
+
+	err := validate(&cluster.Spec)
+	if err == nil {
+		t.Error("ExtraArgs containing -port= should be rejected even with other valid flags")
+	}
+	if err != nil && !strings.Contains(err.Error(), "-port=9999") {
+		t.Errorf("error should mention the specific offending flag, got: %v", err)
+	}
+}
+
+// 9B-V3: Negative storage size is rejected.
+func Test9B_Validation_NegativeStorageSize(t *testing.T) {
+	replicas := int32(1)
+	spec := &blockv1alpha1.SeaweedBlockClusterSpec{
+		Master: &blockv1alpha1.MasterSpec{
+			Replicas: &replicas,
+			Storage:  &blockv1alpha1.StorageSpec{Size: "-1Gi"},
+		},
+	}
+
+	err := validate(spec)
+	if err == nil {
+		t.Error("negative storage size should be rejected")
+	}
+}
+
+// 9B-V4: Empty DNS name (single character boundary).
+func Test9B_Validation_NameBoundary(t *testing.T) {
+	// Single char name should be valid
+	if err := validateName("a"); err != nil {
+		t.Errorf("single char name should be valid: %v", err)
+	}
+
+	// Exactly maxCRNameLength should be valid
+	if err := validateName(strings.Repeat("x", maxCRNameLength)); err != nil {
+		t.Errorf("max length name should be valid: %v", err)
+	}
+
+	// maxCRNameLength+1 should fail
+	if err := validateName(strings.Repeat("x", maxCRNameLength+1)); err == nil {
+		t.Error("maxCRNameLength+1 should be rejected")
+	}
+
+	// Uppercase should be rejected (DNS labels are lowercase)
+	if err := validateName("MyCluster"); err == nil {
+		t.Error("uppercase name should be rejected as invalid DNS label")
+	}
+}
+
diff --git a/weed/storage/blockvol/qa_phase4a_cp3_test.go b/weed/storage/blockvol/qa_phase4a_cp3_test.go
index 824363eaa..0869408ac 100644
--- a/weed/storage/blockvol/qa_phase4a_cp3_test.go
+++ b/weed/storage/blockvol/qa_phase4a_cp3_test.go
@@ -78,6 +78,10 @@ func cp3Vol(t *testing.T, name string, walSize uint64) *BlockVol {
 	cfg := DefaultConfig()
 	cfg.FlushInterval = 5 * time.Millisecond
 	cfg.WALFullTimeout = 200 * time.Millisecond
+	// Relax admission control for tiny test WALs: prevent watermark delays
+	// from changing flusher/rebuild timing on 64KB WALs.
+	cfg.WALSoftWatermark = 0.95
+	cfg.WALHardWatermark = 0.99
 	vol, err := CreateBlockVol(filepath.Join(dir, name), CreateOptions{
 		VolumeSize: 64 * 1024,
 		BlockSize:  4096,
diff --git a/weed/storage/blockvol/qa_wal_admission_test.go b/weed/storage/blockvol/qa_wal_admission_test.go
new file mode 100644
index 000000000..b29487944
--- /dev/null
+++ b/weed/storage/blockvol/qa_wal_admission_test.go
@@ -0,0 +1,462 @@
+package blockvol
+
+import (
+	"errors"
+	"math/rand"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+// =============================================================================
+// QA Adversarial Tests for WALAdmission (BUG-CP103-2)
+//
+// These tests exercise race conditions, starvation scenarios, and edge cases
+// that go beyond the dev-test coverage. All tests are deterministic where
+// possible (injectable sleepFn) and use real concurrency where needed.
+// =============================================================================
+
+// TestQA_Admission_PressureOscillation rapidly cycles pressure between all
+// three zones (below-soft, soft-to-hard, above-hard) while concurrent writers
+// attempt to acquire. No writer should panic or deadlock.
+func TestQA_Admission_PressureOscillation(t *testing.T) {
+	var pressure atomic.Int64
+	pressure.Store(50) // start below soft
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 8,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+
+	// Oscillator: cycles pressure through all zones every 2ms.
+	stopOsc := make(chan struct{})
+	go func() {
+		zones := []int64{30, 80, 95, 50, 75, 92, 40, 85, 98, 20}
+		i := 0
+		for {
+			select {
+			case <-stopOsc:
+				return
+			default:
+				pressure.Store(zones[i%len(zones)])
+				i++
+				time.Sleep(500 * time.Microsecond)
+			}
+		}
+	}()
+
+	// 16 writers doing rapid acquire/release cycles.
+	var wg sync.WaitGroup
+	var successes, failures atomic.Int64
+	const writers = 16
+	const iterations = 50
+
+	wg.Add(writers)
+	for i := 0; i < writers; i++ {
+		go func() {
+			defer wg.Done()
+			for j := 0; j < iterations; j++ {
+				err := a.Acquire(50 * time.Millisecond)
+				if err == nil {
+					successes.Add(1)
+					time.Sleep(time.Duration(rand.Intn(100)) * time.Microsecond)
+					a.Release()
+				} else {
+					failures.Add(1)
+					if !errors.Is(err, ErrWALFull) {
+						t.Errorf("unexpected error: %v", err)
+					}
+				}
+			}
+		}()
+	}
+
+	wg.Wait()
+	close(stopOsc)
+
+	total := successes.Load() + failures.Load()
+	if total != writers*iterations {
+		t.Fatalf("expected %d total operations, got %d", writers*iterations, total)
+	}
+	// With oscillating pressure and 50ms timeout, most should succeed.
+	if successes.Load() == 0 {
+		t.Fatal("all writers failed — admission too aggressive")
+	}
+	t.Logf("successes=%d failures=%d (of %d)", successes.Load(), failures.Load(), total)
+}
+
+// TestQA_Admission_StarvationUnderSoftPressure verifies that soft-watermark
+// throttling doesn't cause starvation. Even at pressure just below hard mark,
+// all writers should eventually complete (with delay, not rejection).
+func TestQA_Admission_StarvationUnderSoftPressure(t *testing.T) {
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 4,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.89 }, // just below hard
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+	// Soft watermark delay is real (not replaced) but max ~5ms, so this
+	// should complete in reasonable time.
+
+	var wg sync.WaitGroup
+	const writers = 20
+
+	wg.Add(writers)
+	for i := 0; i < writers; i++ {
+		go func(id int) {
+			defer wg.Done()
+			if err := a.Acquire(5 * time.Second); err != nil {
+				t.Errorf("writer %d starved: %v", id, err)
+			} else {
+				time.Sleep(100 * time.Microsecond)
+				a.Release()
+			}
+		}(i)
+	}
+	wg.Wait()
+}
+
+// TestQA_Admission_HardToSoftTransitionNoDeadlock verifies that writers
+// blocked in the hard-watermark loop properly transition when pressure drops
+// to the soft zone (not below soft). They should proceed to semaphore
+// acquisition, not re-enter the hard loop.
+func TestQA_Admission_HardToSoftTransitionNoDeadlock(t *testing.T) {
+	var pressure atomic.Int64
+	pressure.Store(95) // above hard
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+
+	var sleepCount atomic.Int64
+	a.sleepFn = func(d time.Duration) {
+		n := sleepCount.Add(1)
+		// After 3 polls in hard loop, drop pressure to soft zone (not below soft).
+		if n == 3 {
+			pressure.Store(80) // between soft and hard
+		}
+	}
+
+	if err := a.Acquire(1 * time.Second); err != nil {
+		t.Fatalf("Acquire failed: %v", err)
+	}
+	a.Release()
+
+	if sleepCount.Load() < 3 {
+		t.Fatalf("expected >= 3 hard-loop sleeps, got %d", sleepCount.Load())
+	}
+}
+
+// TestQA_Admission_SemaphoreFullWithHardPressureDrain tests the combined
+// scenario: hard pressure AND full semaphore. The writer should wait for
+// pressure to drop, then wait for a semaphore slot, all within a single
+// timeout budget.
+func TestQA_Admission_SemaphoreFullWithHardPressureDrain(t *testing.T) {
+	var pressure atomic.Int64
+	pressure.Store(95)
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 1,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+
+	// Fill semaphore.
+	a.sem <- struct{}{}
+
+	// Drop pressure after 10ms, release semaphore after 30ms.
+	go func() {
+		time.Sleep(10 * time.Millisecond)
+		pressure.Store(50)
+		time.Sleep(20 * time.Millisecond)
+		<-a.sem
+	}()
+
+	start := time.Now()
+	err := a.Acquire(500 * time.Millisecond)
+	elapsed := time.Since(start)
+
+	if err != nil {
+		t.Fatalf("expected success after pressure+semaphore drain, got: %v", err)
+	}
+	a.Release()
+
+	// Should complete in ~30-50ms, not 500ms.
+	if elapsed > 200*time.Millisecond {
+		t.Fatalf("elapsed %v, expected < 200ms", elapsed)
+	}
+	t.Logf("combined hard+semaphore wait: %v", elapsed)
+}
+
+// TestQA_Admission_ReleaseWithoutAcquire verifies that an unpaired Release
+// panics with a channel receive on empty channel (tests the invariant, not
+// the behavior — this is a programmer error). We verify the semaphore can
+// still be used correctly after proper acquire/release cycles.
+func TestQA_Admission_DoubleReleaseSafety(t *testing.T) {
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 2,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+
+	// Normal acquire/release cycle should work.
+	if err := a.Acquire(100 * time.Millisecond); err != nil {
+		t.Fatalf("Acquire: %v", err)
+	}
+	a.Release()
+
+	// Verify semaphore is clean: can acquire maxConcurrent times.
+	for i := 0; i < 2; i++ {
+		if err := a.Acquire(100 * time.Millisecond); err != nil {
+			t.Fatalf("Acquire %d after release: %v", i, err)
+		}
+	}
+	// Should be full now.
+	err := a.Acquire(5 * time.Millisecond)
+	if !errors.Is(err, ErrWALFull) {
+		t.Fatalf("expected ErrWALFull with full semaphore, got %v", err)
+	}
+	// Clean up.
+	a.Release()
+	a.Release()
+}
+
+// TestQA_Admission_SoftDelayScalingBoundary checks delay calculation at
+// exact boundary values: exactly soft, exactly (hard-epsilon), mid-point.
+func TestQA_Admission_SoftDelayScalingBoundary(t *testing.T) {
+	cases := []struct {
+		name     string
+		pressure float64
+		minDelay time.Duration
+		maxDelay time.Duration
+	}{
+		{"at_soft", 0.70, 0, 100 * time.Microsecond},           // scale=0, delay≈0
+		{"mid", 0.80, 2 * time.Millisecond, 3 * time.Millisecond}, // scale=0.5, delay=2.5ms
+		{"near_hard", 0.899, 4 * time.Millisecond, 5500 * time.Microsecond}, // scale≈0.995, delay≈4.98ms
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			var sleepDur time.Duration
+			a := NewWALAdmission(WALAdmissionConfig{
+				MaxConcurrent: 16,
+				SoftWatermark: 0.7,
+				HardWatermark: 0.9,
+				WALUsedFn:     func() float64 { return tc.pressure },
+				NotifyFn:      func() {},
+				ClosedFn:      func() bool { return false },
+			})
+			a.sleepFn = func(d time.Duration) { sleepDur = d }
+
+			if err := a.Acquire(100 * time.Millisecond); err != nil {
+				t.Fatalf("Acquire: %v", err)
+			}
+			a.Release()
+
+			if sleepDur < tc.minDelay || sleepDur > tc.maxDelay {
+				t.Fatalf("pressure=%.3f: delay=%v, want [%v, %v]",
+					tc.pressure, sleepDur, tc.minDelay, tc.maxDelay)
+			}
+		})
+	}
+}
+
+// TestQA_Admission_CloseRaceBothPaths starts many goroutines that will hit
+// both the hard-watermark path and the semaphore-wait path, then closes the
+// volume. All goroutines must return ErrVolumeClosed or nil (success before
+// close), never hang.
+func TestQA_Admission_CloseRaceBothPaths(t *testing.T) {
+	var closed atomic.Bool
+	var pressure atomic.Int64
+	pressure.Store(95) // start above hard
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 2,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      closed.Load,
+	})
+
+	var wg sync.WaitGroup
+	const writers = 20
+
+	wg.Add(writers)
+	for i := 0; i < writers; i++ {
+		go func() {
+			defer wg.Done()
+			err := a.Acquire(5 * time.Second)
+			if err == nil {
+				a.Release()
+				return
+			}
+			if !errors.Is(err, ErrVolumeClosed) && !errors.Is(err, ErrWALFull) {
+				t.Errorf("unexpected error: %v", err)
+			}
+		}()
+	}
+
+	// Let writers enter the hard-watermark loop, then close.
+	time.Sleep(10 * time.Millisecond)
+	closed.Store(true)
+
+	// Wait with a hard deadline — if any goroutine hangs, this test hangs
+	// and the test framework's timeout will catch it.
+	done := make(chan struct{})
+	go func() {
+		wg.Wait()
+		close(done)
+	}()
+
+	select {
+	case <-done:
+		// All writers returned — good.
+	case <-time.After(5 * time.Second):
+		t.Fatal("deadlock: some writers did not return after close")
+	}
+}
+
+// TestQA_Admission_ZeroPressureThroughput verifies that under zero WAL
+// pressure, admission adds negligible overhead. 1000 acquire/release cycles
+// should complete in under 100ms (no sleeps, no waits).
+func TestQA_Admission_ZeroPressureThroughput(t *testing.T) {
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 64,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+
+	start := time.Now()
+	const iterations = 1000
+	for i := 0; i < iterations; i++ {
+		if err := a.Acquire(100 * time.Millisecond); err != nil {
+			t.Fatalf("Acquire %d: %v", i, err)
+		}
+		a.Release()
+	}
+	elapsed := time.Since(start)
+
+	if elapsed > 100*time.Millisecond {
+		t.Fatalf("zero-pressure throughput too slow: %d ops in %v (expected < 100ms)", iterations, elapsed)
+	}
+	t.Logf("zero-pressure: %d acquire/release cycles in %v", iterations, elapsed)
+}
+
+// TestQA_Admission_NotifyFnPanicRecovery verifies that if notifyFn panics
+// (flusher bug), the panic propagates — we do NOT silently swallow it.
+// This test documents the contract: notifyFn must not panic.
+func TestQA_Admission_NotifyFnPanicPropagates(t *testing.T) {
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.8 }, // soft zone triggers notify
+		NotifyFn:      func() { panic("flusher bug") },
+		ClosedFn:      func() bool { return false },
+	})
+	a.sleepFn = func(d time.Duration) {}
+
+	defer func() {
+		r := recover()
+		if r == nil {
+			t.Fatal("expected panic from notifyFn to propagate")
+		}
+		if r != "flusher bug" {
+			t.Fatalf("unexpected panic value: %v", r)
+		}
+	}()
+
+	a.Acquire(100 * time.Millisecond)
+}
+
+// TestQA_Admission_WALUsedFnReturnsAboveOne tests edge case where WALUsedFn
+// returns > 1.0 (shouldn't happen, but defensive). Should be treated as
+// above hard watermark.
+func TestQA_Admission_WALUsedFnReturnsAboveOne(t *testing.T) {
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 1.5 }, // bogus value > 1.0
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+	a.sleepFn = func(d time.Duration) {} // no-op to speed up
+
+	err := a.Acquire(10 * time.Millisecond)
+	if !errors.Is(err, ErrWALFull) {
+		t.Fatalf("expected ErrWALFull for pressure > 1.0, got %v", err)
+	}
+}
+
+// TestQA_Admission_WriteLBAIntegration creates a real BlockVol and verifies
+// that concurrent writes at maximum concurrency all succeed without ErrWALFull
+// when the flusher is active and WAL is adequately sized.
+func TestQA_Admission_WriteLBAIntegration(t *testing.T) {
+	dir := t.TempDir()
+	cfg := DefaultConfig()
+	cfg.WALMaxConcurrentWrites = 4
+	cfg.FlushInterval = 5 * time.Millisecond
+	cfg.WALFullTimeout = 2 * time.Second
+
+	vol, err := CreateBlockVol(dir+"/test.blk", CreateOptions{
+		VolumeSize: 256 * 1024,  // 256KB
+		BlockSize:  4096,
+		WALSize:    128 * 1024,  // 128KB — enough for concurrent writes
+	}, cfg)
+	if err != nil {
+		t.Fatalf("CreateBlockVol: %v", err)
+	}
+	defer vol.Close()
+
+	// 16 goroutines, each writing 10 blocks concurrently.
+	// Admission control should bound to 4 concurrent, preventing WAL overflow.
+	var wg sync.WaitGroup
+	var writeErrors atomic.Int64
+	const writers = 16
+	const writesPerWriter = 10
+
+	wg.Add(writers)
+	for i := 0; i < writers; i++ {
+		go func(id int) {
+			defer wg.Done()
+			data := make([]byte, 4096)
+			data[0] = byte(id)
+			for j := 0; j < writesPerWriter; j++ {
+				lba := uint64((id*writesPerWriter + j) % 64) // 64 blocks in 256KB
+				if err := vol.WriteLBA(lba, data); err != nil {
+					writeErrors.Add(1)
+					t.Errorf("writer %d write %d: %v", id, j, err)
+				}
+			}
+		}(i)
+	}
+	wg.Wait()
+
+	if writeErrors.Load() > 0 {
+		t.Fatalf("%d writes failed — admission control should have prevented WAL overflow", writeErrors.Load())
+	}
+	t.Logf("all %d writes succeeded with maxConcurrent=4", writers*writesPerWriter)
+}
diff --git a/weed/storage/blockvol/testrunner/actions/bench.go b/weed/storage/blockvol/testrunner/actions/bench.go
new file mode 100644
index 000000000..df51eae9e
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/actions/bench.go
@@ -0,0 +1,448 @@
+package actions
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"math"
+	"sort"
+	"strconv"
+	"strings"
+
+	tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
+)
+
+// RegisterBenchActions registers benchmark-related actions.
+func RegisterBenchActions(r *tr.Registry) {
+	r.RegisterFunc("fio_json", tr.TierBlock, fioJSON)
+	r.RegisterFunc("fio_parse", tr.TierCore, fioParse)
+	r.RegisterFunc("bench_compare", tr.TierCore, benchCompare)
+	r.RegisterFunc("bench_stats", tr.TierCore, benchStats)
+}
+
+// fioJSON runs fio with JSON output. Supports numjobs for multi-queue testing.
+// Params:
+//   - device (required): block device path
+//   - rw: IO pattern (default: "randwrite")
+//   - bs: block size (default: "4k")
+//   - iodepth: queue depth per job (default: "32")
+//   - numjobs: number of parallel jobs (default: "1")
+//   - runtime: seconds (default: "60")
+//   - size: file/device size (default: "256M")
+//   - name: job name (default: "bench")
+//   - rwmixread: read percentage for randrw (optional)
+//
+// Returns: value = fio JSON output string
+func fioJSON(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	device := act.Params["device"]
+	if device == "" {
+		return nil, fmt.Errorf("fio_json: device param required")
+	}
+
+	rw := paramDefault(act.Params, "rw", "randwrite")
+	bs := paramDefault(act.Params, "bs", "4k")
+	iodepth := paramDefault(act.Params, "iodepth", "32")
+	numjobs := paramDefault(act.Params, "numjobs", "1")
+	runtime := paramDefault(act.Params, "runtime", "60")
+	size := paramDefault(act.Params, "size", "256M")
+	name := paramDefault(act.Params, "name", "bench")
+
+	node, err := getNode(actx, act.Node)
+	if err != nil {
+		return nil, err
+	}
+
+	cmd := fmt.Sprintf("fio --name=%s --filename=%s --rw=%s --bs=%s --iodepth=%s --numjobs=%s --direct=1 --ioengine=libaio --runtime=%s --time_based --size=%s --group_reporting --output-format=json",
+		name, device, rw, bs, iodepth, numjobs, runtime, size)
+
+	if rwmixread := act.Params["rwmixread"]; rwmixread != "" {
+		cmd += fmt.Sprintf(" --rwmixread=%s", rwmixread)
+	}
+
+	actx.Log("  fio %s bs=%s j=%s qd=%s %ss on %s", rw, bs, numjobs, iodepth, runtime, device)
+	stdout, stderr, code, err := node.RunRoot(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("fio_json: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": stdout}, nil
+}
+
+// fioParse extracts a specific metric from fio JSON output.
+// Params:
+//   - json_var: name of var containing fio JSON (required)
+//   - metric: one of "iops", "bw_bytes", "lat_mean_us", "lat_p50_us", "lat_p99_us", "lat_p999_us" (required)
+//   - direction: "read" or "write" (default: auto-detect from rw type)
+//
+// Returns: value = numeric string
+func fioParse(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	varName := act.Params["json_var"]
+	if varName == "" {
+		return nil, fmt.Errorf("fio_parse: json_var param required")
+	}
+	metric := act.Params["metric"]
+	if metric == "" {
+		return nil, fmt.Errorf("fio_parse: metric param required")
+	}
+
+	jsonStr := actx.Vars[varName]
+	if jsonStr == "" {
+		return nil, fmt.Errorf("fio_parse: var %q is empty", varName)
+	}
+
+	val, err := ParseFioMetric(jsonStr, metric, act.Params["direction"])
+	if err != nil {
+		return nil, fmt.Errorf("fio_parse: %w", err)
+	}
+
+	return map[string]string{"value": strconv.FormatFloat(val, 'f', 2, 64)}, nil
+}
+
+// benchCompare compares two fio results and asserts a performance gate.
+// Params:
+//   - a_var: var name for baseline (e.g. iSCSI) fio JSON (required)
+//   - b_var: var name for candidate (e.g. NVMe) fio JSON (required)
+//   - metric: metric to compare (required, same as fio_parse)
+//   - gate: minimum ratio b/a (default: "1.0" = candidate >= baseline)
+//   - warn_gate: soft threshold — ratio < gate but >= warn_gate returns success
+//     with value prefixed "WARN:" instead of hard-failing (optional)
+//   - direction: "read" or "write" (default: auto-detect)
+//
+// Returns: value = "delta_pct" (e.g. "+14.1%"), prefixed "WARN:" if in warn band.
+// Fails only if candidate/baseline < warn_gate (or < gate when warn_gate is unset).
+func benchCompare(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	aVar := act.Params["a_var"]
+	bVar := act.Params["b_var"]
+	metric := act.Params["metric"]
+	if aVar == "" || bVar == "" || metric == "" {
+		return nil, fmt.Errorf("bench_compare: a_var, b_var, metric params required")
+	}
+
+	gateStr := paramDefault(act.Params, "gate", "1.0")
+	gate, err := strconv.ParseFloat(gateStr, 64)
+	if err != nil {
+		return nil, fmt.Errorf("bench_compare: invalid gate %q: %w", gateStr, err)
+	}
+
+	// warn_gate: soft threshold below gate. If ratio is between warn_gate and gate,
+	// we return success with a "WARN:" prefix instead of hard-failing.
+	warnGate := 0.0
+	hasWarnGate := false
+	if wg := act.Params["warn_gate"]; wg != "" {
+		warnGate, err = strconv.ParseFloat(wg, 64)
+		if err != nil {
+			return nil, fmt.Errorf("bench_compare: invalid warn_gate %q: %w", wg, err)
+		}
+		hasWarnGate = true
+	}
+
+	direction := act.Params["direction"]
+
+	aJSON := actx.Vars[aVar]
+	bJSON := actx.Vars[bVar]
+	if aJSON == "" {
+		return nil, fmt.Errorf("bench_compare: var %q is empty", aVar)
+	}
+	if bJSON == "" {
+		return nil, fmt.Errorf("bench_compare: var %q is empty", bVar)
+	}
+
+	aVal, err := ParseFioMetric(aJSON, metric, direction)
+	if err != nil {
+		return nil, fmt.Errorf("bench_compare baseline (%s): %w", aVar, err)
+	}
+	bVal, err := ParseFioMetric(bJSON, metric, direction)
+	if err != nil {
+		return nil, fmt.Errorf("bench_compare candidate (%s): %w", bVar, err)
+	}
+
+	// For latency metrics, lower is better — invert the comparison.
+	isLatency := strings.HasPrefix(metric, "lat_")
+	var ratio float64
+	var deltaStr string
+
+	if aVal == 0 {
+		return nil, fmt.Errorf("bench_compare: baseline %s = 0, cannot compute ratio", metric)
+	}
+
+	if isLatency {
+		// For latency: ratio = baseline/candidate (higher is better = candidate has lower latency)
+		ratio = aVal / bVal
+		deltaPct := (aVal - bVal) / aVal * 100
+		if deltaPct >= 0 {
+			deltaStr = fmt.Sprintf("-%.1f%%", deltaPct) // latency decreased = good
+		} else {
+			deltaStr = fmt.Sprintf("+%.1f%%", -deltaPct) // latency increased = bad
+		}
+	} else {
+		// For throughput: ratio = candidate/baseline (higher is better)
+		ratio = bVal / aVal
+		deltaPct := (bVal - aVal) / aVal * 100
+		if deltaPct >= 0 {
+			deltaStr = fmt.Sprintf("+%.1f%%", deltaPct)
+		} else {
+			deltaStr = fmt.Sprintf("%.1f%%", deltaPct)
+		}
+	}
+
+	actx.Log("  %s: baseline=%.1f candidate=%.1f delta=%s ratio=%.3f gate=%.2f",
+		metric, aVal, bVal, deltaStr, ratio, gate)
+
+	if ratio < gate {
+		// If warn_gate is set and ratio >= warn_gate, return success with WARN prefix.
+		if hasWarnGate && ratio >= warnGate {
+			actx.Log("  WARN: ratio %.3f below gate %.2f but above warn_gate %.2f", ratio, gate, warnGate)
+			return map[string]string{"value": "WARN:" + deltaStr}, nil
+		}
+		return nil, fmt.Errorf("bench_compare FAIL: %s ratio=%.3f < gate=%.2f (baseline=%.1f candidate=%.1f delta=%s)",
+			metric, ratio, gate, aVal, bVal, deltaStr)
+	}
+
+	return map[string]string{"value": deltaStr}, nil
+}
+
+// --- fio JSON parsing ---
+
+// fioOutput represents the top-level fio JSON output.
+type fioOutput struct {
+	Jobs []fioJob `json:"jobs"`
+}
+
+type fioJob struct {
+	JobName string      `json:"jobname"`
+	Read    fioJobStats `json:"read"`
+	Write   fioJobStats `json:"write"`
+}
+
+type fioJobStats struct {
+	IOPS    float64    `json:"iops"`
+	BWBytes float64    `json:"bw_bytes"`
+	LatNS   fioLatency `json:"lat_ns"`
+}
+
+type fioLatency struct {
+	Mean       float64            `json:"mean"`
+	Percentile map[string]float64 `json:"percentile"`
+}
+
+// ParseFioMetric extracts a named metric from fio JSON.
+// direction: "read", "write", or "" (auto-detect: use whichever has IOPS > 0).
+// Supported metrics: "iops", "bw_bytes", "bw_mb", "lat_mean_us", "lat_p50_us", "lat_p99_us", "lat_p999_us"
+func ParseFioMetric(jsonStr, metric, direction string) (float64, error) {
+	var output fioOutput
+	if err := json.Unmarshal([]byte(jsonStr), &output); err != nil {
+		return 0, fmt.Errorf("parse fio JSON: %w", err)
+	}
+	if len(output.Jobs) == 0 {
+		return 0, fmt.Errorf("fio JSON has no jobs")
+	}
+
+	// Use first job (group_reporting merges into one).
+	job := output.Jobs[0]
+
+	// Auto-detect direction.
+	var stats fioJobStats
+	switch direction {
+	case "read":
+		stats = job.Read
+	case "write":
+		stats = job.Write
+	default:
+		if job.Write.IOPS > 0 {
+			stats = job.Write
+		} else {
+			stats = job.Read
+		}
+	}
+
+	switch metric {
+	case "iops":
+		return stats.IOPS, nil
+	case "bw_bytes":
+		return stats.BWBytes, nil
+	case "bw_mb":
+		return stats.BWBytes / (1024 * 1024), nil
+	case "lat_mean_us":
+		return stats.LatNS.Mean / 1000, nil // ns → µs
+	case "lat_p50_us":
+		return getPercentile(stats.LatNS, "50.000000") / 1000, nil
+	case "lat_p99_us":
+		return getPercentile(stats.LatNS, "99.000000") / 1000, nil
+	case "lat_p999_us":
+		return getPercentile(stats.LatNS, "99.900000") / 1000, nil
+	default:
+		return 0, fmt.Errorf("unknown metric %q", metric)
+	}
+}
+
+func getPercentile(lat fioLatency, key string) float64 {
+	if lat.Percentile == nil {
+		return 0
+	}
+	return lat.Percentile[key]
+}
+
+// benchStats computes statistics from a comma-separated list of values.
+// Useful for aggregating results from multiple runs outside the phase repeat system.
+// Params:
+//   - values_var: name of var containing comma-separated numeric values (required)
+//   - trim_pct: percentage of outliers to trim from each end (default: "20")
+//   - label: label for log output (default: "bench_stats")
+//
+// Returns: value = median. Also sets {save_as}_mean, _stddev, _min, _max, _n.
+func benchStats(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	varName := act.Params["values_var"]
+	if varName == "" {
+		return nil, fmt.Errorf("bench_stats: values_var param required")
+	}
+	valStr := actx.Vars[varName]
+	if valStr == "" {
+		return nil, fmt.Errorf("bench_stats: var %q is empty", varName)
+	}
+
+	trimPct := 20
+	if tp := act.Params["trim_pct"]; tp != "" {
+		if v, err := strconv.Atoi(tp); err == nil {
+			trimPct = v
+		}
+	}
+	label := act.Params["label"]
+	if label == "" {
+		label = "bench_stats"
+	}
+
+	// Parse comma-separated values.
+	parts := strings.Split(valStr, ",")
+	var values []float64
+	for _, p := range parts {
+		p = strings.TrimSpace(p)
+		if p == "" {
+			continue
+		}
+		f, err := strconv.ParseFloat(p, 64)
+		if err != nil {
+			return nil, fmt.Errorf("bench_stats: invalid value %q in %s: %w", p, varName, err)
+		}
+		values = append(values, f)
+	}
+	if len(values) == 0 {
+		return nil, fmt.Errorf("bench_stats: no numeric values in %s", varName)
+	}
+
+	// Trim outliers and compute stats.
+	trimmed := trimValues(values, trimPct)
+	stats := tr.ComputeStats(trimmed)
+
+	actx.Log("  [%s] n=%d median=%.2f mean=%.2f stddev=%.2f min=%.2f max=%.2f (trimmed %d%% from %d)",
+		label, stats.Count, stats.P50, stats.Mean, stats.StdDev, stats.Min, stats.Max, trimPct, len(values))
+
+	result := map[string]string{
+		"value": strconv.FormatFloat(stats.P50, 'f', 2, 64),
+	}
+
+	// Store detailed stats as __-prefixed vars for auto-propagation.
+	if act.SaveAs != "" {
+		actx.Vars[act.SaveAs+"_mean"] = strconv.FormatFloat(stats.Mean, 'f', 2, 64)
+		actx.Vars[act.SaveAs+"_stddev"] = strconv.FormatFloat(stats.StdDev, 'f', 2, 64)
+		actx.Vars[act.SaveAs+"_min"] = strconv.FormatFloat(stats.Min, 'f', 2, 64)
+		actx.Vars[act.SaveAs+"_max"] = strconv.FormatFloat(stats.Max, 'f', 2, 64)
+		actx.Vars[act.SaveAs+"_n"] = strconv.Itoa(stats.Count)
+	}
+
+	return result, nil
+}
+
+// trimValues removes the top and bottom pct% of values.
+func trimValues(values []float64, pct int) []float64 {
+	if len(values) <= 2 || pct <= 0 {
+		return values
+	}
+	sorted := make([]float64, len(values))
+	copy(sorted, values)
+	sort.Float64s(sorted)
+
+	trim := int(math.Round(float64(len(sorted)) * float64(pct) / 100.0))
+	if trim*2 >= len(sorted) {
+		trim = (len(sorted) - 1) / 2
+	}
+	return sorted[trim : len(sorted)-trim]
+}
+
+func paramDefault(params map[string]string, key, def string) string {
+	if v := params[key]; v != "" {
+		return v
+	}
+	return def
+}
+
+// FormatBenchReport generates a human-readable A/B comparison table.
+// results is a list of {workload, metric, baselineVal, candidateVal, deltaPct, gate, pass}.
+func FormatBenchReport(results []BenchResult) string {
+	var b strings.Builder
+	b.WriteString(fmt.Sprintf("%-24s | %12s | %12s | %8s | %s\n", "Workload", "Baseline", "Candidate", "Delta", "Gate"))
+	b.WriteString(strings.Repeat("-", 76) + "\n")
+	for _, r := range results {
+		status := "PASS"
+		if !r.Pass {
+			status = "FAIL"
+			if r.Ratio >= 0.9 {
+				status = "WARN"
+			}
+		}
+		b.WriteString(fmt.Sprintf("%-24s | %12.1f | %12.1f | %7s | %s\n",
+			r.Workload, r.Baseline, r.Candidate, r.Delta, status))
+	}
+	return b.String()
+}
+
+// BenchResult holds one row of A/B comparison.
+type BenchResult struct {
+	Workload  string
+	Metric    string
+	Baseline  float64
+	Candidate float64
+	Delta     string
+	Ratio     float64
+	Gate      float64
+	Pass      bool
+}
+
+// ComputeBenchResult computes a single A/B comparison row.
+func ComputeBenchResult(workload, metric string, baseline, candidate, gate float64) BenchResult {
+	isLatency := strings.HasPrefix(metric, "lat_")
+	var ratio float64
+	var delta string
+
+	if baseline == 0 {
+		return BenchResult{Workload: workload, Metric: metric, Pass: false, Delta: "N/A"}
+	}
+
+	if isLatency {
+		ratio = baseline / candidate
+		deltaPct := (baseline - candidate) / baseline * 100
+		if deltaPct >= 0 {
+			delta = fmt.Sprintf("-%.1f%%", deltaPct)
+		} else {
+			delta = fmt.Sprintf("+%.1f%%", math.Abs(deltaPct))
+		}
+	} else {
+		ratio = candidate / baseline
+		deltaPct := (candidate - baseline) / baseline * 100
+		if deltaPct >= 0 {
+			delta = fmt.Sprintf("+%.1f%%", deltaPct)
+		} else {
+			delta = fmt.Sprintf("%.1f%%", deltaPct)
+		}
+	}
+
+	return BenchResult{
+		Workload:  workload,
+		Metric:    metric,
+		Baseline:  baseline,
+		Candidate: candidate,
+		Delta:     delta,
+		Ratio:     ratio,
+		Gate:      gate,
+		Pass:      ratio >= gate,
+	}
+}
diff --git a/weed/storage/blockvol/testrunner/actions/bench_test.go b/weed/storage/blockvol/testrunner/actions/bench_test.go
new file mode 100644
index 000000000..c4dd7eeb9
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/actions/bench_test.go
@@ -0,0 +1,365 @@
+package actions
+
+import (
+	"math"
+	"testing"
+)
+
+// Realistic fio JSON output for testing parse logic.
+const fioWriteJSON = `{
+  "fio version": "fio-3.33",
+  "jobs": [{
+    "jobname": "bench",
+    "read": {
+      "iops": 0,
+      "bw_bytes": 0,
+      "lat_ns": {"mean": 0, "percentile": {}}
+    },
+    "write": {
+      "iops": 49832.5,
+      "bw_bytes": 204113920,
+      "lat_ns": {
+        "mean": 19823.4,
+        "percentile": {
+          "50.000000": 18000,
+          "99.000000": 45000,
+          "99.900000": 82000
+        }
+      }
+    }
+  }]
+}`
+
+const fioReadJSON = `{
+  "jobs": [{
+    "jobname": "bench",
+    "read": {
+      "iops": 62100.0,
+      "bw_bytes": 254361600,
+      "lat_ns": {
+        "mean": 15200.0,
+        "percentile": {
+          "50.000000": 14000,
+          "99.000000": 32000,
+          "99.900000": 58000
+        }
+      }
+    },
+    "write": {
+      "iops": 0,
+      "bw_bytes": 0,
+      "lat_ns": {"mean": 0, "percentile": {}}
+    }
+  }]
+}`
+
+const fioMixedJSON = `{
+  "jobs": [{
+    "jobname": "bench",
+    "read": {
+      "iops": 35000.0,
+      "bw_bytes": 143360000,
+      "lat_ns": {
+        "mean": 22000.0,
+        "percentile": {
+          "50.000000": 20000,
+          "99.000000": 55000,
+          "99.900000": 95000
+        }
+      }
+    },
+    "write": {
+      "iops": 15000.0,
+      "bw_bytes": 61440000,
+      "lat_ns": {
+        "mean": 28000.0,
+        "percentile": {
+          "50.000000": 25000,
+          "99.000000": 65000,
+          "99.900000": 120000
+        }
+      }
+    }
+  }]
+}`
+
+func TestParseFioMetric_WriteIOPS(t *testing.T) {
+	val, err := ParseFioMetric(fioWriteJSON, "iops", "")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	if val != 49832.5 {
+		t.Fatalf("iops = %f, want 49832.5", val)
+	}
+}
+
+func TestParseFioMetric_WriteBW(t *testing.T) {
+	val, err := ParseFioMetric(fioWriteJSON, "bw_mb", "")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	expected := 204113920.0 / (1024 * 1024)
+	if math.Abs(val-expected) > 0.1 {
+		t.Fatalf("bw_mb = %f, want %f", val, expected)
+	}
+}
+
+func TestParseFioMetric_WriteLatency(t *testing.T) {
+	val, err := ParseFioMetric(fioWriteJSON, "lat_mean_us", "")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	expected := 19823.4 / 1000 // ns to µs
+	if math.Abs(val-expected) > 0.01 {
+		t.Fatalf("lat_mean_us = %f, want %f", val, expected)
+	}
+}
+
+func TestParseFioMetric_WriteP99(t *testing.T) {
+	val, err := ParseFioMetric(fioWriteJSON, "lat_p99_us", "")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	expected := 45000.0 / 1000 // 45 µs
+	if math.Abs(val-expected) > 0.01 {
+		t.Fatalf("lat_p99_us = %f, want %f", val, expected)
+	}
+}
+
+func TestParseFioMetric_ReadIOPS(t *testing.T) {
+	val, err := ParseFioMetric(fioReadJSON, "iops", "")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	if val != 62100.0 {
+		t.Fatalf("iops = %f, want 62100.0", val)
+	}
+}
+
+func TestParseFioMetric_ExplicitDirection(t *testing.T) {
+	// Mixed workload, explicitly request read.
+	val, err := ParseFioMetric(fioMixedJSON, "iops", "read")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	if val != 35000.0 {
+		t.Fatalf("read iops = %f, want 35000.0", val)
+	}
+
+	// Explicitly request write.
+	val, err = ParseFioMetric(fioMixedJSON, "iops", "write")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	if val != 15000.0 {
+		t.Fatalf("write iops = %f, want 15000.0", val)
+	}
+}
+
+func TestParseFioMetric_AutoDetect(t *testing.T) {
+	// Write-only JSON: auto should pick write.
+	val, err := ParseFioMetric(fioWriteJSON, "iops", "")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	if val != 49832.5 {
+		t.Fatalf("auto-detect write: iops = %f, want 49832.5", val)
+	}
+
+	// Read-only JSON: auto should pick read (write IOPS=0).
+	val, err = ParseFioMetric(fioReadJSON, "iops", "")
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	if val != 62100.0 {
+		t.Fatalf("auto-detect read: iops = %f, want 62100.0", val)
+	}
+}
+
+func TestParseFioMetric_UnknownMetric(t *testing.T) {
+	_, err := ParseFioMetric(fioWriteJSON, "nonexistent", "")
+	if err == nil {
+		t.Fatal("expected error for unknown metric")
+	}
+}
+
+func TestParseFioMetric_InvalidJSON(t *testing.T) {
+	_, err := ParseFioMetric("not json", "iops", "")
+	if err == nil {
+		t.Fatal("expected error for invalid JSON")
+	}
+}
+
+func TestParseFioMetric_EmptyJobs(t *testing.T) {
+	_, err := ParseFioMetric(`{"jobs":[]}`, "iops", "")
+	if err == nil {
+		t.Fatal("expected error for empty jobs")
+	}
+}
+
+func TestComputeBenchResult_ThroughputPass(t *testing.T) {
+	r := ComputeBenchResult("4k-randwrite", "iops", 49000, 52000, 1.0)
+	if !r.Pass {
+		t.Fatalf("expected pass: ratio=%.3f", r.Ratio)
+	}
+	if r.Ratio < 1.0 {
+		t.Fatalf("ratio = %.3f, want >= 1.0", r.Ratio)
+	}
+}
+
+func TestComputeBenchResult_ThroughputFail(t *testing.T) {
+	r := ComputeBenchResult("4k-randwrite", "iops", 49000, 40000, 1.0)
+	if r.Pass {
+		t.Fatal("expected fail: candidate < baseline")
+	}
+}
+
+func TestComputeBenchResult_ThroughputWarn(t *testing.T) {
+	// candidate = 92% of baseline, gate = 1.0 → fail but ratio >= 0.9
+	r := ComputeBenchResult("4k-randwrite", "iops", 50000, 46000, 1.0)
+	if r.Pass {
+		t.Fatal("expected fail")
+	}
+	if r.Ratio < 0.9 {
+		t.Fatalf("ratio = %.3f, expected >= 0.9 for WARN", r.Ratio)
+	}
+}
+
+func TestComputeBenchResult_LatencyPass(t *testing.T) {
+	// Latency: lower candidate is better. baseline=45µs, candidate=32µs → good.
+	r := ComputeBenchResult("4k-randwrite", "lat_p99_us", 45.0, 32.0, 1.0)
+	if !r.Pass {
+		t.Fatalf("expected pass: candidate latency lower. ratio=%.3f", r.Ratio)
+	}
+	// Ratio should be baseline/candidate = 45/32 ≈ 1.406
+	if r.Ratio < 1.0 {
+		t.Fatalf("ratio = %.3f, want > 1.0 (latency decreased)", r.Ratio)
+	}
+}
+
+func TestComputeBenchResult_LatencyFail(t *testing.T) {
+	// Latency: candidate is higher → bad.
+	r := ComputeBenchResult("4k-randwrite", "lat_p99_us", 45.0, 60.0, 1.0)
+	if r.Pass {
+		t.Fatal("expected fail: candidate latency higher")
+	}
+}
+
+func TestComputeBenchResult_ZeroBaseline(t *testing.T) {
+	r := ComputeBenchResult("test", "iops", 0, 100, 1.0)
+	if r.Pass {
+		t.Fatal("expected fail with zero baseline")
+	}
+}
+
+func TestFormatBenchReport(t *testing.T) {
+	results := []BenchResult{
+		ComputeBenchResult("4k-rw j=1 qd=1", "iops", 12000, 14000, 1.0),
+		ComputeBenchResult("4k-rw j=4 qd=32", "iops", 49000, 62000, 1.0),
+		ComputeBenchResult("4k-rw j=4 qd=32", "lat_p99_us", 45.0, 32.0, 1.0),
+	}
+
+	report := FormatBenchReport(results)
+	if report == "" {
+		t.Fatal("empty report")
+	}
+	// Should contain all three workloads.
+	for _, r := range results {
+		if !contains(report, r.Workload) {
+			t.Errorf("report missing workload %q", r.Workload)
+		}
+	}
+	// All should pass.
+	for _, r := range results {
+		if !r.Pass {
+			t.Errorf("expected pass for %s", r.Workload)
+		}
+	}
+}
+
+func contains(s, substr string) bool {
+	return len(s) > 0 && len(substr) > 0 && findSubstr(s, substr)
+}
+
+func findSubstr(s, substr string) bool {
+	for i := 0; i <= len(s)-len(substr); i++ {
+		if s[i:i+len(substr)] == substr {
+			return true
+		}
+	}
+	return false
+}
+
+func TestParsePgbenchTPS(t *testing.T) {
+	tests := []struct {
+		name   string
+		output string
+		want   string
+	}{
+		{
+			"standard TPC-B output",
+			`pgbench (PostgreSQL 16.1)
+starting vacuum...end.
+transaction type: <builtin: TPC-B (sort of)>
+scaling factor: 10
+query mode: simple
+number of clients: 16
+number of threads: 16
+maximum number of seconds of each test: 30
+number of transactions actually processed: 45678
+number of failed transactions: 0 (0.000%)
+latency average = 10.500 ms
+initial connection time = 12.345 ms
+tps = 1522.600000 (without initial connection time)`,
+			"1522.600000",
+		},
+		{
+			"select only",
+			`tps = 89456.123456 (without initial connection time)`,
+			"89456.123456",
+		},
+		{
+			"no match",
+			"some random output",
+			"",
+		},
+		{
+			"skip initial connection line",
+			`initial connection time = 5.678 ms
+tps = 2345.678901 (without initial connection time)`,
+			"2345.678901",
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := parsePgbenchTPS(tt.output)
+			if got != tt.want {
+				t.Errorf("parsePgbenchTPS() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
+
+func TestTrimValues(t *testing.T) {
+	// 10 values, trim 20% = remove 2 from each end, keep 6
+	values := []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}
+	trimmed := trimValues(values, 20)
+	if len(trimmed) != 6 {
+		t.Fatalf("trimValues(10, 20%%) = %d values, want 6", len(trimmed))
+	}
+	// Should be [3, 4, 5, 6, 7, 8]
+	if trimmed[0] != 3 || trimmed[len(trimmed)-1] != 8 {
+		t.Errorf("trimmed = %v, want [3..8]", trimmed)
+	}
+}
+
+func TestTargetSpecNQN(t *testing.T) {
+	// Test is in actions package — import testrunner types.
+	// TargetSpec is in testrunner package, so we test the NQN suffix logic
+	// by verifying the format.
+	nqn := "nqn.2024-01.com.seaweedfs:vol." + "bench-vol"
+	if nqn != "nqn.2024-01.com.seaweedfs:vol.bench-vol" {
+		t.Fatalf("NQN format wrong: %s", nqn)
+	}
+}
diff --git a/weed/storage/blockvol/testrunner/actions/block.go b/weed/storage/blockvol/testrunner/actions/block.go
index 748d2cd3c..206db8246 100644
--- a/weed/storage/blockvol/testrunner/actions/block.go
+++ b/weed/storage/blockvol/testrunner/actions/block.go
@@ -277,8 +277,9 @@ func killStale(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[
 		process = "iscsi-target-test"
 	}
 
-	// Kill all matching processes.
-	cmd := fmt.Sprintf("pkill -9 -f '%s' 2>/dev/null; sleep 0.5; pgrep -f '%s' || echo 'all_killed'", process, process)
+	// Kill all matching processes. Use pidof (matches binary name, not args)
+	// to avoid killing sw-test-runner itself (whose -bin arg contains the process name).
+	cmd := fmt.Sprintf("pidof %s 2>/dev/null | xargs -r kill -9 2>/dev/null; sleep 0.5; pidof %s || echo 'all_killed'", process, process)
 	stdout, _, _, _ := node.Run(ctx, cmd)
 	actx.Log("  kill_stale %s: %s", process, strings.TrimSpace(stdout))
 
@@ -288,6 +289,12 @@ func killStale(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[
 		actx.Log("  cleaned stale iSCSI sessions")
 	}
 
+	// Clean up stale fillfiles from previous fault-disk-full tests.
+	node.RunRoot(ctx, "rm -f /tmp/fillfile 2>/dev/null")
+
+	// Clean up stale volume files from previous crashed runs.
+	node.Run(ctx, "rm -f /tmp/blockvol-*.blk /tmp/blockvol-*.blk.wal /tmp/blockvol-*.blk.snap.* 2>/dev/null")
+
 	return nil, nil
 }
 
diff --git a/weed/storage/blockvol/testrunner/actions/database.go b/weed/storage/blockvol/testrunner/actions/database.go
index b479843c4..c7eff7b8b 100644
--- a/weed/storage/blockvol/testrunner/actions/database.go
+++ b/weed/storage/blockvol/testrunner/actions/database.go
@@ -3,17 +3,21 @@ package actions
 import (
 	"context"
 	"fmt"
+	"regexp"
 	"strings"
 
 	tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
 )
 
-// RegisterDatabaseActions registers SQLite database actions.
+// RegisterDatabaseActions registers SQLite and PostgreSQL database actions.
 func RegisterDatabaseActions(r *tr.Registry) {
 	r.RegisterFunc("sqlite_create_db", tr.TierBlock, sqliteCreateDB)
 	r.RegisterFunc("sqlite_insert_rows", tr.TierBlock, sqliteInsertRows)
 	r.RegisterFunc("sqlite_count_rows", tr.TierBlock, sqliteCountRows)
 	r.RegisterFunc("sqlite_integrity_check", tr.TierBlock, sqliteIntegrityCheck)
+	r.RegisterFunc("pgbench_init", tr.TierBlock, pgbenchInit)
+	r.RegisterFunc("pgbench_run", tr.TierBlock, pgbenchRun)
+	r.RegisterFunc("pgbench_cleanup", tr.TierBlock, pgbenchCleanup)
 }
 
 // sqliteCreateDB creates a SQLite database with WAL mode and a test table.
@@ -130,3 +134,193 @@ func sqliteIntegrityCheck(ctx context.Context, actx *tr.ActionContext, act tr.Ac
 
 	return nil, nil
 }
+
+// pgbenchInit initializes a PostgreSQL instance on a block device for benchmarking.
+// Params:
+//   - device (required): block device to format and mount
+//   - mount (default: "/mnt/pgbench"): mount point
+//   - port (default: "5434"): PostgreSQL port
+//   - scale (default: "10"): pgbench scale factor
+//   - fstype (default: "ext4"): filesystem type
+//   - pg_bin (default: "/usr/lib/postgresql/16/bin"): PostgreSQL binary directory
+//
+// Returns: value = "ready"
+func pgbenchInit(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	device := act.Params["device"]
+	if device == "" {
+		return nil, fmt.Errorf("pgbench_init: device param required")
+	}
+
+	mount := paramDefault(act.Params, "mount", "/mnt/pgbench")
+	port := paramDefault(act.Params, "port", "5434")
+	scale := paramDefault(act.Params, "scale", "10")
+	fstype := paramDefault(act.Params, "fstype", "ext4")
+	pgBin := paramDefault(act.Params, "pg_bin", "/usr/lib/postgresql/16/bin")
+
+	node, err := getNode(actx, act.Node)
+	if err != nil {
+		return nil, err
+	}
+
+	pgdata := mount + "/pgdata"
+
+	// Format, mount, init PostgreSQL, start, create bench DB, run pgbench -i.
+	script := fmt.Sprintf(`set -e
+# Stop any previous instance
+sudo -u postgres %s/pg_ctl -D %s stop 2>/dev/null || true
+sleep 1
+# Format and mount
+mkfs.%s -F %s > /dev/null 2>&1
+mkdir -p %s
+mount %s %s
+# Init PostgreSQL
+mkdir -p %s
+chown postgres:postgres %s
+sudo -u postgres %s/initdb -D %s > /dev/null 2>&1
+echo "listen_addresses = '127.0.0.1'" >> %s/postgresql.conf
+echo "port = %s" >> %s/postgresql.conf
+echo "unix_socket_directories = '/tmp'" >> %s/postgresql.conf
+echo "shared_buffers = 256MB" >> %s/postgresql.conf
+echo "effective_cache_size = 512MB" >> %s/postgresql.conf
+echo "work_mem = 4MB" >> %s/postgresql.conf
+echo "wal_buffers = 16MB" >> %s/postgresql.conf
+echo "max_connections = 200" >> %s/postgresql.conf
+chown -R postgres:postgres %s
+# Start
+sudo -u postgres %s/pg_ctl -D %s -l %s/logfile start
+sleep 3
+# Create DB and init pgbench
+sudo -u postgres %s/createdb -h /tmp -p %s benchdb 2>/dev/null || true
+sudo -u postgres pgbench -h /tmp -i -s %s -p %s benchdb 2>&1 | tail -3
+echo PGBENCH_INIT_OK`,
+		pgBin, pgdata,
+		fstype, device,
+		mount,
+		device, mount,
+		pgdata,
+		pgdata,
+		pgBin, pgdata,
+		pgdata, port, pgdata, pgdata,
+		pgdata, pgdata, pgdata, pgdata, pgdata,
+		pgdata,
+		pgBin, pgdata, pgdata,
+		pgBin, port,
+		scale, port,
+	)
+
+	actx.Log("  pgbench_init: %s on %s port=%s scale=%s", fstype, device, port, scale)
+	stdout, stderr, code, err := node.RunRoot(ctx, fmt.Sprintf("bash -c '%s'", strings.ReplaceAll(script, "'", "'\\''")))
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("pgbench_init: code=%d stderr=%s err=%v stdout=%s", code, stderr, err, stdout)
+	}
+	if !strings.Contains(stdout, "PGBENCH_INIT_OK") {
+		return nil, fmt.Errorf("pgbench_init: init did not complete: %s", stdout)
+	}
+
+	// Save state for pgbench_run and pgbench_cleanup.
+	actx.Vars["__pgbench_mount"] = mount
+	actx.Vars["__pgbench_port"] = port
+	actx.Vars["__pgbench_pgbin"] = pgBin
+	actx.Vars["__pgbench_pgdata"] = pgdata
+
+	return map[string]string{"value": "ready"}, nil
+}
+
+// pgbenchRun executes a pgbench workload and returns the TPS.
+// Params:
+//   - clients (default: "1"): number of concurrent clients
+//   - duration (default: "30"): run time in seconds
+//   - select_only (default: "false"): if "true", run SELECT-only workload (-S)
+//   - port: override port (default: uses __pgbench_port from pgbench_init)
+//
+// Returns: value = TPS (numeric string, e.g. "1234.56")
+func pgbenchRun(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	port := act.Params["port"]
+	if port == "" {
+		port = actx.Vars["__pgbench_port"]
+	}
+	if port == "" {
+		port = "5434"
+	}
+
+	clients := paramDefault(act.Params, "clients", "1")
+	duration := paramDefault(act.Params, "duration", "30")
+	selectOnly := act.Params["select_only"] == "true"
+
+	node, err := getNode(actx, act.Node)
+	if err != nil {
+		return nil, err
+	}
+
+	cmd := fmt.Sprintf("sudo -u postgres pgbench -h /tmp -c %s -j %s -T %s -p %s",
+		clients, clients, duration, port)
+	if selectOnly {
+		cmd += " -S"
+	}
+	cmd += " benchdb"
+
+	mode := "TPC-B"
+	if selectOnly {
+		mode = "SELECT-only"
+	}
+	actx.Log("  pgbench %s c=%s %ss", mode, clients, duration)
+	stdout, stderr, code, err := node.RunRoot(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("pgbench_run: code=%d stderr=%s stdout=%s err=%v", code, stderr, stdout, err)
+	}
+
+	// Parse TPS from pgbench output. Look for "tps = NNNN.NN" (excluding initial connection).
+	tps := parsePgbenchTPS(stdout)
+	if tps == "" {
+		return nil, fmt.Errorf("pgbench_run: could not parse TPS from output: %s", stdout)
+	}
+
+	actx.Log("  pgbench %s c=%s: %s TPS", mode, clients, tps)
+	return map[string]string{"value": tps}, nil
+}
+
+// pgbenchCleanup stops PostgreSQL and unmounts the device.
+// Uses state saved by pgbench_init (__pgbench_mount, __pgbench_pgbin, __pgbench_pgdata).
+func pgbenchCleanup(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	mount := actx.Vars["__pgbench_mount"]
+	pgBin := actx.Vars["__pgbench_pgbin"]
+	pgdata := actx.Vars["__pgbench_pgdata"]
+
+	if mount == "" {
+		mount = "/mnt/pgbench"
+	}
+	if pgBin == "" {
+		pgBin = "/usr/lib/postgresql/16/bin"
+	}
+	if pgdata == "" {
+		pgdata = mount + "/pgdata"
+	}
+
+	node, err := getNode(actx, act.Node)
+	if err != nil {
+		return nil, err
+	}
+
+	cmd := fmt.Sprintf("sudo -u postgres %s/pg_ctl -D %s stop 2>/dev/null; sleep 1; umount %s 2>/dev/null; true",
+		pgBin, pgdata, mount)
+	node.RunRoot(ctx, cmd)
+	return nil, nil
+}
+
+// parsePgbenchTPS extracts TPS from pgbench output.
+// Matches "tps = 1234.567890" (excluding "initial connection time" lines).
+var pgbenchTPSPattern = regexp.MustCompile(`tps = ([\d.]+)\s+\(`)
+
+func parsePgbenchTPS(output string) string {
+	lines := strings.Split(output, "\n")
+	for _, line := range lines {
+		// Skip "initial connection time = X.XX ms" lines (no TPS).
+		if strings.Contains(line, "initial connection time") && !strings.Contains(line, "tps") {
+			continue
+		}
+		if m := pgbenchTPSPattern.FindStringSubmatch(line); len(m) > 1 {
+			return m[1]
+		}
+	}
+	return ""
+}
diff --git a/weed/storage/blockvol/testrunner/actions/devops_test.go b/weed/storage/blockvol/testrunner/actions/devops_test.go
index 955f82f24..1e27003fe 100644
--- a/weed/storage/blockvol/testrunner/actions/devops_test.go
+++ b/weed/storage/blockvol/testrunner/actions/devops_test.go
@@ -77,11 +77,11 @@ func TestAllActions_Registration(t *testing.T) {
 	byTier := registry.ListByTier()
 
 	// Verify tier counts.
-	if n := len(byTier[tr.TierCore]); n != 8 {
-		t.Errorf("core: %d, want 8", n)
+	if n := len(byTier[tr.TierCore]); n != 11 {
+		t.Errorf("core: %d, want 11", n)
 	}
-	if n := len(byTier[tr.TierBlock]); n != 44 {
-		t.Errorf("block: %d, want 44", n)
+	if n := len(byTier[tr.TierBlock]); n != 52 {
+		t.Errorf("block: %d, want 52", n)
 	}
 	if n := len(byTier[tr.TierDevOps]); n != 7 {
 		t.Errorf("devops: %d, want 7", n)
@@ -89,13 +89,71 @@ func TestAllActions_Registration(t *testing.T) {
 	if n := len(byTier[tr.TierChaos]); n != 5 {
 		t.Errorf("chaos: %d, want 5", n)
 	}
+	if n := len(byTier[TierK8s]); n != 14 {
+		t.Errorf("k8s: %d, want 14", n)
+	}
 
-	// Total should be 64.
+	// Total should be 89 (85 existing + 3 pgbench + 1 bench_stats).
 	total := 0
 	for _, actions := range byTier {
 		total += len(actions)
 	}
-	if total != 64 {
-		t.Errorf("total actions: %d, want 64", total)
+	if total != 89 {
+		t.Errorf("total actions: %d, want 89", total)
+	}
+}
+
+func TestK8sActions_Registration(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterK8sActions(registry)
+
+	expected := []string{
+		"kubectl_apply",
+		"kubectl_delete",
+		"kubectl_get_field",
+		"kubectl_wait_condition",
+		"kubectl_set_image",
+		"kubectl_assert_exists",
+		"kubectl_assert_not_exists",
+		"kubectl_logs",
+		"kubectl_rollout_status",
+		"kubectl_exec",
+		"kubectl_delete_pod",
+		"kubectl_pod_ready_count",
+		"kubectl_label",
+		"kubectl_get_condition",
+	}
+
+	for _, name := range expected {
+		if _, err := registry.Get(name); err != nil {
+			t.Errorf("action %q not registered: %v", name, err)
+		}
+	}
+
+	byTier := registry.ListByTier()
+	if n := len(byTier[TierK8s]); n != 14 {
+		t.Errorf("k8s tier has %d actions, want 14", n)
+	}
+}
+
+func TestK8sActions_TierGating(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterK8sActions(registry)
+
+	// Without gating, all should be accessible.
+	if _, err := registry.Get("kubectl_apply"); err != nil {
+		t.Errorf("ungated: %v", err)
+	}
+
+	// Enable only core tier — k8s should be blocked.
+	registry.EnableTiers([]string{tr.TierCore})
+	if _, err := registry.Get("kubectl_apply"); err == nil {
+		t.Error("expected error when k8s tier is disabled")
+	}
+
+	// Enable k8s tier — should work again.
+	registry.EnableTiers([]string{TierK8s})
+	if _, err := registry.Get("kubectl_apply"); err != nil {
+		t.Errorf("k8s enabled: %v", err)
 	}
 }
diff --git a/weed/storage/blockvol/testrunner/actions/k8s.go b/weed/storage/blockvol/testrunner/actions/k8s.go
new file mode 100644
index 000000000..74ac5131c
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/actions/k8s.go
@@ -0,0 +1,540 @@
+package actions
+
+import (
+	"context"
+	"fmt"
+	"strings"
+	"time"
+
+	tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner/infra"
+)
+
+// TierK8s is the tier for Kubernetes/operator actions.
+const TierK8s = "k8s"
+
+// getK8sNode returns the node and resolved kubectl binary for k8s actions.
+// Tries: kubectl, sudo k3s kubectl. Caches per node.
+func getK8sNode(ctx context.Context, actx *tr.ActionContext, nodeName string) (*infra.Node, string, error) {
+	node, err := getNode(actx, nodeName)
+	if err != nil {
+		return nil, "", err
+	}
+
+	cacheKey := "__kubectl_" + nodeName
+	if cached := actx.Vars[cacheKey]; cached != "" {
+		return node, cached, nil
+	}
+
+	// Try kubectl first.
+	_, _, code, _ := node.Run(ctx, "which kubectl 2>/dev/null")
+	if code == 0 {
+		actx.Vars[cacheKey] = "kubectl"
+		return node, "kubectl", nil
+	}
+
+	// Try k3s kubectl (needs sudo on most installs).
+	_, _, code, _ = node.Run(ctx, "sudo k3s kubectl version --client 2>/dev/null")
+	if code == 0 {
+		actx.Vars[cacheKey] = "sudo k3s kubectl"
+		return node, "sudo k3s kubectl", nil
+	}
+
+	// Fallback.
+	actx.Vars[cacheKey] = "kubectl"
+	return node, "kubectl", nil
+}
+
+// RegisterK8sActions registers Kubernetes/operator actions.
+// These actions run kubectl commands on a node with cluster access.
+func RegisterK8sActions(r *tr.Registry) {
+	r.RegisterFunc("kubectl_apply", TierK8s, kubectlApply)
+	r.RegisterFunc("kubectl_delete", TierK8s, kubectlDelete)
+	r.RegisterFunc("kubectl_get_field", TierK8s, kubectlGetField)
+	r.RegisterFunc("kubectl_wait_condition", TierK8s, kubectlWaitCondition)
+	r.RegisterFunc("kubectl_set_image", TierK8s, kubectlSetImage)
+	r.RegisterFunc("kubectl_assert_exists", TierK8s, kubectlAssertExists)
+	r.RegisterFunc("kubectl_assert_not_exists", TierK8s, kubectlAssertNotExists)
+	r.RegisterFunc("kubectl_logs", TierK8s, kubectlLogs)
+	r.RegisterFunc("kubectl_rollout_status", TierK8s, kubectlRolloutStatus)
+	r.RegisterFunc("kubectl_exec", TierK8s, kubectlExec)
+	r.RegisterFunc("kubectl_delete_pod", TierK8s, kubectlDeletePod)
+	r.RegisterFunc("kubectl_pod_ready_count", TierK8s, kubectlPodReadyCount)
+	r.RegisterFunc("kubectl_label", TierK8s, kubectlLabel)
+	r.RegisterFunc("kubectl_get_condition", TierK8s, kubectlGetCondition)
+}
+
+// kubectlApply applies a YAML manifest.
+// Params: file (path to YAML file) OR manifest (inline YAML content), namespace (optional)
+func kubectlApply(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_apply: %w", err)
+	}
+
+	var cmd string
+	if file := act.Params["file"]; file != "" {
+		cmd = fmt.Sprintf("%s apply -f %s", kctl, file)
+	} else if manifest := act.Params["manifest"]; manifest != "" {
+		cmd = fmt.Sprintf("cat <<'SWEOF' | %s apply -f -\n%s\nSWEOF", kctl, manifest)
+	} else {
+		return nil, fmt.Errorf("kubectl_apply: file or manifest param required")
+	}
+
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_apply: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlDelete deletes a Kubernetes resource.
+// Params: resource (e.g. "deployment/foo"), namespace (optional), wait (optional, "true" to wait)
+func kubectlDelete(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_delete: resource param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_delete: %w", err)
+	}
+
+	cmd := fmt.Sprintf("%s delete %s", kctl, resource)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+	if act.Params["wait"] == "true" {
+		cmd += " --wait=true"
+	}
+	cmd += " --ignore-not-found"
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_delete: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlGetField gets a jsonpath field from a resource.
+// Params: resource, jsonpath, namespace (optional)
+func kubectlGetField(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_get_field: resource param required")
+	}
+	jsonpath := act.Params["jsonpath"]
+	if jsonpath == "" {
+		return nil, fmt.Errorf("kubectl_get_field: jsonpath param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_get_field: %w", err)
+	}
+
+	cmd := fmt.Sprintf("%s get %s -o jsonpath='%s'", kctl, resource, jsonpath)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_get_field: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlWaitCondition waits for a condition on a resource.
+// Params: resource, condition (e.g. "CSIReady=True"), namespace (optional),
+//
+//	timeout (e.g. "5m", default "2m")
+//
+// Uses jsonpath polling since K8s custom conditions aren't supported by `kubectl wait`.
+func kubectlWaitCondition(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_wait_condition: resource param required")
+	}
+	condition := act.Params["condition"]
+	if condition == "" {
+		return nil, fmt.Errorf("kubectl_wait_condition: condition param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_wait_condition: %w", err)
+	}
+
+	parts := strings.SplitN(condition, "=", 2)
+	if len(parts) != 2 {
+		return nil, fmt.Errorf("kubectl_wait_condition: condition must be Type=Status (got %q)", condition)
+	}
+	condType := parts[0]
+	condExpected := parts[1]
+
+	timeout := 2 * time.Minute
+	if t := act.Params["timeout"]; t != "" {
+		if d, parseErr := time.ParseDuration(t); parseErr == nil {
+			timeout = d
+		}
+	}
+
+	jsonpath := fmt.Sprintf("{.status.conditions[?(@.type=='%s')].status}", condType)
+	nsFlag := ""
+	if ns := act.Params["namespace"]; ns != "" {
+		nsFlag = fmt.Sprintf(" -n %s", ns)
+	}
+
+	cmd := fmt.Sprintf("%s get %s%s -o jsonpath='%s'", kctl, resource, nsFlag, jsonpath)
+
+	deadline := time.Now().Add(timeout)
+	for {
+		stdout, _, code, _ := node.Run(ctx, cmd)
+		value := strings.TrimSpace(stdout)
+		if code == 0 && value == condExpected {
+			actx.Log("  condition %s=%s met", condType, condExpected)
+			return map[string]string{"value": value}, nil
+		}
+
+		if time.Now().After(deadline) {
+			return nil, fmt.Errorf("kubectl_wait_condition: timeout waiting for %s=%s on %s (last value: %q)",
+				condType, condExpected, resource, value)
+		}
+
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-time.After(3 * time.Second):
+		}
+	}
+}
+
+// kubectlSetImage sets a container image on a deployment/statefulset.
+// Params: deployment, container, image, namespace (optional)
+func kubectlSetImage(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	deployment := act.Params["deployment"]
+	if deployment == "" {
+		return nil, fmt.Errorf("kubectl_set_image: deployment param required")
+	}
+	container := act.Params["container"]
+	if container == "" {
+		return nil, fmt.Errorf("kubectl_set_image: container param required")
+	}
+	image := act.Params["image"]
+	if image == "" {
+		return nil, fmt.Errorf("kubectl_set_image: image param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_set_image: %w", err)
+	}
+
+	cmd := fmt.Sprintf("%s set image %s %s=%s", kctl, deployment, container, image)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_set_image: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlAssertExists asserts a resource exists.
+// Params: resource, namespace (optional)
+func kubectlAssertExists(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_assert_exists: resource param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_assert_exists: %w", err)
+	}
+
+	cmd := fmt.Sprintf("%s get %s -o name", kctl, resource)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_assert_exists: %s not found (code=%d stderr=%s)", resource, code, stderr)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlAssertNotExists asserts a resource does NOT exist.
+// Params: resource, namespace (optional)
+func kubectlAssertNotExists(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_assert_not_exists: resource param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_assert_not_exists: %w", err)
+	}
+
+	cmd := fmt.Sprintf("%s get %s -o name 2>/dev/null", kctl, resource)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+
+	stdout, _, code, _ := node.Run(ctx, cmd)
+	if code == 0 && strings.TrimSpace(stdout) != "" {
+		return nil, fmt.Errorf("kubectl_assert_not_exists: %s still exists", resource)
+	}
+
+	return nil, nil
+}
+
+// kubectlLogs collects logs from a pod or deployment.
+// Params: resource, namespace (optional), tail (default "100"), container (optional)
+func kubectlLogs(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_logs: resource param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_logs: %w", err)
+	}
+
+	tail := act.Params["tail"]
+	if tail == "" {
+		tail = "100"
+	}
+
+	cmd := fmt.Sprintf("%s logs %s --tail=%s", kctl, resource, tail)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+	if container := act.Params["container"]; container != "" {
+		cmd += fmt.Sprintf(" -c %s", container)
+	}
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_logs: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlRolloutStatus waits for a rollout to complete.
+// Params: resource, namespace (optional), timeout (default "5m")
+func kubectlRolloutStatus(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_rollout_status: resource param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_rollout_status: %w", err)
+	}
+
+	timeout := act.Params["timeout"]
+	if timeout == "" {
+		timeout = "5m"
+	}
+
+	cmd := fmt.Sprintf("%s rollout status %s --timeout=%s", kctl, resource, timeout)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_rollout_status: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlExec runs a command inside a pod.
+// Params: pod, cmd, namespace (optional), container (optional)
+func kubectlExec(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	pod := act.Params["pod"]
+	if pod == "" {
+		return nil, fmt.Errorf("kubectl_exec: pod param required")
+	}
+	execCmd := act.Params["cmd"]
+	if execCmd == "" {
+		return nil, fmt.Errorf("kubectl_exec: cmd param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_exec: %w", err)
+	}
+
+	cmd := fmt.Sprintf("%s exec %s", kctl, pod)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+	if container := act.Params["container"]; container != "" {
+		cmd += fmt.Sprintf(" -c %s", container)
+	}
+	cmd += fmt.Sprintf(" -- %s", execCmd)
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_exec: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlDeletePod deletes a pod by label selector (simulates crash/kill).
+// Params: selector, namespace (optional), grace_period (default "0")
+func kubectlDeletePod(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	selector := act.Params["selector"]
+	if selector == "" {
+		return nil, fmt.Errorf("kubectl_delete_pod: selector param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_delete_pod: %w", err)
+	}
+
+	grace := act.Params["grace_period"]
+	if grace == "" {
+		grace = "0"
+	}
+
+	cmd := fmt.Sprintf("%s delete pod -l %s --grace-period=%s --force", kctl, selector, grace)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_delete_pod: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlPodReadyCount counts ready pods matching a label selector.
+// Params: selector, namespace (optional)
+// Returns: value = count of ready pods
+func kubectlPodReadyCount(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	selector := act.Params["selector"]
+	if selector == "" {
+		return nil, fmt.Errorf("kubectl_pod_ready_count: selector param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_pod_ready_count: %w", err)
+	}
+
+	cmd := fmt.Sprintf("%s get pods -l %s -o jsonpath='{range .items[*]}{.status.conditions[?(@.type==\"Ready\")].status}{\"\\n\"}{end}'",
+		kctl, selector)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+
+	stdout, _, code, _ := node.Run(ctx, cmd)
+	if code != 0 {
+		return map[string]string{"value": "0"}, nil
+	}
+
+	count := 0
+	for _, line := range strings.Split(strings.TrimSpace(stdout), "\n") {
+		if strings.TrimSpace(line) == "True" {
+			count++
+		}
+	}
+
+	return map[string]string{"value": fmt.Sprintf("%d", count)}, nil
+}
+
+// kubectlLabel sets or removes labels on a resource.
+// Params: resource, labels, namespace (optional), overwrite ("true" to allow)
+func kubectlLabel(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_label: resource param required")
+	}
+	labels := act.Params["labels"]
+	if labels == "" {
+		return nil, fmt.Errorf("kubectl_label: labels param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_label: %w", err)
+	}
+
+	cmd := fmt.Sprintf("%s label %s %s", kctl, resource, labels)
+	if ns := act.Params["namespace"]; ns != "" {
+		cmd += fmt.Sprintf(" -n %s", ns)
+	}
+	if act.Params["overwrite"] == "true" {
+		cmd += " --overwrite"
+	}
+
+	stdout, stderr, code, err := node.Run(ctx, cmd)
+	if err != nil || code != 0 {
+		return nil, fmt.Errorf("kubectl_label: code=%d stderr=%s err=%v", code, stderr, err)
+	}
+
+	return map[string]string{"value": strings.TrimSpace(stdout)}, nil
+}
+
+// kubectlGetCondition gets a specific condition's status from a CRD resource.
+// Params: resource, condition_type, namespace (optional)
+// Returns: value = condition status, message = condition message
+func kubectlGetCondition(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	resource := act.Params["resource"]
+	if resource == "" {
+		return nil, fmt.Errorf("kubectl_get_condition: resource param required")
+	}
+	condType := act.Params["condition_type"]
+	if condType == "" {
+		return nil, fmt.Errorf("kubectl_get_condition: condition_type param required")
+	}
+
+	node, kctl, err := getK8sNode(ctx, actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("kubectl_get_condition: %w", err)
+	}
+
+	nsFlag := ""
+	if ns := act.Params["namespace"]; ns != "" {
+		nsFlag = fmt.Sprintf(" -n %s", ns)
+	}
+
+	statusCmd := fmt.Sprintf("%s get %s%s -o jsonpath='{.status.conditions[?(@.type==\"%s\")].status}'",
+		kctl, resource, nsFlag, condType)
+	statusOut, _, _, _ := node.Run(ctx, statusCmd)
+
+	msgCmd := fmt.Sprintf("%s get %s%s -o jsonpath='{.status.conditions[?(@.type==\"%s\")].message}'",
+		kctl, resource, nsFlag, condType)
+	msgOut, _, _, _ := node.Run(ctx, msgCmd)
+
+	return map[string]string{
+		"value":   strings.TrimSpace(statusOut),
+		"message": strings.TrimSpace(msgOut),
+	}, nil
+}
diff --git a/weed/storage/blockvol/testrunner/actions/nvme.go b/weed/storage/blockvol/testrunner/actions/nvme.go
new file mode 100644
index 000000000..be7819bfa
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/actions/nvme.go
@@ -0,0 +1,218 @@
+package actions
+
+import (
+	"context"
+	"encoding/json"
+	"fmt"
+	"strings"
+	"time"
+
+	tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner/infra"
+)
+
+// RegisterNVMeActions registers NVMe/TCP client actions.
+func RegisterNVMeActions(r *tr.Registry) {
+	r.RegisterFunc("nvme_connect", tr.TierBlock, nvmeConnect)
+	r.RegisterFunc("nvme_disconnect", tr.TierBlock, nvmeDisconnect)
+	r.RegisterFunc("nvme_get_device", tr.TierBlock, nvmeGetDevice)
+	r.RegisterFunc("nvme_cleanup", tr.TierBlock, nvmeCleanup)
+}
+
+// nvmeConnect connects to an NVMe/TCP target.
+// Params: target (required). Uses TargetSpec.NvmePort and NQN().
+// Returns: value = NQN (for subsequent disconnect).
+func nvmeConnect(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	targetName := act.Target
+	if targetName == "" {
+		return nil, fmt.Errorf("nvme_connect: target is required")
+	}
+
+	spec, ok := actx.Scenario.Targets[targetName]
+	if !ok {
+		return nil, fmt.Errorf("nvme_connect: target %q not in scenario", targetName)
+	}
+
+	host, err := getTargetHost(actx, targetName)
+	if err != nil {
+		return nil, err
+	}
+
+	node, err := getNode(actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("nvme_connect: %w", err)
+	}
+
+	nqn := spec.NQN()
+	port := spec.NvmePort
+	if port == 0 {
+		port = 4420
+	}
+
+	actx.Log("  nvme connect %s -> %s:%d nqn=%s", targetName, host, port, nqn)
+	cmd := fmt.Sprintf("nvme connect -t tcp -n %s -a %s -s %d", nqn, host, port)
+	stdout, stderr, code, err := node.RunRoot(ctx, cmd)
+	if err != nil || code != 0 {
+		// Treat "already connected" as success.
+		if strings.Contains(stdout+stderr, "already connected") {
+			actx.Log("  already connected")
+			return map[string]string{"value": nqn}, nil
+		}
+		return nil, fmt.Errorf("nvme_connect: code=%d stdout=%s stderr=%s err=%v", code, stdout, stderr, err)
+	}
+
+	return map[string]string{"value": nqn}, nil
+}
+
+// nvmeDisconnect disconnects from an NVMe/TCP target.
+// Params: target (required).
+func nvmeDisconnect(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	targetName := act.Target
+	if targetName == "" {
+		return nil, fmt.Errorf("nvme_disconnect: target is required")
+	}
+
+	spec, ok := actx.Scenario.Targets[targetName]
+	if !ok {
+		return nil, fmt.Errorf("nvme_disconnect: target %q not in scenario", targetName)
+	}
+
+	node, err := getNode(actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("nvme_disconnect: %w", err)
+	}
+
+	nqn := spec.NQN()
+	actx.Log("  nvme disconnect nqn=%s", nqn)
+	cmd := fmt.Sprintf("nvme disconnect -n %s", nqn)
+	stdout, stderr, code, err := node.RunRoot(ctx, cmd)
+	if err != nil || code != 0 {
+		outStr := stdout + stderr
+		// Treat "not connected" / "no subsystem" as success (idempotent).
+		if strings.Contains(outStr, "not connected") || strings.Contains(outStr, "No subsystemtype") || strings.Contains(outStr, "Invalid argument") {
+			actx.Log("  already disconnected")
+			return nil, nil
+		}
+		return nil, fmt.Errorf("nvme_disconnect: code=%d output=%s err=%v", code, outStr, err)
+	}
+
+	return nil, nil
+}
+
+// nvmeGetDevice finds the block device path for an NVMe/TCP connection.
+// Params: target (required). Polls nvme list-subsys until device appears.
+// Returns: value = /dev/nvmeXn1
+func nvmeGetDevice(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	targetName := act.Target
+	if targetName == "" {
+		return nil, fmt.Errorf("nvme_get_device: target is required")
+	}
+
+	spec, ok := actx.Scenario.Targets[targetName]
+	if !ok {
+		return nil, fmt.Errorf("nvme_get_device: target %q not in scenario", targetName)
+	}
+
+	node, err := getNode(actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("nvme_get_device: %w", err)
+	}
+
+	nqn := spec.NQN()
+	actx.Log("  waiting for NVMe device for nqn=%s ...", nqn)
+
+	// Poll for up to 10 seconds.
+	deadline := time.After(10 * time.Second)
+	ticker := time.NewTicker(500 * time.Millisecond)
+	defer ticker.Stop()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return nil, ctx.Err()
+		case <-deadline:
+			return nil, fmt.Errorf("nvme_get_device: timeout waiting for device (nqn=%s)", nqn)
+		case <-ticker.C:
+			dev, findErr := findNVMeDevice(ctx, node, nqn)
+			if findErr != nil {
+				continue // retry
+			}
+			if dev != "" {
+				actx.Log("  found device: %s", dev)
+				return map[string]string{"value": dev}, nil
+			}
+		}
+	}
+}
+
+// nvmeCleanup disconnects all NVMe/TCP subsystems matching our prefix.
+func nvmeCleanup(ctx context.Context, actx *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	node, err := getNode(actx, act.Node)
+	if err != nil {
+		return nil, fmt.Errorf("nvme_cleanup: %w", err)
+	}
+
+	cmd := "nvme disconnect-all 2>/dev/null || true"
+	node.RunRoot(ctx, cmd)
+	actx.Log("  nvme disconnect-all complete")
+	return nil, nil
+}
+
+// findNVMeDevice parses `nvme list-subsys -o json` to find the device for a NQN.
+func findNVMeDevice(ctx context.Context, node *infra.Node, nqn string) (string, error) {
+	cmd := "nvme list-subsys -o json 2>/dev/null"
+	stdout, _, code, err := node.RunRoot(ctx, cmd)
+	if err != nil || code != 0 {
+		return "", fmt.Errorf("nvme list-subsys failed: code=%d err=%v", code, err)
+	}
+
+	// nvme list-subsys returns a JSON array of host entries, each with a Subsystems array.
+	var hosts []nvmeSubsysOutput
+	if err := json.Unmarshal([]byte(stdout), &hosts); err != nil {
+		// Fallback: try parsing as a single object (older nvme-cli versions).
+		var single nvmeSubsysOutput
+		if err2 := json.Unmarshal([]byte(stdout), &single); err2 != nil {
+			return "", fmt.Errorf("nvme list-subsys parse: %w", err)
+		}
+		hosts = []nvmeSubsysOutput{single}
+	}
+
+	for _, h := range hosts {
+	for _, ss := range h.Subsystems {
+		if ss.NQN != nqn {
+			continue
+		}
+		for _, p := range ss.Paths {
+			if p.Name == "" {
+				continue
+			}
+			if strings.EqualFold(p.Transport, "tcp") && strings.EqualFold(p.State, "live") {
+				return "/dev/" + p.Name + "n1", nil
+			}
+		}
+		// Fallback: any path with a name.
+		for _, p := range ss.Paths {
+			if p.Name != "" {
+				return "/dev/" + p.Name + "n1", nil
+			}
+		}
+	}
+	}
+	return "", nil // not found yet
+}
+
+// JSON structures for nvme list-subsys output.
+type nvmeSubsysOutput struct {
+	Subsystems []nvmeSubsysEntry `json:"Subsystems"`
+}
+
+type nvmeSubsysEntry struct {
+	NQN   string          `json:"NQN"`
+	Paths []nvmePathEntry `json:"Paths"`
+}
+
+type nvmePathEntry struct {
+	Name      string `json:"Name"`
+	Transport string `json:"Transport"`
+	State     string `json:"State"`
+}
diff --git a/weed/storage/blockvol/testrunner/actions/nvme_bench_test.go b/weed/storage/blockvol/testrunner/actions/nvme_bench_test.go
new file mode 100644
index 000000000..c0ae77388
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/actions/nvme_bench_test.go
@@ -0,0 +1,1013 @@
+package actions
+
+import (
+	"context"
+	"encoding/json"
+	"math"
+	"strings"
+	"testing"
+	"time"
+
+	tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
+)
+
+// ============================================================
+// NVMe Action Registration
+// ============================================================
+
+func TestNVMeActions_Registration(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterNVMeActions(registry)
+
+	expected := []string{
+		"nvme_connect",
+		"nvme_disconnect",
+		"nvme_get_device",
+		"nvme_cleanup",
+	}
+
+	for _, name := range expected {
+		if _, err := registry.Get(name); err != nil {
+			t.Errorf("action %q not registered: %v", name, err)
+		}
+	}
+
+	byTier := registry.ListByTier()
+	if n := len(byTier[tr.TierBlock]); n != 4 {
+		t.Errorf("block tier has %d nvme actions, want 4", n)
+	}
+}
+
+func TestNVMeActions_TierGating(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterNVMeActions(registry)
+
+	// Without gating, all accessible.
+	if _, err := registry.Get("nvme_connect"); err != nil {
+		t.Errorf("ungated: %v", err)
+	}
+
+	// Enable only core tier — block actions should be blocked.
+	registry.EnableTiers([]string{tr.TierCore})
+	if _, err := registry.Get("nvme_connect"); err == nil {
+		t.Error("expected error when block tier is disabled")
+	}
+
+	// Enable block tier — should work again.
+	registry.EnableTiers([]string{tr.TierBlock})
+	if _, err := registry.Get("nvme_connect"); err != nil {
+		t.Errorf("block enabled: %v", err)
+	}
+}
+
+func TestBenchActions_Registration(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+
+	expected := []string{"fio_json", "fio_parse", "bench_compare"}
+	for _, name := range expected {
+		if _, err := registry.Get(name); err != nil {
+			t.Errorf("action %q not registered: %v", name, err)
+		}
+	}
+}
+
+// ============================================================
+// findNVMeDevice JSON Parsing (nvme list-subsys output)
+// ============================================================
+
+// parseAndFind is a test helper that parses nvme list-subsys JSON and
+// finds the device for a given NQN, replicating findNVMeDevice logic
+// without SSH.
+func parseAndFind(t *testing.T, jsonStr, nqn string) string {
+	t.Helper()
+	var parsed nvmeSubsysOutput
+	if err := json.Unmarshal([]byte(jsonStr), &parsed); err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+	for _, ss := range parsed.Subsystems {
+		if ss.NQN != nqn {
+			continue
+		}
+		for _, p := range ss.Paths {
+			if p.Name == "" {
+				continue
+			}
+			if strings.EqualFold(p.Transport, "tcp") && strings.EqualFold(p.State, "live") {
+				return "/dev/" + p.Name + "n1"
+			}
+		}
+		for _, p := range ss.Paths {
+			if p.Name != "" {
+				return "/dev/" + p.Name + "n1"
+			}
+		}
+	}
+	return ""
+}
+
+func TestFindNVMeDevice_Parse_LiveTCP(t *testing.T) {
+	dev := parseAndFind(t, `{
+		"Subsystems": [{
+			"NQN": "nqn.2024-01.com.seaweedfs:vol.test-vol",
+			"Paths": [{"Name": "nvme0", "Transport": "tcp", "State": "live"}]
+		}]
+	}`, "nqn.2024-01.com.seaweedfs:vol.test-vol")
+	if dev != "/dev/nvme0n1" {
+		t.Fatalf("device = %q, want /dev/nvme0n1", dev)
+	}
+}
+
+func TestFindNVMeDevice_Parse_NoMatch(t *testing.T) {
+	dev := parseAndFind(t, `{
+		"Subsystems": [{
+			"NQN": "nqn.2024-01.com.seaweedfs:vol.other",
+			"Paths": [{"Name": "nvme0", "Transport": "tcp", "State": "live"}]
+		}]
+	}`, "nqn.2024-01.com.seaweedfs:vol.test-vol")
+	if dev != "" {
+		t.Fatalf("expected empty, got %q", dev)
+	}
+}
+
+func TestFindNVMeDevice_Parse_MultipleSubsystems(t *testing.T) {
+	jsonStr := `{
+		"Subsystems": [
+			{"NQN": "nqn.test:vol-a", "Paths": [{"Name": "nvme0", "Transport": "tcp", "State": "live"}]},
+			{"NQN": "nqn.test:vol-b", "Paths": [{"Name": "nvme1", "Transport": "tcp", "State": "live"}]}
+		]
+	}`
+	if d := parseAndFind(t, jsonStr, "nqn.test:vol-a"); d != "/dev/nvme0n1" {
+		t.Fatalf("vol-a: %q", d)
+	}
+	if d := parseAndFind(t, jsonStr, "nqn.test:vol-b"); d != "/dev/nvme1n1" {
+		t.Fatalf("vol-b: %q", d)
+	}
+}
+
+func TestFindNVMeDevice_Parse_PreferLiveTCP(t *testing.T) {
+	dev := parseAndFind(t, `{
+		"Subsystems": [{
+			"NQN": "nqn.test:vol",
+			"Paths": [
+				{"Name": "nvme0", "Transport": "rdma", "State": "live"},
+				{"Name": "nvme1", "Transport": "tcp", "State": "connecting"},
+				{"Name": "nvme2", "Transport": "tcp", "State": "live"}
+			]
+		}]
+	}`, "nqn.test:vol")
+	if dev != "/dev/nvme2n1" {
+		t.Fatalf("device = %q, want /dev/nvme2n1 (live TCP preferred)", dev)
+	}
+}
+
+func TestFindNVMeDevice_Parse_FallbackNonLive(t *testing.T) {
+	dev := parseAndFind(t, `{
+		"Subsystems": [{
+			"NQN": "nqn.test:vol",
+			"Paths": [{"Name": "nvme3", "Transport": "tcp", "State": "connecting"}]
+		}]
+	}`, "nqn.test:vol")
+	if dev != "/dev/nvme3n1" {
+		t.Fatalf("device = %q, want /dev/nvme3n1 (fallback)", dev)
+	}
+}
+
+func TestFindNVMeDevice_Parse_EmptyPaths(t *testing.T) {
+	dev := parseAndFind(t, `{
+		"Subsystems": [{"NQN": "nqn.test:vol", "Paths": []}]
+	}`, "nqn.test:vol")
+	if dev != "" {
+		t.Fatalf("expected empty for no paths, got %q", dev)
+	}
+}
+
+func TestFindNVMeDevice_Parse_EmptyName(t *testing.T) {
+	dev := parseAndFind(t, `{
+		"Subsystems": [{
+			"NQN": "nqn.test:vol",
+			"Paths": [{"Name": "", "Transport": "tcp", "State": "live"}]
+		}]
+	}`, "nqn.test:vol")
+	if dev != "" {
+		t.Fatalf("expected empty for nameless path, got %q", dev)
+	}
+}
+
+func TestFindNVMeDevice_Parse_EmptySubsystems(t *testing.T) {
+	dev := parseAndFind(t, `{"Subsystems": []}`, "nqn.test:vol")
+	if dev != "" {
+		t.Fatalf("expected empty, got %q", dev)
+	}
+}
+
+func TestFindNVMeDevice_Parse_CaseInsensitive(t *testing.T) {
+	dev := parseAndFind(t, `{
+		"Subsystems": [{
+			"NQN": "nqn.test:vol",
+			"Paths": [{"Name": "nvme5", "Transport": "TCP", "State": "Live"}]
+		}]
+	}`, "nqn.test:vol")
+	if dev != "/dev/nvme5n1" {
+		t.Fatalf("device = %q, want /dev/nvme5n1 (case insensitive)", dev)
+	}
+}
+
+// ============================================================
+// TargetSpec NVMe fields
+// ============================================================
+
+func TestTargetSpec_NQN_WithNQNSuffix(t *testing.T) {
+	spec := tr.TargetSpec{NQNSuffix: "my-vol", IQNSuffix: "fallback"}
+	want := "nqn.2024-01.com.seaweedfs:vol.my-vol"
+	if got := spec.NQN(); got != want {
+		t.Fatalf("NQN() = %q, want %q", got, want)
+	}
+}
+
+func TestTargetSpec_NQN_FallbackToIQN(t *testing.T) {
+	spec := tr.TargetSpec{IQNSuffix: "iqn-vol"}
+	want := "nqn.2024-01.com.seaweedfs:vol.iqn-vol"
+	if got := spec.NQN(); got != want {
+		t.Fatalf("NQN() = %q, want %q (fallback to IQN suffix)", got, want)
+	}
+}
+
+func TestTargetSpec_NQN_BothEmpty(t *testing.T) {
+	spec := tr.TargetSpec{}
+	got := spec.NQN()
+	// Should return prefix + empty string.
+	if got != "nqn.2024-01.com.seaweedfs:vol." {
+		t.Fatalf("NQN() = %q", got)
+	}
+}
+
+// ============================================================
+// ParseFioMetric — additional edge cases
+// ============================================================
+
+func TestParseFioMetric_MixedAutoDetectPicksWrite(t *testing.T) {
+	// When both have IOPS > 0, auto-detect picks write (checked first).
+	val, err := ParseFioMetric(fioMixedJSON, "iops", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if val != 15000.0 {
+		t.Fatalf("auto-detect mixed iops = %f, want 15000 (write)", val)
+	}
+}
+
+func TestParseFioMetric_AllLatencyMetrics(t *testing.T) {
+	metrics := []struct {
+		name string
+		want float64
+	}{
+		{"lat_mean_us", 19823.4 / 1000},
+		{"lat_p50_us", 18000.0 / 1000},
+		{"lat_p99_us", 45000.0 / 1000},
+		{"lat_p999_us", 82000.0 / 1000},
+	}
+	for _, m := range metrics {
+		val, err := ParseFioMetric(fioWriteJSON, m.name, "")
+		if err != nil {
+			t.Fatalf("%s: %v", m.name, err)
+		}
+		if math.Abs(val-m.want) > 0.01 {
+			t.Fatalf("%s = %f, want %f", m.name, val, m.want)
+		}
+	}
+}
+
+func TestParseFioMetric_BWBytes(t *testing.T) {
+	val, err := ParseFioMetric(fioWriteJSON, "bw_bytes", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if val != 204113920.0 {
+		t.Fatalf("bw_bytes = %f, want 204113920", val)
+	}
+}
+
+func TestParseFioMetric_MissingPercentile(t *testing.T) {
+	jsonStr := `{
+		"jobs": [{"jobname": "bench",
+			"read": {"iops": 0, "bw_bytes": 0, "lat_ns": {"mean": 0, "percentile": {}}},
+			"write": {"iops": 100, "bw_bytes": 409600, "lat_ns": {"mean": 5000, "percentile": {}}}
+		}]
+	}`
+	val, err := ParseFioMetric(jsonStr, "lat_p99_us", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if val != 0 {
+		t.Fatalf("lat_p99_us = %f, want 0 (missing key)", val)
+	}
+}
+
+func TestParseFioMetric_NilPercentile(t *testing.T) {
+	jsonStr := `{
+		"jobs": [{"jobname": "bench",
+			"read": {"iops": 0, "bw_bytes": 0, "lat_ns": {"mean": 0}},
+			"write": {"iops": 100, "bw_bytes": 409600, "lat_ns": {"mean": 5000}}
+		}]
+	}`
+	val, err := ParseFioMetric(jsonStr, "lat_p99_us", "")
+	if err != nil {
+		t.Fatal(err)
+	}
+	if val != 0 {
+		t.Fatalf("lat_p99_us = %f, want 0 (nil percentile)", val)
+	}
+}
+
+// ============================================================
+// ComputeBenchResult — additional edge cases
+// ============================================================
+
+func TestComputeBenchResult_LatencyWarn(t *testing.T) {
+	// Candidate latency slightly higher: ratio=40/42=0.952, > 0.9 but < 1.0.
+	r := ComputeBenchResult("lat-test", "lat_p99_us", 40.0, 42.0, 1.0)
+	if r.Pass {
+		t.Fatal("expected fail: candidate latency higher")
+	}
+	if r.Ratio < 0.9 {
+		t.Fatalf("ratio = %.3f, expected >= 0.9 (WARN territory)", r.Ratio)
+	}
+}
+
+func TestComputeBenchResult_LatencyMuchWorse(t *testing.T) {
+	r := ComputeBenchResult("lat-test", "lat_p99_us", 40.0, 120.0, 1.0)
+	if r.Pass {
+		t.Fatal("expected fail")
+	}
+	if r.Ratio >= 0.9 {
+		t.Fatalf("ratio = %.3f, expected < 0.9", r.Ratio)
+	}
+}
+
+func TestComputeBenchResult_ExactGate(t *testing.T) {
+	r := ComputeBenchResult("exact", "iops", 100, 90, 0.9)
+	if !r.Pass {
+		t.Fatalf("expected pass: ratio=%.3f == gate=0.9", r.Ratio)
+	}
+}
+
+func TestComputeBenchResult_JustBelowGate(t *testing.T) {
+	r := ComputeBenchResult("below", "iops", 100, 89, 0.9)
+	if r.Pass {
+		t.Fatal("expected fail: ratio < gate")
+	}
+}
+
+func TestComputeBenchResult_ZeroCandidate(t *testing.T) {
+	r := ComputeBenchResult("zero-cand", "iops", 100, 0, 1.0)
+	if r.Pass {
+		t.Fatal("expected fail: zero candidate")
+	}
+	if r.Ratio != 0 {
+		t.Fatalf("ratio = %f, want 0", r.Ratio)
+	}
+}
+
+func TestComputeBenchResult_BothZero(t *testing.T) {
+	r := ComputeBenchResult("both-zero", "iops", 0, 0, 1.0)
+	if r.Pass {
+		t.Fatal("expected fail: both zero")
+	}
+}
+
+func TestComputeBenchResult_LatencyZeroCandidate(t *testing.T) {
+	r := ComputeBenchResult("lat-zero", "lat_p99_us", 40.0, 0.0, 1.0)
+	if !r.Pass {
+		t.Fatal("expected pass: candidate latency=0 is infinitely good")
+	}
+	if !math.IsInf(r.Ratio, 1) {
+		t.Fatalf("ratio = %f, want +Inf", r.Ratio)
+	}
+}
+
+func TestComputeBenchResult_DeltaSign_ThroughputUp(t *testing.T) {
+	r := ComputeBenchResult("up", "iops", 1000, 1200, 1.0)
+	if r.Delta != "+20.0%" {
+		t.Fatalf("delta = %q, want +20.0%%", r.Delta)
+	}
+}
+
+func TestComputeBenchResult_DeltaSign_ThroughputDown(t *testing.T) {
+	r := ComputeBenchResult("down", "iops", 1000, 800, 1.0)
+	if r.Delta != "-20.0%" {
+		t.Fatalf("delta = %q, want -20.0%%", r.Delta)
+	}
+}
+
+func TestComputeBenchResult_DeltaSign_LatencyDown(t *testing.T) {
+	r := ComputeBenchResult("lat-down", "lat_p99_us", 100, 80, 1.0)
+	if r.Delta != "-20.0%" {
+		t.Fatalf("delta = %q, want -20.0%%", r.Delta)
+	}
+}
+
+func TestComputeBenchResult_DeltaSign_LatencyUp(t *testing.T) {
+	r := ComputeBenchResult("lat-up", "lat_p99_us", 100, 120, 1.0)
+	if r.Delta != "+20.0%" {
+		t.Fatalf("delta = %q, want +20.0%%", r.Delta)
+	}
+}
+
+// ============================================================
+// FormatBenchReport edge cases
+// ============================================================
+
+func TestFormatBenchReport_EmptyResults(t *testing.T) {
+	report := FormatBenchReport(nil)
+	if report == "" {
+		t.Fatal("expected non-empty report even with no results")
+	}
+}
+
+func TestFormatBenchReport_MixedPassFail(t *testing.T) {
+	results := []BenchResult{
+		ComputeBenchResult("good", "iops", 100, 120, 1.0),
+		ComputeBenchResult("bad", "iops", 100, 50, 1.0),
+		ComputeBenchResult("warn", "iops", 100, 92, 1.0),
+	}
+	report := FormatBenchReport(results)
+
+	if !contains(report, "PASS") {
+		t.Error("report missing PASS")
+	}
+	if !contains(report, "FAIL") {
+		t.Error("report missing FAIL")
+	}
+	if !contains(report, "WARN") {
+		t.Error("report missing WARN")
+	}
+}
+
+// ============================================================
+// benchCompare action param validation
+// ============================================================
+
+func TestBenchCompare_MissingParams(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+
+	handler, err := registry.Get("bench_compare")
+	if err != nil {
+		t.Fatal(err)
+	}
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	tests := []struct {
+		name   string
+		params map[string]string
+	}{
+		{"missing_a_var", map[string]string{"b_var": "b", "metric": "iops"}},
+		{"missing_b_var", map[string]string{"a_var": "a", "metric": "iops"}},
+		{"missing_metric", map[string]string{"a_var": "a", "b_var": "b"}},
+	}
+	for _, tt := range tests {
+		act := tr.Action{Params: tt.params}
+		_, err := handler.Execute(context.Background(), actx, act)
+		if err == nil {
+			t.Errorf("%s: expected error", tt.name)
+		}
+	}
+}
+
+func TestBenchCompare_EmptyVarValues(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"a_fio": fioWriteJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "a_fio", "b_var": "b_fio", "metric": "iops",
+	}}
+	_, err := handler.Execute(context.Background(), actx, act)
+	if err == nil {
+		t.Fatal("expected error for empty b_var value")
+	}
+}
+
+func TestBenchCompare_InvalidGate(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"a": fioWriteJSON, "b": fioWriteJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "a", "b_var": "b", "metric": "iops", "gate": "not-a-number",
+	}}
+	_, err := handler.Execute(context.Background(), actx, act)
+	if err == nil {
+		t.Fatal("expected error for invalid gate")
+	}
+}
+
+func TestBenchCompare_PassWithDirection(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"a": fioMixedJSON, "b": fioMixedJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "a", "b_var": "b", "metric": "iops",
+		"direction": "read", "gate": "0.9",
+	}}
+	result, err := handler.Execute(context.Background(), actx, act)
+	if err != nil {
+		t.Fatalf("expected pass: %v", err)
+	}
+	if result["value"] != "+0.0%" {
+		t.Fatalf("delta = %q, want +0.0%%", result["value"])
+	}
+}
+
+func TestBenchCompare_LatencyGatePass(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	// Candidate has lower latency → better → should pass.
+	betterJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":50000,"bw_bytes":204800000,"lat_ns":{"mean":15000,"percentile":{"99.000000":30000}}}}]}`
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"baseline": fioWriteJSON, "candidate": betterJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "baseline", "b_var": "candidate",
+		"metric": "lat_p99_us", "gate": "0.9",
+	}}
+	_, err := handler.Execute(context.Background(), actx, act)
+	if err != nil {
+		t.Fatalf("expected pass for lower latency candidate: %v", err)
+	}
+}
+
+// ============================================================
+// fioParse action
+// ============================================================
+
+func TestFioParse_Action(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("fio_parse")
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"my_fio": fioWriteJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{"json_var": "my_fio", "metric": "iops"}}
+	result, err := handler.Execute(context.Background(), actx, act)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if result["value"] != "49832.50" {
+		t.Fatalf("value = %q, want 49832.50", result["value"])
+	}
+}
+
+func TestFioParse_MissingVar(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("fio_parse")
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{"json_var": "missing", "metric": "iops"}}
+	_, err := handler.Execute(context.Background(), actx, act)
+	if err == nil {
+		t.Fatal("expected error for missing var")
+	}
+}
+
+func TestFioParse_MissingParams(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("fio_parse")
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"x": fioWriteJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	// Missing json_var.
+	_, err := handler.Execute(context.Background(), actx,
+		tr.Action{Params: map[string]string{"metric": "iops"}})
+	if err == nil {
+		t.Fatal("expected error for missing json_var")
+	}
+
+	// Missing metric.
+	_, err = handler.Execute(context.Background(), actx,
+		tr.Action{Params: map[string]string{"json_var": "x"}})
+	if err == nil {
+		t.Fatal("expected error for missing metric")
+	}
+}
+
+func TestFioParse_WithDirection(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("fio_parse")
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"m": fioMixedJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"json_var": "m", "metric": "iops", "direction": "read",
+	}}
+	result, err := handler.Execute(context.Background(), actx, act)
+	if err != nil {
+		t.Fatal(err)
+	}
+	if result["value"] != "35000.00" {
+		t.Fatalf("read iops = %q, want 35000.00", result["value"])
+	}
+}
+
+// ============================================================
+// Engine-level integration: bench_compare with mocks
+// ============================================================
+
+// mockTestHandler is a simple mock for engine-level tests.
+type mockTestHandler struct {
+	calls   []tr.Action
+	outputs map[string]string
+	err     error
+}
+
+func (m *mockTestHandler) Execute(_ context.Context, _ *tr.ActionContext, act tr.Action) (map[string]string, error) {
+	m.calls = append(m.calls, act)
+	if m.err != nil {
+		return nil, m.err
+	}
+	return m.outputs, nil
+}
+
+func TestEngine_NVMeBenchScenario(t *testing.T) {
+	registry := tr.NewRegistry()
+
+	RegisterBenchActions(registry) // registers fio_json, fio_parse, bench_compare
+	// Mock fio_json AFTER RegisterBenchActions to override the real handler.
+	fioAction := &mockTestHandler{outputs: map[string]string{"value": fioWriteJSON}}
+	registry.Register("fio_json", tr.TierBlock, fioAction)
+
+	scenario := &tr.Scenario{
+		Name:    "mini-bench",
+		Timeout: tr.Duration{Duration: 30 * time.Second},
+		Phases: []tr.Phase{
+			{
+				Name: "iscsi-bench",
+				Actions: []tr.Action{
+					{Action: "fio_json", SaveAs: "iscsi_result"},
+				},
+			},
+			{
+				Name: "nvme-bench",
+				Actions: []tr.Action{
+					{Action: "fio_json", SaveAs: "nvme_result"},
+				},
+			},
+			{
+				Name: "compare",
+				Actions: []tr.Action{
+					{
+						Action: "bench_compare",
+						SaveAs: "cmp_iops",
+						Params: map[string]string{
+							"a_var": "iscsi_result", "b_var": "nvme_result",
+							"metric": "iops", "gate": "0.9",
+						},
+					},
+					{
+						Action: "bench_compare",
+						SaveAs: "cmp_lat",
+						Params: map[string]string{
+							"a_var": "iscsi_result", "b_var": "nvme_result",
+							"metric": "lat_p99_us", "gate": "0.9",
+						},
+					},
+				},
+			},
+		},
+	}
+
+	engine := tr.NewEngine(registry, nil)
+	actx := &tr.ActionContext{
+		Scenario: scenario,
+		Vars:     make(map[string]string),
+		Log:      func(string, ...interface{}) {},
+	}
+	result := engine.Run(context.Background(), scenario, actx)
+
+	if result.Status != tr.StatusPass {
+		t.Fatalf("status = %s, want PASS. error: %s", result.Status, result.Error)
+	}
+	if len(result.Phases) != 3 {
+		t.Fatalf("phases = %d, want 3", len(result.Phases))
+	}
+
+	// Same JSON → ratio=1.0, gate=0.9 → pass, delta=+0.0%.
+	if actx.Vars["cmp_iops"] != "+0.0%" {
+		t.Fatalf("cmp_iops = %q, want +0.0%%", actx.Vars["cmp_iops"])
+	}
+	// Same latency → ratio=1.0, delta=-0.0%.
+	if actx.Vars["cmp_lat"] != "-0.0%" {
+		t.Fatalf("cmp_lat = %q, want -0.0%%", actx.Vars["cmp_lat"])
+	}
+}
+
+func TestEngine_BenchCompare_FailsGate(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+
+	highJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":50000,"bw_bytes":204800000,"lat_ns":{"mean":20000,"percentile":{"99.000000":45000}}}}]}`
+	lowJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":30000,"bw_bytes":122880000,"lat_ns":{"mean":30000,"percentile":{"99.000000":60000}}}}]}`
+
+	scenario := &tr.Scenario{
+		Name:    "fail-gate",
+		Timeout: tr.Duration{Duration: 10 * time.Second},
+		Phases: []tr.Phase{
+			{
+				Name: "compare",
+				Actions: []tr.Action{
+					{
+						Action: "bench_compare",
+						Params: map[string]string{
+							"a_var": "baseline", "b_var": "candidate",
+							"metric": "iops", "gate": "0.9",
+						},
+					},
+				},
+			},
+		},
+	}
+
+	engine := tr.NewEngine(registry, nil)
+	actx := &tr.ActionContext{
+		Scenario: scenario,
+		Vars:     map[string]string{"baseline": highJSON, "candidate": lowJSON},
+		Log:      func(string, ...interface{}) {},
+	}
+	result := engine.Run(context.Background(), scenario, actx)
+
+	if result.Status != tr.StatusFail {
+		t.Fatalf("status = %s, want FAIL (30k/50k = 0.6 < gate 0.9)", result.Status)
+	}
+}
+
+func TestEngine_BenchCompare_LatencyFails(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+
+	goodLat := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":50000,"bw_bytes":204800000,"lat_ns":{"mean":20000,"percentile":{"99.000000":30000}}}}]}`
+	badLat := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":50000,"bw_bytes":204800000,"lat_ns":{"mean":40000,"percentile":{"99.000000":90000}}}}]}`
+
+	scenario := &tr.Scenario{
+		Name:    "lat-fail",
+		Timeout: tr.Duration{Duration: 10 * time.Second},
+		Phases: []tr.Phase{
+			{
+				Name: "compare",
+				Actions: []tr.Action{
+					{
+						Action: "bench_compare",
+						Params: map[string]string{
+							"a_var": "baseline", "b_var": "candidate",
+							"metric": "lat_p99_us", "gate": "0.9",
+						},
+					},
+				},
+			},
+		},
+	}
+
+	engine := tr.NewEngine(registry, nil)
+	actx := &tr.ActionContext{
+		Scenario: scenario,
+		Vars:     map[string]string{"baseline": goodLat, "candidate": badLat},
+		Log:      func(string, ...interface{}) {},
+	}
+	result := engine.Run(context.Background(), scenario, actx)
+
+	if result.Status != tr.StatusFail {
+		t.Fatalf("status = %s, want FAIL (lat 90µs vs 30µs baseline)", result.Status)
+	}
+}
+
+// ============================================================
+// warn_gate behavior
+// ============================================================
+
+func TestBenchCompare_WarnGate_InWarnBand(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	// Candidate = 85% of baseline → below gate (0.9) but above warn_gate (0.8).
+	highJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":10000,"bw_bytes":40960000,"lat_ns":{"mean":20000,"percentile":{"99.000000":45000}}}}]}`
+	lowJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":8500,"bw_bytes":34816000,"lat_ns":{"mean":20000,"percentile":{"99.000000":45000}}}}]}`
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"a": highJSON, "b": lowJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "a", "b_var": "b", "metric": "iops",
+		"gate": "0.9", "warn_gate": "0.8",
+	}}
+	result, err := handler.Execute(context.Background(), actx, act)
+	if err != nil {
+		t.Fatalf("expected success with WARN, got error: %v", err)
+	}
+	if !strings.HasPrefix(result["value"], "WARN:") {
+		t.Fatalf("value = %q, want WARN: prefix", result["value"])
+	}
+}
+
+func TestBenchCompare_WarnGate_BelowWarnGate(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	// Candidate = 70% of baseline → below both gate and warn_gate.
+	highJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":10000,"bw_bytes":40960000,"lat_ns":{"mean":20000,"percentile":{"99.000000":45000}}}}]}`
+	lowJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":7000,"bw_bytes":28672000,"lat_ns":{"mean":20000,"percentile":{"99.000000":45000}}}}]}`
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"a": highJSON, "b": lowJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "a", "b_var": "b", "metric": "iops",
+		"gate": "0.9", "warn_gate": "0.8",
+	}}
+	_, err := handler.Execute(context.Background(), actx, act)
+	if err == nil {
+		t.Fatal("expected hard fail below warn_gate")
+	}
+	if !strings.Contains(err.Error(), "FAIL") {
+		t.Fatalf("error = %q, want FAIL", err.Error())
+	}
+}
+
+func TestBenchCompare_WarnGate_AboveGate(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	// Candidate = 100% of baseline → above gate → normal PASS, no WARN prefix.
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"a": fioWriteJSON, "b": fioWriteJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "a", "b_var": "b", "metric": "iops",
+		"gate": "0.9", "warn_gate": "0.8",
+	}}
+	result, err := handler.Execute(context.Background(), actx, act)
+	if err != nil {
+		t.Fatalf("expected pass: %v", err)
+	}
+	if strings.HasPrefix(result["value"], "WARN:") {
+		t.Fatalf("value = %q, want no WARN prefix (above gate)", result["value"])
+	}
+}
+
+func TestBenchCompare_WarnGate_InvalidValue(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"a": fioWriteJSON, "b": fioWriteJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "a", "b_var": "b", "metric": "iops",
+		"gate": "0.9", "warn_gate": "bad",
+	}}
+	_, err := handler.Execute(context.Background(), actx, act)
+	if err == nil {
+		t.Fatal("expected error for invalid warn_gate")
+	}
+}
+
+func TestBenchCompare_WarnGate_LatencyInWarnBand(t *testing.T) {
+	registry := tr.NewRegistry()
+	RegisterBenchActions(registry)
+	handler, _ := registry.Get("bench_compare")
+
+	// Baseline lat 30µs, candidate lat 35µs → ratio=30/35=0.857, below gate 0.9 but above warn_gate 0.8.
+	baseJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":50000,"bw_bytes":204800000,"lat_ns":{"mean":20000,"percentile":{"99.000000":30000}}}}]}`
+	candJSON := `{"jobs":[{"jobname":"b","read":{"iops":0,"bw_bytes":0,"lat_ns":{"mean":0,"percentile":{}}},
+		"write":{"iops":50000,"bw_bytes":204800000,"lat_ns":{"mean":25000,"percentile":{"99.000000":35000}}}}]}`
+
+	actx := &tr.ActionContext{
+		Vars: map[string]string{"a": baseJSON, "b": candJSON},
+		Log:  func(string, ...interface{}) {},
+	}
+
+	act := tr.Action{Params: map[string]string{
+		"a_var": "a", "b_var": "b", "metric": "lat_p99_us",
+		"gate": "0.9", "warn_gate": "0.8",
+	}}
+	result, err := handler.Execute(context.Background(), actx, act)
+	if err != nil {
+		t.Fatalf("expected WARN success for latency in warn band: %v", err)
+	}
+	if !strings.HasPrefix(result["value"], "WARN:") {
+		t.Fatalf("value = %q, want WARN: prefix", result["value"])
+	}
+}
+
+// ============================================================
+// TargetSpec sanitization (Finding 3)
+// ============================================================
+
+func TestTargetSpec_NQN_Sanitized(t *testing.T) {
+	spec := tr.TargetSpec{NQNSuffix: "My_Volume"}
+	got := spec.NQN()
+	want := "nqn.2024-01.com.seaweedfs:vol.my-volume"
+	if got != want {
+		t.Fatalf("NQN() = %q, want %q (sanitized)", got, want)
+	}
+}
+
+func TestTargetSpec_IQN_Sanitized(t *testing.T) {
+	spec := tr.TargetSpec{IQNSuffix: "My_Volume"}
+	got := spec.IQN()
+	want := "iqn.2024.com.seaweedfs:my-volume"
+	if got != want {
+		t.Fatalf("IQN() = %q, want %q (sanitized)", got, want)
+	}
+}
+
+func TestTargetSpec_NQN_LongNameTruncated(t *testing.T) {
+	long := strings.Repeat("a", 100)
+	spec := tr.TargetSpec{NQNSuffix: long}
+	got := spec.NQN()
+	// SanitizeIQN truncates to 64 chars with hash suffix.
+	prefix := "nqn.2024-01.com.seaweedfs:vol."
+	suffix := got[len(prefix):]
+	if len(suffix) > 64 {
+		t.Fatalf("suffix len = %d, want <= 64", len(suffix))
+	}
+}
+
+// ============================================================
+// paramDefault helper
+// ============================================================
+
+func TestParamDefault(t *testing.T) {
+	params := map[string]string{"key": "val"}
+	if got := paramDefault(params, "key", "def"); got != "val" {
+		t.Fatalf("got %q, want val", got)
+	}
+	if got := paramDefault(params, "missing", "def"); got != "def" {
+		t.Fatalf("got %q, want def", got)
+	}
+	if got := paramDefault(nil, "key", "def"); got != "def" {
+		t.Fatalf("got %q, want def", got)
+	}
+}
diff --git a/weed/storage/blockvol/testrunner/actions/register.go b/weed/storage/blockvol/testrunner/actions/register.go
index ee9f7b6d9..bd3e862ad 100644
--- a/weed/storage/blockvol/testrunner/actions/register.go
+++ b/weed/storage/blockvol/testrunner/actions/register.go
@@ -6,11 +6,14 @@ import tr "github.com/seaweedfs/seaweedfs/weed/storage/blockvol/testrunner"
 func RegisterAll(r *tr.Registry) {
 	RegisterBlockActions(r)
 	RegisterISCSIActions(r)
+	RegisterNVMeActions(r)
 	RegisterIOActions(r)
 	RegisterFaultActions(r)
 	RegisterSystemActions(r)
 	RegisterMetricsActions(r)
+	RegisterBenchActions(r)
 	RegisterDevOpsActions(r)
 	RegisterSnapshotActions(r)
 	RegisterDatabaseActions(r)
+	RegisterK8sActions(r)
 }
diff --git a/weed/storage/blockvol/testrunner/agent.go b/weed/storage/blockvol/testrunner/agent.go
index a6845e6c5..c4d896130 100644
--- a/weed/storage/blockvol/testrunner/agent.go
+++ b/weed/storage/blockvol/testrunner/agent.go
@@ -397,15 +397,19 @@ func (a *Agent) executePhase(ctx context.Context, req *PhaseRequest) PhaseRespon
 				continue
 			}
 			htSpec := infra.HATargetSpec{
-				VolSize:         tgtSpec.VolSize,
-				WALSize:         tgtSpec.WALSize,
-				IQN:             tgtSpec.IQN(),
-				ISCSIPort:       tgtSpec.ISCSIPort,
-				AdminPort:       tgtSpec.AdminPort,
-				ReplicaDataPort: tgtSpec.ReplicaDataPort,
-				ReplicaCtrlPort: tgtSpec.ReplicaCtrlPort,
-				RebuildPort:     tgtSpec.RebuildPort,
-				TPGID:           tgtSpec.TPGID,
+				VolSize:             tgtSpec.VolSize,
+				WALSize:             tgtSpec.WALSize,
+				IQN:                 tgtSpec.IQN(),
+				ISCSIPort:           tgtSpec.ISCSIPort,
+				AdminPort:           tgtSpec.AdminPort,
+				ReplicaDataPort:     tgtSpec.ReplicaDataPort,
+				ReplicaCtrlPort:     tgtSpec.ReplicaCtrlPort,
+				RebuildPort:         tgtSpec.RebuildPort,
+				TPGID:               tgtSpec.TPGID,
+				NvmePort:            tgtSpec.NvmePort,
+				NQN:                 tgtSpec.NQN(),
+				MaxConcurrentWrites: tgtSpec.MaxConcurrentWrites,
+				NvmeIOQueues:        tgtSpec.NvmeIOQueues,
 			}
 			actx.Targets[tgtName] = infra.NewHATargetFromSpec(nativeNode, tgtName, htSpec)
 		}
diff --git a/weed/storage/blockvol/testrunner/cmd/sw-test-runner/main.go b/weed/storage/blockvol/testrunner/cmd/sw-test-runner/main.go
index 5026274f7..e4b3cc736 100644
--- a/weed/storage/blockvol/testrunner/cmd/sw-test-runner/main.go
+++ b/weed/storage/blockvol/testrunner/cmd/sw-test-runner/main.go
@@ -429,7 +429,7 @@ func listCmd() {
 	}
 
 	byTier := registry.ListByTier()
-	tierOrder := []string{tr.TierCore, tr.TierBlock, tr.TierDevOps, tr.TierChaos}
+	tierOrder := []string{tr.TierCore, tr.TierBlock, tr.TierDevOps, tr.TierChaos, actions.TierK8s}
 
 	fmt.Println("Registered actions:")
 	for _, tier := range tierOrder {
@@ -485,15 +485,19 @@ func setupActionContext(s *tr.Scenario, logFunc func(string, ...interface{})) (*
 			return nil, fmt.Errorf("target %s: node %s is not infra.Node", name, spec.Node)
 		}
 		htSpec := infra.HATargetSpec{
-			VolSize:         spec.VolSize,
-			WALSize:         spec.WALSize,
-			IQN:             spec.IQN(),
-			ISCSIPort:       spec.ISCSIPort,
-			AdminPort:       spec.AdminPort,
-			ReplicaDataPort: spec.ReplicaDataPort,
-			ReplicaCtrlPort: spec.ReplicaCtrlPort,
-			RebuildPort:     spec.RebuildPort,
-			TPGID:           spec.TPGID,
+			VolSize:             spec.VolSize,
+			WALSize:             spec.WALSize,
+			IQN:                 spec.IQN(),
+			ISCSIPort:           spec.ISCSIPort,
+			AdminPort:           spec.AdminPort,
+			ReplicaDataPort:     spec.ReplicaDataPort,
+			ReplicaCtrlPort:     spec.ReplicaCtrlPort,
+			RebuildPort:         spec.RebuildPort,
+			TPGID:               spec.TPGID,
+			NvmePort:            spec.NvmePort,
+			NQN:                 spec.NQN(),
+			MaxConcurrentWrites: spec.MaxConcurrentWrites,
+			NvmeIOQueues:        spec.NvmeIOQueues,
 		}
 		ht := infra.NewHATargetFromSpec(node, name, htSpec)
 		actx.Targets[name] = ht
diff --git a/weed/storage/blockvol/testrunner/engine.go b/weed/storage/blockvol/testrunner/engine.go
index dcdd1eeeb..a8c50a941 100644
--- a/weed/storage/blockvol/testrunner/engine.go
+++ b/weed/storage/blockvol/testrunner/engine.go
@@ -3,7 +3,10 @@ package testrunner
 import (
 	"context"
 	"fmt"
+	"math"
 	"regexp"
+	"sort"
+	"strconv"
 	"strings"
 	"sync"
 	"time"
@@ -67,6 +70,13 @@ func (e *Engine) Run(ctx context.Context, s *Scenario, actx *ActionContext) *Sce
 		if count <= 0 {
 			count = 1
 		}
+
+		// Collect save_as values across iterations for aggregation.
+		var iterValues map[string][]float64
+		if count > 1 && phase.Aggregate != "none" {
+			iterValues = make(map[string][]float64)
+		}
+
 		for iter := 1; iter <= count; iter++ {
 			iterPhase := phase
 			if phase.Repeat > 1 {
@@ -74,6 +84,20 @@ func (e *Engine) Run(ctx context.Context, s *Scenario, actx *ActionContext) *Sce
 			}
 			pr := e.runPhase(ctx, actx, iterPhase)
 			result.Phases = append(result.Phases, pr)
+
+			// Collect numeric save_as values for aggregation.
+			if iterValues != nil {
+				for _, act := range phase.Actions {
+					if act.SaveAs != "" {
+						if v, ok := actx.Vars[act.SaveAs]; ok {
+							if f, err := strconv.ParseFloat(strings.TrimSpace(v), 64); err == nil {
+								iterValues[act.SaveAs] = append(iterValues[act.SaveAs], f)
+							}
+						}
+					}
+				}
+			}
+
 			if pr.Status == StatusFail {
 				failed = true
 				result.Status = StatusFail
@@ -81,14 +105,64 @@ func (e *Engine) Run(ctx context.Context, s *Scenario, actx *ActionContext) *Sce
 				break
 			}
 		}
+
+		// Aggregate collected values across iterations.
+		if iterValues != nil && !failed {
+			trimPct := phase.TrimPct
+			// 0 means no trimming (explicit or default). Only auto-default
+			// when repeat >= 5 and trim_pct was not set.
+			if trimPct == 0 && count >= 5 {
+				trimPct = 20
+			}
+			agg := phase.Aggregate
+			if agg == "" {
+				agg = "median" // default aggregation method
+			}
+			for varName, values := range iterValues {
+				if len(values) < 2 {
+					continue
+				}
+				trimmed := trimOutliers(values, trimPct)
+				stats := ComputeStats(trimmed)
+
+				// Store aggregate results as vars.
+				switch agg {
+				case "median":
+					actx.Vars[varName] = strconv.FormatFloat(stats.P50, 'f', 2, 64)
+				case "mean":
+					actx.Vars[varName] = strconv.FormatFloat(stats.Mean, 'f', 2, 64)
+				}
+				actx.Vars[varName+"_median"] = strconv.FormatFloat(stats.P50, 'f', 2, 64)
+				actx.Vars[varName+"_mean"] = strconv.FormatFloat(stats.Mean, 'f', 2, 64)
+				actx.Vars[varName+"_stddev"] = strconv.FormatFloat(stats.StdDev, 'f', 2, 64)
+				actx.Vars[varName+"_min"] = strconv.FormatFloat(stats.Min, 'f', 2, 64)
+				actx.Vars[varName+"_max"] = strconv.FormatFloat(stats.Max, 'f', 2, 64)
+				actx.Vars[varName+"_n"] = strconv.Itoa(stats.Count)
+
+				// Store all raw values as comma-separated string.
+				parts := make([]string, len(values))
+				for i, v := range values {
+					parts[i] = strconv.FormatFloat(v, 'f', 2, 64)
+				}
+				actx.Vars[varName+"_all"] = strings.Join(parts, ",")
+
+				e.log("  [aggregate] %s: n=%d median=%.2f mean=%.2f stddev=%.2f (trimmed %d%% from %d samples)",
+					varName, stats.Count, stats.P50, stats.Mean, stats.StdDev, trimPct, len(values))
+			}
+		}
+
 		if failed {
 			break
 		}
 	}
 
-	// Always-phases run regardless of failure.
+	// Always-phases run regardless of failure, with a fresh 60s context
+	// so they can complete even if the main context was canceled.
+	cleanupCtx := context.Background()
+	cleanupCtx, cleanupCancel := context.WithTimeout(cleanupCtx, 60*time.Second)
+	defer cleanupCancel()
 	for _, phase := range alwaysPhases {
-		pr := e.runPhase(ctx, actx, phase)
+		pr := e.runPhase(cleanupCtx, actx, phase)
 		result.Phases = append(result.Phases, pr)
 	}
 
@@ -310,3 +384,23 @@ func marshalActionYAML(act Action) string {
 	}
 	return string(data)
 }
+
+// trimOutliers removes the top and bottom pct% of values.
+// E.g. pct=20 on 10 values removes the 2 lowest and 2 highest, returning 6.
+// Returns a copy; does not modify the input.
+func trimOutliers(values []float64, pct int) []float64 {
+	if len(values) <= 2 || pct <= 0 {
+		return values
+	}
+	sorted := make([]float64, len(values))
+	copy(sorted, values)
+	sort.Float64s(sorted)
+
+	trim := int(math.Round(float64(len(sorted)) * float64(pct) / 100.0))
+	if trim*2 >= len(sorted) {
+		// Can't trim more than half from each end; keep at least 1.
+		trim = (len(sorted) - 1) / 2
+	}
+	return sorted[trim : len(sorted)-trim]
+}
+
diff --git a/weed/storage/blockvol/testrunner/engine_test.go b/weed/storage/blockvol/testrunner/engine_test.go
index 4eaefcc4e..468be65af 100644
--- a/weed/storage/blockvol/testrunner/engine_test.go
+++ b/weed/storage/blockvol/testrunner/engine_test.go
@@ -558,6 +558,285 @@ func TestEngine_RepeatFailStopsEarly(t *testing.T) {
 	}
 }
 
+func TestEngine_RepeatAggregateMedian(t *testing.T) {
+	registry := NewRegistry()
+
+	iter := 0
+	values := []string{"100", "200", "150", "180", "170"}
+	step := ActionHandlerFunc(func(ctx context.Context, actx *ActionContext, act Action) (map[string]string, error) {
+		v := values[iter]
+		iter++
+		return map[string]string{"value": v}, nil
+	})
+	registry.Register("step", TierCore, step)
+
+	scenario := &Scenario{
+		Name:    "aggregate-test",
+		Timeout: Duration{5 * time.Second},
+		Phases: []Phase{
+			{
+				Name:      "bench",
+				Repeat:    5,
+				Aggregate: "median",
+				TrimPct:   20,
+				Actions: []Action{
+					{Action: "step", SaveAs: "iops"},
+				},
+			},
+		},
+	}
+
+	engine := NewEngine(registry, nil)
+	actx := &ActionContext{
+		Scenario: scenario,
+		Vars:     make(map[string]string),
+		Log:      func(string, ...interface{}) {},
+	}
+	result := engine.Run(context.Background(), scenario, actx)
+
+	if result.Status != StatusPass {
+		t.Fatalf("status = %s: %s", result.Status, result.Error)
+	}
+	if iter != 5 {
+		t.Fatalf("step called %d times, want 5", iter)
+	}
+
+	// Verify aggregated vars exist.
+	if v := actx.Vars["iops_median"]; v == "" {
+		t.Fatal("iops_median not set")
+	}
+	if v := actx.Vars["iops_mean"]; v == "" {
+		t.Fatal("iops_mean not set")
+	}
+	if v := actx.Vars["iops_all"]; v == "" {
+		t.Fatal("iops_all not set")
+	}
+	if v := actx.Vars["iops_n"]; v == "" {
+		t.Fatal("iops_n not set")
+	}
+
+	// The primary var should be overwritten with the median.
+	// Values: [100, 200, 150, 180, 170], trim 20% = remove 1 from each end
+	// Sorted: [100, 150, 170, 180, 200], trimmed: [150, 170, 180]
+	// Median of [150, 170, 180] = 170
+	if actx.Vars["iops"] != "170.00" {
+		t.Errorf("iops = %q, want 170.00 (median after trim)", actx.Vars["iops"])
+	}
+}
+
+func TestEngine_RepeatAggregateMean(t *testing.T) {
+	registry := NewRegistry()
+
+	iter := 0
+	values := []string{"100", "200", "150", "180", "170"}
+	step := ActionHandlerFunc(func(ctx context.Context, actx *ActionContext, act Action) (map[string]string, error) {
+		v := values[iter]
+		iter++
+		return map[string]string{"value": v}, nil
+	})
+	registry.Register("step", TierCore, step)
+
+	scenario := &Scenario{
+		Name:    "aggregate-mean-test",
+		Timeout: Duration{5 * time.Second},
+		Phases: []Phase{
+			{
+				Name:      "bench",
+				Repeat:    5,
+				Aggregate: "mean",
+				TrimPct:   20,
+				Actions: []Action{
+					{Action: "step", SaveAs: "iops"},
+				},
+			},
+		},
+	}
+
+	engine := NewEngine(registry, nil)
+	actx := &ActionContext{
+		Scenario: scenario,
+		Vars:     make(map[string]string),
+		Log:      func(string, ...interface{}) {},
+	}
+	result := engine.Run(context.Background(), scenario, actx)
+
+	if result.Status != StatusPass {
+		t.Fatalf("status = %s: %s", result.Status, result.Error)
+	}
+
+	// Trimmed: [150, 170, 180], mean = 166.67
+	if actx.Vars["iops"] != "166.67" {
+		t.Errorf("iops = %q, want 166.67 (mean after trim)", actx.Vars["iops"])
+	}
+}
+
+func TestEngine_RepeatAggregateNone(t *testing.T) {
+	registry := NewRegistry()
+
+	iter := 0
+	step := ActionHandlerFunc(func(ctx context.Context, actx *ActionContext, act Action) (map[string]string, error) {
+		iter++
+		return map[string]string{"value": fmt.Sprintf("%d", iter*100)}, nil
+	})
+	registry.Register("step", TierCore, step)
+
+	scenario := &Scenario{
+		Name:    "aggregate-none-test",
+		Timeout: Duration{5 * time.Second},
+		Phases: []Phase{
+			{
+				Name:      "bench",
+				Repeat:    3,
+				Aggregate: "none",
+				Actions: []Action{
+					{Action: "step", SaveAs: "iops"},
+				},
+			},
+		},
+	}
+
+	engine := NewEngine(registry, nil)
+	actx := &ActionContext{
+		Scenario: scenario,
+		Vars:     make(map[string]string),
+		Log:      func(string, ...interface{}) {},
+	}
+	result := engine.Run(context.Background(), scenario, actx)
+
+	if result.Status != StatusPass {
+		t.Fatalf("status = %s: %s", result.Status, result.Error)
+	}
+
+	// With aggregate: none, the var should hold the last iteration's value.
+	if actx.Vars["iops"] != "300" {
+		t.Errorf("iops = %q, want 300 (last iteration, no aggregation)", actx.Vars["iops"])
+	}
+	// And no aggregate vars should be set.
+	if _, ok := actx.Vars["iops_median"]; ok {
+		t.Error("iops_median should not be set with aggregate: none")
+	}
+}
+
+func TestTrimOutliers(t *testing.T) {
+	tests := []struct {
+		name   string
+		values []float64
+		pct    int
+		want   int // expected length after trim
+	}{
+		{"5 values trim 20%", []float64{1, 2, 3, 4, 5}, 20, 3},
+		{"10 values trim 10%", []float64{1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, 10, 8},
+		{"3 values trim 20%", []float64{1, 2, 3}, 20, 1},
+		{"2 values no trim", []float64{1, 2}, 20, 2},
+		{"empty no trim", []float64{}, 20, 0},
+		{"no trim pct 0", []float64{1, 2, 3, 4, 5}, 0, 5},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got := trimOutliers(tt.values, tt.pct)
+			if len(got) != tt.want {
+				t.Errorf("trimOutliers(%v, %d) len = %d, want %d", tt.values, tt.pct, len(got), tt.want)
+			}
+		})
+	}
+}
+
+// TestParse_InlineParams verifies that YAML fields not in the Action struct
+// are captured into Params via the inline tag. This is a regression test for
+// the snapshot-stress failure where `id: "1"` was not captured.
+func TestParse_InlineParams(t *testing.T) {
+	yaml := `
+name: inline-test
+timeout: 5m
+topology:
+  nodes:
+    node1:
+      host: "127.0.0.1"
+      is_local: true
+targets:
+  primary:
+    node: node1
+    iscsi_port: 3260
+    admin_port: 8080
+    iqn_suffix: test-primary
+phases:
+  - name: test_phase
+    actions:
+      - action: snapshot_create
+        target: primary
+        id: "42"
+      - action: dd_write
+        node: node1
+        device: "/dev/sda"
+        bs: 4k
+        count: "10"
+      - action: kubectl_apply
+        node: node1
+        file: "/tmp/cr.yaml"
+        namespace: "sw-block"
+`
+
+	s, err := Parse([]byte(yaml))
+	if err != nil {
+		t.Fatalf("parse: %v", err)
+	}
+
+	// Verify inline params are captured for each action type.
+	phase := s.Phases[0]
+
+	// snapshot_create: id should be in Params
+	snapAct := phase.Actions[0]
+	if snapAct.Params["id"] != "42" {
+		t.Errorf("snapshot_create: id = %q, want %q (inline param not captured)",
+			snapAct.Params["id"], "42")
+	}
+
+	// dd_write: device, bs, count should be in Params
+	ddAct := phase.Actions[1]
+	if ddAct.Params["device"] != "/dev/sda" {
+		t.Errorf("dd_write: device = %q, want /dev/sda", ddAct.Params["device"])
+	}
+	if ddAct.Params["bs"] != "4k" {
+		t.Errorf("dd_write: bs = %q, want 4k", ddAct.Params["bs"])
+	}
+	if ddAct.Params["count"] != "10" {
+		t.Errorf("dd_write: count = %q, want 10", ddAct.Params["count"])
+	}
+
+	// kubectl_apply: file, namespace should be in Params
+	k8sAct := phase.Actions[2]
+	if k8sAct.Params["file"] != "/tmp/cr.yaml" {
+		t.Errorf("kubectl_apply: file = %q, want /tmp/cr.yaml", k8sAct.Params["file"])
+	}
+	if k8sAct.Params["namespace"] != "sw-block" {
+		t.Errorf("kubectl_apply: namespace = %q, want sw-block", k8sAct.Params["namespace"])
+	}
+}
+
+// TestResolveAction_PreservesInlineParams verifies that resolveAction doesn't
+// lose inline params when copying the action.
+func TestResolveAction_PreservesInlineParams(t *testing.T) {
+	act := Action{
+		Action: "snapshot_create",
+		Target: "primary",
+		Params: map[string]string{
+			"id":     "5",
+			"device": "{{ dev }}",
+		},
+	}
+
+	vars := map[string]string{"dev": "/dev/sdb"}
+	resolved := resolveAction(act, vars)
+
+	if resolved.Params["id"] != "5" {
+		t.Errorf("id = %q, want 5", resolved.Params["id"])
+	}
+	if resolved.Params["device"] != "/dev/sdb" {
+		t.Errorf("device = %q, want /dev/sdb (should resolve var)", resolved.Params["device"])
+	}
+}
+
 func TestEngine_CleanupVars(t *testing.T) {
 	registry := NewRegistry()
 
@@ -609,3 +888,58 @@ func TestEngine_CleanupVars(t *testing.T) {
 		t.Errorf("result = %q", actx.Vars["result"])
 	}
 }
+
+func TestParse_AggregateValidation(t *testing.T) {
+	base := `
+name: validate-test
+timeout: 5m
+topology:
+  nodes:
+    node1:
+      host: "127.0.0.1"
+      is_local: true
+targets:
+  primary:
+    node: node1
+    iscsi_port: 3260
+    admin_port: 8080
+    iqn_suffix: test
+phases:
+  - name: bench
+    repeat: 5
+    aggregate: "%s"
+    trim_pct: %d
+    actions:
+      - action: exec
+        node: node1
+        cmd: "echo 1"
+`
+
+	tests := []struct {
+		name      string
+		aggregate string
+		trimPct   int
+		wantErr   bool
+	}{
+		{"valid median", "median", 20, false},
+		{"valid mean", "mean", 10, false},
+		{"valid none", "none", 0, false},
+		{"valid empty", "", 0, false},
+		{"invalid aggregate", "invalid", 0, true},
+		{"trim_pct too high", "median", 50, true},
+		{"trim_pct negative", "median", -1, true},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			yaml := fmt.Sprintf(base, tt.aggregate, tt.trimPct)
+			_, err := Parse([]byte(yaml))
+			if tt.wantErr && err == nil {
+				t.Error("expected error")
+			}
+			if !tt.wantErr && err != nil {
+				t.Errorf("unexpected error: %v", err)
+			}
+		})
+	}
+}
diff --git a/weed/storage/blockvol/testrunner/infra/fault.go b/weed/storage/blockvol/testrunner/infra/fault.go
index 0012da98f..0b2d052d8 100644
--- a/weed/storage/blockvol/testrunner/infra/fault.go
+++ b/weed/storage/blockvol/testrunner/infra/fault.go
@@ -23,7 +23,7 @@ func InjectNetem(ctx context.Context, node *Node, targetIP string, delayMs int)
 		return "", fmt.Errorf("tc qdisc add: code=%d stderr=%s err=%v", code, stderr, err)
 	}
 
-	cleanupCmd = fmt.Sprintf("tc qdisc del dev %s root 2>/dev/null", iface)
+	cleanupCmd = fmt.Sprintf("tc qdisc del dev %s root 2>/dev/null || true", iface)
 	return cleanupCmd, nil
 }
 
@@ -120,6 +120,8 @@ func CorruptWALRegion(ctx context.Context, node *Node, volPath string, nBytes in
 }
 
 // ClearFault executes a cleanup command stored in vars.
+// Tolerates non-zero exit codes since cleanup commands are often
+// idempotent (e.g. removing an already-removed iptables rule).
 func ClearFault(ctx context.Context, node *Node, cleanupCmd string) error {
 	if cleanupCmd == "" {
 		return nil
@@ -127,8 +129,10 @@ func ClearFault(ctx context.Context, node *Node, cleanupCmd string) error {
 	cctx, cancel := context.WithTimeout(ctx, 10*time.Second)
 	defer cancel()
 	_, stderr, code, err := node.RunRoot(cctx, cleanupCmd)
-	if err != nil || code != 0 {
+	if err != nil {
 		return fmt.Errorf("clear fault: code=%d stderr=%s err=%v", code, stderr, err)
 	}
+	// Non-zero exit is tolerated — cleanup commands use "|| true" but
+	// legacy cleanup strings might not, and double-cleanup is harmless.
 	return nil
 }
diff --git a/weed/storage/blockvol/testrunner/infra/ha_target.go b/weed/storage/blockvol/testrunner/infra/ha_target.go
index 6452c5289..9b1436eaa 100644
--- a/weed/storage/blockvol/testrunner/infra/ha_target.go
+++ b/weed/storage/blockvol/testrunner/infra/ha_target.go
@@ -17,6 +17,10 @@ type HATarget struct {
 	ReplicaCtrl int // replica receiver ctrl port
 	RebuildPort int
 	TPGID       int // ALUA target port group ID (0 = omit flag)
+	NvmePort             int // NVMe/TCP listen port (0 = disabled)
+	NQN                  string // NVMe NQN (auto-derived from IQN if empty)
+	MaxConcurrentWrites  int // WAL max concurrent writes (0 = default 16)
+	NvmeIOQueues         int // NVMe max IO queues (0 = default 4)
 }
 
 // StatusResp matches the JSON returned by GET /status.
@@ -60,7 +64,11 @@ type HATargetSpec struct {
 	ReplicaDataPort int
 	ReplicaCtrlPort int
 	RebuildPort     int
-	TPGID           int
+	TPGID                int
+	NvmePort             int
+	NQN                  string
+	MaxConcurrentWrites  int
+	NvmeIOQueues         int
 }
 
 // NewHATargetFromSpec creates an HATarget from an HATargetSpec and Node.
@@ -83,6 +91,10 @@ func NewHATargetFromSpec(node *Node, name string, spec HATargetSpec) *HATarget {
 
 	ht := NewHATarget(node, cfg, spec.AdminPort, spec.ReplicaDataPort, spec.ReplicaCtrlPort, spec.RebuildPort)
 	ht.TPGID = spec.TPGID
+	ht.NvmePort = spec.NvmePort
+	ht.NQN = spec.NQN
+	ht.MaxConcurrentWrites = spec.MaxConcurrentWrites
+	ht.NvmeIOQueues = spec.NvmeIOQueues
 
 	// Use unique file paths per target name.
 	ht.BinPath = "/tmp/iscsi-target-test"
@@ -93,6 +105,11 @@ func NewHATargetFromSpec(node *Node, name string, spec HATargetSpec) *HATarget {
 
 // Start overrides Target.Start to add HA-specific flags.
 func (h *HATarget) Start(ctx context.Context, create bool) error {
+	// Pre-flight: check if ports are already in use by another process.
+	if err := h.checkPortsFree(ctx); err != nil {
+		return err
+	}
+
 	// Remove old log
 	h.Node.Run(ctx, fmt.Sprintf("rm -f %s", h.LogFile))
 
@@ -100,8 +117,14 @@ func (h *HATarget) Start(ctx context.Context, create bool) error {
 		h.VolFile, h.Config.Port, h.Config.IQN)
 
 	if create {
+		if err := h.checkDiskSpace(ctx); err != nil {
+			return err
+		}
 		h.Node.Run(ctx, fmt.Sprintf("rm -f %s %s.wal", h.VolFile, h.VolFile))
 		args += fmt.Sprintf(" -create -size %s", h.Config.VolSize)
+		if h.Config.WALSize != "" {
+			args += fmt.Sprintf(" -wal-size %s", h.Config.WALSize)
+		}
 	}
 
 	if h.AdminPort > 0 {
@@ -116,6 +139,18 @@ func (h *HATarget) Start(ctx context.Context, create bool) error {
 	if h.TPGID > 0 {
 		args += fmt.Sprintf(" -tpg-id %d", h.TPGID)
 	}
+	if h.NvmePort > 0 {
+		args += fmt.Sprintf(" -nvme-addr :%d", h.NvmePort)
+		if h.NQN != "" {
+			args += fmt.Sprintf(" -nqn %s", h.NQN)
+		}
+	}
+	if h.MaxConcurrentWrites > 0 {
+		args += fmt.Sprintf(" -wal-max-concurrent-writes %d", h.MaxConcurrentWrites)
+	}
+	if h.NvmeIOQueues > 0 {
+		args += fmt.Sprintf(" -nvme-io-queues %d", h.NvmeIOQueues)
+	}
 
 	cmd := fmt.Sprintf("setsid -f %s %s >%s 2>&1", h.BinPath, args, h.LogFile)
 	_, stderr, code, err := h.Node.Run(ctx, cmd)
@@ -127,13 +162,7 @@ func (h *HATarget) Start(ctx context.Context, create bool) error {
 		return err
 	}
 
-	if h.AdminPort > 0 {
-		if err := h.waitForAdminPort(ctx); err != nil {
-			return err
-		}
-	}
-
-	// Discover PID by matching the unique volume file path.
+	// Discover PID early — needed for liveness check in waitForAdminPort.
 	stdout, _, _, _ := h.Node.Run(ctx, fmt.Sprintf("ps -eo pid,args | grep '%s' | grep -v grep | awk '{print $1}'", h.VolFile))
 	pidStr := strings.TrimSpace(stdout)
 	if idx := strings.IndexByte(pidStr, '\n'); idx > 0 {
@@ -145,6 +174,12 @@ func (h *HATarget) Start(ctx context.Context, create bool) error {
 		return fmt.Errorf("find ha target PID: %q", pidStr)
 	}
 	h.Pid = pid
+
+	if h.AdminPort > 0 {
+		if err := h.waitForAdminPort(ctx); err != nil {
+			return err
+		}
+	}
 	return nil
 }
 
@@ -152,9 +187,24 @@ func (h *HATarget) waitForAdminPort(ctx context.Context) error {
 	for {
 		select {
 		case <-ctx.Done():
-			return fmt.Errorf("wait for admin port %d: %w", h.AdminPort, ctx.Err())
+			// Collect last 20 lines of log for diagnostics.
+			logTail, _, _, _ := h.Node.Run(context.Background(),
+				fmt.Sprintf("tail -20 %s 2>/dev/null", h.LogFile))
+			return fmt.Errorf("wait for admin port %d: %w\nlast log:\n%s", h.AdminPort, ctx.Err(), logTail)
 		default:
 		}
+
+		// Check if our process is still alive — fail fast if it crashed.
+		if h.Pid > 0 {
+			_, _, code, _ := h.Node.Run(ctx, fmt.Sprintf("kill -0 %d 2>/dev/null", h.Pid))
+			if code != 0 {
+				logTail, _, _, _ := h.Node.Run(context.Background(),
+					fmt.Sprintf("tail -20 %s 2>/dev/null", h.LogFile))
+				return fmt.Errorf("target process %d died before admin port %d was ready\nlast log:\n%s",
+					h.Pid, h.AdminPort, logTail)
+			}
+		}
+
 		stdout, _, code, _ := h.Node.Run(ctx, fmt.Sprintf("ss -tln | grep :%d", h.AdminPort))
 		if code == 0 && strings.Contains(stdout, fmt.Sprintf(":%d", h.AdminPort)) {
 			return nil
@@ -163,6 +213,63 @@ func (h *HATarget) waitForAdminPort(ctx context.Context) error {
 	}
 }
 
+// checkPortsFree verifies required ports are not already in use by another process.
+func (h *HATarget) checkPortsFree(ctx context.Context) error {
+	ports := []struct {
+		port int
+		name string
+	}{
+		{h.Config.Port, "iSCSI"},
+	}
+	if h.AdminPort > 0 {
+		ports = append(ports, struct {
+			port int
+			name string
+		}{h.AdminPort, "admin"})
+	}
+	if h.ReplicaData > 0 {
+		ports = append(ports, struct {
+			port int
+			name string
+		}{h.ReplicaData, "replica-data"})
+	}
+	if h.ReplicaCtrl > 0 {
+		ports = append(ports, struct {
+			port int
+			name string
+		}{h.ReplicaCtrl, "replica-ctrl"})
+	}
+	if h.RebuildPort > 0 {
+		ports = append(ports, struct {
+			port int
+			name string
+		}{h.RebuildPort, "rebuild"})
+	}
+	if h.NvmePort > 0 {
+		ports = append(ports, struct {
+			port int
+			name string
+		}{h.NvmePort, "nvme"})
+	}
+
+	for _, p := range ports {
+		stdout, _, code, _ := h.Node.Run(ctx, fmt.Sprintf("ss -tln | grep ':%d '", p.port))
+		if code == 0 && strings.TrimSpace(stdout) != "" {
+			// Port is in use — find what owns it.
+			owner, _, _, _ := h.Node.Run(ctx, fmt.Sprintf(
+				"ss -tlnp | grep ':%d ' | head -1", p.port))
+			return fmt.Errorf("port %d (%s) already in use on %s: %s",
+				p.port, p.name, h.Node.Host, strings.TrimSpace(owner))
+		}
+	}
+	return nil
+}
+
+// checkDiskSpace verifies the target node has enough disk space for the volume + WAL.
+func (h *HATarget) checkDiskSpace(ctx context.Context) error {
+	return CheckDiskSpace(ctx, h.Node, h.VolFile, h.Config.VolSize, h.Config.WALSize)
+}
+
 // curlPost executes a POST via curl on the node.
 func (h *HATarget) curlPost(ctx context.Context, path string, body interface{}) (int, string, error) {
 	data, err := json.Marshal(body)
diff --git a/weed/storage/blockvol/testrunner/infra/node.go b/weed/storage/blockvol/testrunner/infra/node.go
index a633868d0..0e4dc4bfa 100644
--- a/weed/storage/blockvol/testrunner/infra/node.go
+++ b/weed/storage/blockvol/testrunner/infra/node.go
@@ -8,6 +8,7 @@ import (
 	"net"
 	"os"
 	"os/exec"
+	"runtime"
 	"strings"
 	"sync"
 	"time"
@@ -94,7 +95,12 @@ func (n *Node) runNative(ctx context.Context, cmd string) (string, string, int,
 }
 
 func (n *Node) runLocal(ctx context.Context, cmd string) (string, string, int, error) {
-	c := exec.CommandContext(ctx, "wsl", "-e", "bash", "-c", cmd)
+	var c *exec.Cmd
+	if runtime.GOOS == "windows" {
+		c = exec.CommandContext(ctx, "wsl", "-e", "bash", "-c", cmd)
+	} else {
+		c = exec.CommandContext(ctx, "bash", "-c", cmd)
+	}
 	var outBuf, errBuf bytes.Buffer
 	c.Stdout = &outBuf
 	c.Stderr = &errBuf
@@ -166,8 +172,11 @@ func (n *Node) Upload(local, remote string) error {
 	if n.IsLocal {
 		ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
 		defer cancel()
-		wslLocal := ToWSLPath(local)
-		_, stderr, code, err := n.Run(ctx, fmt.Sprintf("cp %s %s && chmod +x %s", wslLocal, remote, remote))
+		src := local
+		if runtime.GOOS == "windows" {
+			src = ToWSLPath(local)
+		}
+		_, stderr, code, err := n.Run(ctx, fmt.Sprintf("cp %s %s && chmod +x %s", src, remote, remote))
 		if err != nil || code != 0 {
 			return fmt.Errorf("local upload: code=%d stderr=%s err=%v", code, stderr, err)
 		}
@@ -226,8 +235,11 @@ func (n *Node) Download(remote, local string) error {
 	if n.IsLocal {
 		ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
 		defer cancel()
-		wslLocal := ToWSLPath(local)
-		_, stderr, code, err := n.Run(ctx, fmt.Sprintf("cp %s %s", remote, wslLocal))
+		dst := local
+		if runtime.GOOS == "windows" {
+			dst = ToWSLPath(local)
+		}
+		_, stderr, code, err := n.Run(ctx, fmt.Sprintf("cp %s %s", remote, dst))
 		if err != nil || code != 0 {
 			return fmt.Errorf("local download: code=%d stderr=%s err=%v", code, stderr, err)
 		}
@@ -305,7 +317,12 @@ func (n *Node) StreamRun(ctx context.Context, cmd string, w io.Writer) error {
 		return c.Run()
 	}
 	if n.IsLocal {
-		c := exec.CommandContext(ctx, "wsl", "-e", "bash", "-c", cmd)
+		var c *exec.Cmd
+		if runtime.GOOS == "windows" {
+			c = exec.CommandContext(ctx, "wsl", "-e", "bash", "-c", cmd)
+		} else {
+			c = exec.CommandContext(ctx, "bash", "-c", cmd)
+		}
 		c.Stdout = w
 		c.Stderr = w
 		return c.Run()
diff --git a/weed/storage/blockvol/testrunner/infra/target.go b/weed/storage/blockvol/testrunner/infra/target.go
index 73782677b..2964fe5e6 100644
--- a/weed/storage/blockvol/testrunner/infra/target.go
+++ b/weed/storage/blockvol/testrunner/infra/target.go
@@ -80,6 +80,14 @@ func (t *Target) Deploy(localBin string) error {
 
 // Start launches the target process. If create is true, a new volume is created.
 func (t *Target) Start(ctx context.Context, create bool) error {
+	// Pre-flight: check if iSCSI port is already in use.
+	stdout, _, code, _ := t.Node.Run(ctx, fmt.Sprintf("ss -tln | grep ':%d '", t.Config.Port))
+	if code == 0 && strings.TrimSpace(stdout) != "" {
+		owner, _, _, _ := t.Node.Run(ctx, fmt.Sprintf("ss -tlnp | grep ':%d ' | head -1", t.Config.Port))
+		return fmt.Errorf("port %d already in use on %s: %s",
+			t.Config.Port, t.Node.Host, strings.TrimSpace(owner))
+	}
+
 	// Remove old log
 	t.Node.Run(ctx, fmt.Sprintf("rm -f %s", t.LogFile))
 
@@ -87,8 +95,14 @@ func (t *Target) Start(ctx context.Context, create bool) error {
 		t.VolFile, t.Config.Port, t.Config.IQN)
 
 	if create {
+		if err := CheckDiskSpace(ctx, t.Node, t.VolFile, t.Config.VolSize, t.Config.WALSize); err != nil {
+			return err
+		}
 		t.Node.Run(ctx, fmt.Sprintf("rm -f %s %s.wal", t.VolFile, t.VolFile))
 		args += fmt.Sprintf(" -create -size %s", t.Config.VolSize)
+		if t.Config.WALSize != "" {
+			args += fmt.Sprintf(" -wal-size %s", t.Config.WALSize)
+		}
 	}
 
 	cmd := fmt.Sprintf("setsid -f %s %s >%s 2>&1", t.BinPath, args, t.LogFile)
@@ -102,7 +116,7 @@ func (t *Target) Start(ctx context.Context, create bool) error {
 	}
 
 	// Discover PID by matching the binary name
-	stdout, _, _, _ := t.Node.Run(ctx, fmt.Sprintf("ps -eo pid,args | grep '%s' | grep -v grep | awk '{print $1}'", t.BinPath))
+	stdout, _, _, _ = t.Node.Run(ctx, fmt.Sprintf("ps -eo pid,args | grep '%s' | grep -v grep | awk '{print $1}'", t.BinPath))
 	pidStr := strings.TrimSpace(stdout)
 	if idx := strings.IndexByte(pidStr, '\n'); idx > 0 {
 		pidStr = pidStr[:idx]
@@ -194,3 +208,65 @@ func (t *Target) PID() int { return t.Pid }
 
 // VolFilePath returns the remote volume file path.
 func (t *Target) VolFilePath() string { return t.VolFile }
+
+// CheckDiskSpace verifies a node has enough space for a volume + WAL.
+// volSize/walSize are human-readable strings like "100M", "64M".
+func CheckDiskSpace(ctx context.Context, node *Node, volFile, volSize, walSize string) error {
+	// Parse sizes to MB.
+	volMB := parseSizeMB(volSize)
+	walMB := parseSizeMB(walSize)
+	if walMB == 0 {
+		walMB = 64 // default WAL
+	}
+	neededMB := volMB + walMB + 50 // headroom for metadata/journal
+
+	// Get available space on the directory containing the volume file.
+	dir := volFile
+	if idx := strings.LastIndex(dir, "/"); idx > 0 {
+		dir = dir[:idx]
+	}
+	stdout, _, code, _ := node.Run(ctx, fmt.Sprintf("df -BM %s 2>/dev/null | tail -1 | awk '{print $4}'", dir))
+	if code != 0 {
+		return nil // can't check, proceed anyway
+	}
+	availStr := strings.TrimSpace(stdout)
+	availStr = strings.TrimSuffix(availStr, "M")
+	availMB, err := strconv.Atoi(availStr)
+	if err != nil {
+		return nil // can't parse, proceed anyway
+	}
+
+	if availMB < neededMB {
+		return fmt.Errorf("insufficient disk space on %s: %dMB available, need %dMB (vol=%s wal=%s + 50MB headroom)",
+			node.Host, availMB, neededMB, volSize, walSize)
+	}
+	return nil
+}
+
+// parseSizeMB parses a human-readable size string (e.g. "100M", "1G", "1073741824") to megabytes.
+// Raw numbers >= 1048576 are treated as bytes.
+func parseSizeMB(s string) int {
+	s = strings.TrimSpace(s)
+	if s == "" {
+		return 0
+	}
+	s = strings.ToUpper(s)
+	multiplier := 1
+	if strings.HasSuffix(s, "G") {
+		multiplier = 1024
+		s = strings.TrimSuffix(s, "G")
+	} else if strings.HasSuffix(s, "M") {
+		s = strings.TrimSuffix(s, "M")
+	} else if strings.HasSuffix(s, "K") {
+		s = strings.TrimSuffix(s, "K")
+		v, _ := strconv.Atoi(s)
+		return v / 1024
+	}
+	v, _ := strconv.Atoi(s)
+	result := v * multiplier
+	// Raw numbers >= 1MB are assumed to be in bytes.
+	if multiplier == 1 && result >= 1048576 {
+		return result / (1024 * 1024)
+	}
+	return result
+}
diff --git a/weed/storage/blockvol/testrunner/parser.go b/weed/storage/blockvol/testrunner/parser.go
index b0a89540c..1dd58d89b 100644
--- a/weed/storage/blockvol/testrunner/parser.go
+++ b/weed/storage/blockvol/testrunner/parser.go
@@ -91,6 +91,12 @@ func validate(s *Scenario) error {
 		if phase.Repeat < 0 || phase.Repeat > 100 {
 			return fmt.Errorf("phase %q: repeat must be 0..100 (got %d)", phase.Name, phase.Repeat)
 		}
+		if phase.TrimPct < 0 || phase.TrimPct > 49 {
+			return fmt.Errorf("phase %q: trim_pct must be 0..49 (got %d)", phase.Name, phase.TrimPct)
+		}
+		if phase.Aggregate != "" && phase.Aggregate != "median" && phase.Aggregate != "mean" && phase.Aggregate != "none" {
+			return fmt.Errorf("phase %q: aggregate must be 'median', 'mean', or 'none' (got %q)", phase.Name, phase.Aggregate)
+		}
 
 		// Validate save_as uniqueness within parallel phases.
 		if phase.Parallel {
diff --git a/weed/storage/blockvol/testrunner/scenarios/cp103-25g-ab.yaml b/weed/storage/blockvol/testrunner/scenarios/cp103-25g-ab.yaml
new file mode 100644
index 000000000..7b99b03db
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/cp103-25g-ab.yaml
@@ -0,0 +1,455 @@
+name: "CP10-3 25G A/B Benchmark: iSCSI vs NVMe (3-run median)"
+timeout: "45m"
+
+topology:
+  nodes:
+    server:
+      host: "10.0.0.3"
+      user: "testdev"
+      key: "/home/testdev/.ssh/id_ed25519"
+    client:
+      host: "10.0.0.1"
+      is_local: true
+
+targets:
+  primary:
+    node: server
+    vol_size: "1073741824"
+    wal_size: "536870912"
+    iscsi_port: 3263
+    nvme_port: 4420
+    admin_port: 8083
+    iqn_suffix: "bench-25g"
+    nqn_suffix: "bench-25g"
+
+phases:
+  # --- Setup ---
+  - name: setup
+    actions:
+      - action: kill_stale
+        node: client
+        ignore_error: true
+      - action: kill_stale
+        node: server
+        ignore_error: true
+      - action: nvme_cleanup
+        node: client
+        ignore_error: true
+      - action: iscsi_cleanup
+        node: client
+        ignore_error: true
+      - action: start_target
+        target: primary
+        create: "true"
+
+  # =================================================================
+  # iSCSI fio benchmarks (3 runs, median)
+  # =================================================================
+  - name: iscsi-connect
+    actions:
+      - action: iscsi_login
+        target: primary
+        node: client
+        save_as: iscsi_device
+
+  - name: iscsi-fio
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      # 4K randwrite QD=1
+      - action: fio_json
+        node: client
+        device: "{{iscsi_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "30"
+        name: "iscsi-4k-rw-qd1"
+        save_as: _iscsi_fio_4k_rw_qd1
+      - action: fio_parse
+        json_var: _iscsi_fio_4k_rw_qd1
+        metric: iops
+        save_as: iscsi_4k_rw_qd1
+
+      # 4K randwrite QD=32
+      - action: fio_json
+        node: client
+        device: "{{iscsi_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "iscsi-4k-rw-qd32"
+        save_as: _iscsi_fio_4k_rw_qd32
+      - action: fio_parse
+        json_var: _iscsi_fio_4k_rw_qd32
+        metric: iops
+        save_as: iscsi_4k_rw_qd32
+
+      # 4K randread QD=1
+      - action: fio_json
+        node: client
+        device: "{{iscsi_device}}"
+        rw: randread
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "30"
+        name: "iscsi-4k-rd-qd1"
+        save_as: _iscsi_fio_4k_rd_qd1
+      - action: fio_parse
+        json_var: _iscsi_fio_4k_rd_qd1
+        metric: iops
+        save_as: iscsi_4k_rd_qd1
+
+      # 4K randread QD=32
+      - action: fio_json
+        node: client
+        device: "{{iscsi_device}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "iscsi-4k-rd-qd32"
+        save_as: _iscsi_fio_4k_rd_qd32
+      - action: fio_parse
+        json_var: _iscsi_fio_4k_rd_qd32
+        metric: iops
+        save_as: iscsi_4k_rd_qd32
+
+      # 64K seqwrite QD=32
+      - action: fio_json
+        node: client
+        device: "{{iscsi_device}}"
+        rw: write
+        bs: 64k
+        iodepth: "8"
+        numjobs: "1"
+        runtime: "30"
+        name: "iscsi-64k-sw-qd8"
+        save_as: _iscsi_fio_64k_sw_qd8
+      - action: fio_parse
+        json_var: _iscsi_fio_64k_sw_qd8
+        metric: bw_mb
+        save_as: iscsi_64k_sw_qd8
+
+      # 64K seqread QD=8
+      - action: fio_json
+        node: client
+        device: "{{iscsi_device}}"
+        rw: read
+        bs: 64k
+        iodepth: "8"
+        numjobs: "1"
+        runtime: "30"
+        name: "iscsi-64k-sr-qd8"
+        save_as: _iscsi_fio_64k_sr_qd8
+      - action: fio_parse
+        json_var: _iscsi_fio_64k_sr_qd8
+        metric: bw_mb
+        save_as: iscsi_64k_sr_qd8
+
+  - name: iscsi-disconnect
+    actions:
+      - action: iscsi_logout
+        target: primary
+        node: client
+
+  # =================================================================
+  # NVMe fio benchmarks (3 runs, median)
+  # =================================================================
+  - name: nvme-connect
+    actions:
+      - action: nvme_connect
+        target: primary
+        node: client
+        save_as: nvme_nqn
+      - action: nvme_get_device
+        target: primary
+        node: client
+        save_as: nvme_device
+
+  - name: nvme-fio
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      # 4K randwrite QD=1
+      - action: fio_json
+        node: client
+        device: "{{nvme_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "30"
+        name: "nvme-4k-rw-qd1"
+        save_as: _nvme_fio_4k_rw_qd1
+      - action: fio_parse
+        json_var: _nvme_fio_4k_rw_qd1
+        metric: iops
+        save_as: nvme_4k_rw_qd1
+
+      # 4K randwrite QD=32
+      - action: fio_json
+        node: client
+        device: "{{nvme_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "nvme-4k-rw-qd32"
+        save_as: _nvme_fio_4k_rw_qd32
+      - action: fio_parse
+        json_var: _nvme_fio_4k_rw_qd32
+        metric: iops
+        save_as: nvme_4k_rw_qd32
+
+      # 4K randread QD=1
+      - action: fio_json
+        node: client
+        device: "{{nvme_device}}"
+        rw: randread
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "30"
+        name: "nvme-4k-rd-qd1"
+        save_as: _nvme_fio_4k_rd_qd1
+      - action: fio_parse
+        json_var: _nvme_fio_4k_rd_qd1
+        metric: iops
+        save_as: nvme_4k_rd_qd1
+
+      # 4K randread QD=32
+      - action: fio_json
+        node: client
+        device: "{{nvme_device}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "nvme-4k-rd-qd32"
+        save_as: _nvme_fio_4k_rd_qd32
+      - action: fio_parse
+        json_var: _nvme_fio_4k_rd_qd32
+        metric: iops
+        save_as: nvme_4k_rd_qd32
+
+      # 64K seqwrite QD=8
+      - action: fio_json
+        node: client
+        device: "{{nvme_device}}"
+        rw: write
+        bs: 64k
+        iodepth: "8"
+        numjobs: "1"
+        runtime: "30"
+        name: "nvme-64k-sw-qd8"
+        save_as: _nvme_fio_64k_sw_qd8
+      - action: fio_parse
+        json_var: _nvme_fio_64k_sw_qd8
+        metric: bw_mb
+        save_as: nvme_64k_sw_qd8
+
+      # 64K seqread QD=8
+      - action: fio_json
+        node: client
+        device: "{{nvme_device}}"
+        rw: read
+        bs: 64k
+        iodepth: "8"
+        numjobs: "1"
+        runtime: "30"
+        name: "nvme-64k-sr-qd8"
+        save_as: _nvme_fio_64k_sr_qd8
+      - action: fio_parse
+        json_var: _nvme_fio_64k_sr_qd8
+        metric: bw_mb
+        save_as: nvme_64k_sr_qd8
+
+  - name: nvme-disconnect
+    actions:
+      - action: nvme_disconnect
+        target: primary
+        node: client
+
+  # =================================================================
+  # pgbench: iSCSI (3 runs, median)
+  # =================================================================
+  - name: iscsi-pgbench-setup
+    actions:
+      - action: iscsi_login
+        target: primary
+        node: client
+        save_as: iscsi_device
+      - action: pgbench_init
+        node: client
+        device: "{{iscsi_device}}"
+        port: "5434"
+        scale: "10"
+        mount: "/mnt/pgbench-iscsi"
+
+  - name: iscsi-pgbench-tpcb
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: pgbench_run
+        node: client
+        clients: "1"
+        duration: "30"
+        port: "5434"
+        save_as: iscsi_pg_c1
+      - action: pgbench_run
+        node: client
+        clients: "4"
+        duration: "30"
+        port: "5434"
+        save_as: iscsi_pg_c4
+      - action: pgbench_run
+        node: client
+        clients: "16"
+        duration: "30"
+        port: "5434"
+        save_as: iscsi_pg_c16
+
+  - name: iscsi-pgbench-teardown
+    actions:
+      - action: pgbench_cleanup
+        node: client
+        ignore_error: true
+      - action: iscsi_logout
+        target: primary
+        node: client
+
+  # =================================================================
+  # pgbench: NVMe (3 runs, median)
+  # =================================================================
+  - name: nvme-pgbench-setup
+    actions:
+      - action: nvme_connect
+        target: primary
+        node: client
+        save_as: nvme_nqn
+      - action: nvme_get_device
+        target: primary
+        node: client
+        save_as: nvme_device
+      - action: pgbench_init
+        node: client
+        device: "{{nvme_device}}"
+        port: "5435"
+        scale: "10"
+        mount: "/mnt/pgbench-nvme"
+
+  - name: nvme-pgbench-tpcb
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: pgbench_run
+        node: client
+        clients: "1"
+        duration: "30"
+        port: "5435"
+        save_as: nvme_pg_c1
+      - action: pgbench_run
+        node: client
+        clients: "4"
+        duration: "30"
+        port: "5435"
+        save_as: nvme_pg_c4
+      - action: pgbench_run
+        node: client
+        clients: "16"
+        duration: "30"
+        port: "5435"
+        save_as: nvme_pg_c16
+
+  - name: nvme-pgbench-teardown
+    actions:
+      - action: pgbench_cleanup
+        node: client
+        ignore_error: true
+      - action: nvme_disconnect
+        target: primary
+        node: client
+
+  # =================================================================
+  # Compare results (all use median values from aggregation)
+  # =================================================================
+  - name: compare-fio
+    actions:
+      - action: bench_compare
+        save_as: cmp_4k_rw_qd1
+        a_var: iscsi_4k_rw_qd1
+        b_var: nvme_4k_rw_qd1
+        metric: iops
+        gate: "0.8"
+        warn_gate: "0.7"
+
+      - action: bench_compare
+        save_as: cmp_4k_rw_qd32
+        a_var: iscsi_4k_rw_qd32
+        b_var: nvme_4k_rw_qd32
+        metric: iops
+        gate: "0.8"
+        warn_gate: "0.7"
+
+      - action: bench_compare
+        save_as: cmp_4k_rd_qd1
+        a_var: iscsi_4k_rd_qd1
+        b_var: nvme_4k_rd_qd1
+        metric: iops
+        gate: "0.8"
+        warn_gate: "0.7"
+
+      - action: bench_compare
+        save_as: cmp_4k_rd_qd32
+        a_var: iscsi_4k_rd_qd32
+        b_var: nvme_4k_rd_qd32
+        metric: iops
+        gate: "0.8"
+        warn_gate: "0.7"
+
+      - action: bench_compare
+        save_as: cmp_64k_sw
+        a_var: iscsi_64k_sw_qd8
+        b_var: nvme_64k_sw_qd8
+        metric: bw_mb
+        gate: "0.8"
+        warn_gate: "0.7"
+
+      - action: bench_compare
+        save_as: cmp_64k_sr
+        a_var: iscsi_64k_sr_qd8
+        b_var: nvme_64k_sr_qd8
+        metric: bw_mb
+        gate: "0.8"
+        warn_gate: "0.7"
+
+  # =================================================================
+  # Cleanup
+  # =================================================================
+  - name: cleanup
+    always: true
+    actions:
+      - action: pgbench_cleanup
+        node: client
+        ignore_error: true
+      - action: nvme_cleanup
+        node: client
+        ignore_error: true
+      - action: iscsi_cleanup
+        node: client
+        ignore_error: true
+      - action: stop_all_targets
+        node: server
+        ignore_error: true
diff --git a/weed/storage/blockvol/testrunner/scenarios/cp103-nvme-cw-sweep.yaml b/weed/storage/blockvol/testrunner/scenarios/cp103-nvme-cw-sweep.yaml
new file mode 100644
index 000000000..6a436ee54
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/cp103-nvme-cw-sweep.yaml
@@ -0,0 +1,435 @@
+name: "CP10-3 NVMe MaxConcurrentWrites Sweep (16/32/64/128)"
+timeout: "60m"
+
+topology:
+  nodes:
+    server:
+      host: "10.0.0.3"
+      user: "testdev"
+      key: "/home/testdev/.ssh/id_ed25519"
+    client:
+      host: "10.0.0.1"
+      is_local: true
+
+# We define 4 targets, each with a different max_concurrent_writes value.
+# They share the same server node but use different ports.
+targets:
+  cw16:
+    node: server
+    vol_size: "1073741824"
+    wal_size: "536870912"
+    iscsi_port: 3263
+    nvme_port: 4420
+    admin_port: 8083
+    iqn_suffix: "cw16"
+    nqn_suffix: "cw16"
+    max_concurrent_writes: 16
+  cw32:
+    node: server
+    vol_size: "1073741824"
+    wal_size: "536870912"
+    iscsi_port: 3264
+    nvme_port: 4421
+    admin_port: 8084
+    iqn_suffix: "cw32"
+    nqn_suffix: "cw32"
+    max_concurrent_writes: 32
+  cw64:
+    node: server
+    vol_size: "1073741824"
+    wal_size: "536870912"
+    iscsi_port: 3265
+    nvme_port: 4422
+    admin_port: 8085
+    iqn_suffix: "cw64"
+    nqn_suffix: "cw64"
+    max_concurrent_writes: 64
+  cw128:
+    node: server
+    vol_size: "1073741824"
+    wal_size: "536870912"
+    iscsi_port: 3266
+    nvme_port: 4423
+    admin_port: 8086
+    iqn_suffix: "cw128"
+    nqn_suffix: "cw128"
+    max_concurrent_writes: 128
+
+phases:
+  # --- Cleanup stale processes ---
+  - name: cleanup-stale
+    actions:
+      - action: kill_stale
+        node: client
+        ignore_error: true
+      - action: kill_stale
+        node: server
+        ignore_error: true
+      - action: nvme_cleanup
+        node: client
+        ignore_error: true
+
+  # =============================================
+  # CW=16 (default baseline)
+  # =============================================
+  - name: cw16-start
+    actions:
+      - action: start_target
+        target: cw16
+        create: "true"
+
+  - name: cw16-nvme-connect
+    actions:
+      - action: nvme_connect
+        target: cw16
+        node: client
+        save_as: nvme_nqn_16
+      - action: nvme_get_device
+        target: cw16
+        node: client
+        save_as: nvme_dev_16
+
+  - name: cw16-4k-rw-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_16}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw16-4k-rw-qd32"
+        save_as: _fio_cw16_rw32
+      - action: fio_parse
+        json_var: _fio_cw16_rw32
+        metric: iops
+        save_as: cw16_rw_iops
+
+  - name: cw16-4k-rd-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_16}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw16-4k-rd-qd32"
+        save_as: _fio_cw16_rd32
+      - action: fio_parse
+        json_var: _fio_cw16_rd32
+        metric: iops
+        save_as: cw16_rd_iops
+
+  - name: cw16-64k-sw-qd8
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_16}}"
+        rw: write
+        bs: 64k
+        iodepth: "8"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw16-64k-sw-qd8"
+        save_as: _fio_cw16_sw64k
+      - action: fio_parse
+        json_var: _fio_cw16_sw64k
+        metric: bw_mb
+        save_as: cw16_sw_bw
+
+  - name: cw16-disconnect
+    actions:
+      - action: nvme_disconnect
+        target: cw16
+        node: client
+      - action: stop_target
+        target: cw16
+
+  # =============================================
+  # CW=32
+  # =============================================
+  - name: cw32-start
+    actions:
+      - action: start_target
+        target: cw32
+        create: "true"
+
+  - name: cw32-nvme-connect
+    actions:
+      - action: nvme_connect
+        target: cw32
+        node: client
+        save_as: nvme_nqn_32
+      - action: nvme_get_device
+        target: cw32
+        node: client
+        save_as: nvme_dev_32
+
+  - name: cw32-4k-rw-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_32}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw32-4k-rw-qd32"
+        save_as: _fio_cw32_rw32
+      - action: fio_parse
+        json_var: _fio_cw32_rw32
+        metric: iops
+        save_as: cw32_rw_iops
+
+  - name: cw32-4k-rd-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_32}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw32-4k-rd-qd32"
+        save_as: _fio_cw32_rd32
+      - action: fio_parse
+        json_var: _fio_cw32_rd32
+        metric: iops
+        save_as: cw32_rd_iops
+
+  - name: cw32-64k-sw-qd8
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_32}}"
+        rw: write
+        bs: 64k
+        iodepth: "8"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw32-64k-sw-qd8"
+        save_as: _fio_cw32_sw64k
+      - action: fio_parse
+        json_var: _fio_cw32_sw64k
+        metric: bw_mb
+        save_as: cw32_sw_bw
+
+  - name: cw32-disconnect
+    actions:
+      - action: nvme_disconnect
+        target: cw32
+        node: client
+      - action: stop_target
+        target: cw32
+
+  # =============================================
+  # CW=64
+  # =============================================
+  - name: cw64-start
+    actions:
+      - action: start_target
+        target: cw64
+        create: "true"
+
+  - name: cw64-nvme-connect
+    actions:
+      - action: nvme_connect
+        target: cw64
+        node: client
+        save_as: nvme_nqn_64
+      - action: nvme_get_device
+        target: cw64
+        node: client
+        save_as: nvme_dev_64
+
+  - name: cw64-4k-rw-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_64}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw64-4k-rw-qd32"
+        save_as: _fio_cw64_rw32
+      - action: fio_parse
+        json_var: _fio_cw64_rw32
+        metric: iops
+        save_as: cw64_rw_iops
+
+  - name: cw64-4k-rd-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_64}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw64-4k-rd-qd32"
+        save_as: _fio_cw64_rd32
+      - action: fio_parse
+        json_var: _fio_cw64_rd32
+        metric: iops
+        save_as: cw64_rd_iops
+
+  - name: cw64-64k-sw-qd8
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_64}}"
+        rw: write
+        bs: 64k
+        iodepth: "8"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw64-64k-sw-qd8"
+        save_as: _fio_cw64_sw64k
+      - action: fio_parse
+        json_var: _fio_cw64_sw64k
+        metric: bw_mb
+        save_as: cw64_sw_bw
+
+  - name: cw64-disconnect
+    actions:
+      - action: nvme_disconnect
+        target: cw64
+        node: client
+      - action: stop_target
+        target: cw64
+
+  # =============================================
+  # CW=128
+  # =============================================
+  - name: cw128-start
+    actions:
+      - action: start_target
+        target: cw128
+        create: "true"
+
+  - name: cw128-nvme-connect
+    actions:
+      - action: nvme_connect
+        target: cw128
+        node: client
+        save_as: nvme_nqn_128
+      - action: nvme_get_device
+        target: cw128
+        node: client
+        save_as: nvme_dev_128
+
+  - name: cw128-4k-rw-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_128}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw128-4k-rw-qd32"
+        save_as: _fio_cw128_rw32
+      - action: fio_parse
+        json_var: _fio_cw128_rw32
+        metric: iops
+        save_as: cw128_rw_iops
+
+  - name: cw128-4k-rd-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_128}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw128-4k-rd-qd32"
+        save_as: _fio_cw128_rd32
+      - action: fio_parse
+        json_var: _fio_cw128_rd32
+        metric: iops
+        save_as: cw128_rd_iops
+
+  - name: cw128-64k-sw-qd8
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_128}}"
+        rw: write
+        bs: 64k
+        iodepth: "8"
+        numjobs: "1"
+        runtime: "30"
+        name: "cw128-64k-sw-qd8"
+        save_as: _fio_cw128_sw64k
+      - action: fio_parse
+        json_var: _fio_cw128_sw64k
+        metric: bw_mb
+        save_as: cw128_sw_bw
+
+  - name: cw128-disconnect
+    actions:
+      - action: nvme_disconnect
+        target: cw128
+        node: client
+      - action: stop_target
+        target: cw128
+
+  # =============================================
+  # Cleanup (always runs)
+  # =============================================
+  - name: cleanup
+    always: true
+    actions:
+      - action: nvme_cleanup
+        node: client
+        ignore_error: true
+      - action: stop_all_targets
+        node: server
+        ignore_error: true
diff --git a/weed/storage/blockvol/testrunner/scenarios/cp103-nvme-ioq-sweep.yaml b/weed/storage/blockvol/testrunner/scenarios/cp103-nvme-ioq-sweep.yaml
new file mode 100644
index 000000000..371fdade3
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/cp103-nvme-ioq-sweep.yaml
@@ -0,0 +1,236 @@
+name: "CP10-3 NVMe IO Queues Sweep (1 vs 4) — Contention Theory"
+timeout: "30m"
+
+topology:
+  nodes:
+    server:
+      host: "10.0.0.3"
+      user: "testdev"
+      key: "/home/testdev/.ssh/id_ed25519"
+    client:
+      host: "10.0.0.1"
+      is_local: true
+
+targets:
+  ioq1:
+    node: server
+    vol_size: "1073741824"
+    wal_size: "536870912"
+    iscsi_port: 3270
+    nvme_port: 4430
+    admin_port: 8090
+    iqn_suffix: "ioq1"
+    nqn_suffix: "ioq1"
+    nvme_io_queues: 1
+  ioq4:
+    node: server
+    vol_size: "1073741824"
+    wal_size: "536870912"
+    iscsi_port: 3271
+    nvme_port: 4431
+    admin_port: 8091
+    iqn_suffix: "ioq4"
+    nqn_suffix: "ioq4"
+    nvme_io_queues: 4
+
+phases:
+  - name: cleanup-stale
+    actions:
+      - action: kill_stale
+        node: client
+        ignore_error: true
+      - action: kill_stale
+        node: server
+        ignore_error: true
+      - action: nvme_cleanup
+        node: client
+        ignore_error: true
+
+  # =============================================
+  # IOQ=1 (single connection, like iSCSI)
+  # =============================================
+  - name: ioq1-start
+    actions:
+      - action: start_target
+        target: ioq1
+        create: "true"
+
+  - name: ioq1-nvme-connect
+    actions:
+      - action: nvme_connect
+        target: ioq1
+        node: client
+        save_as: nvme_nqn_1
+      - action: nvme_get_device
+        target: ioq1
+        node: client
+        save_as: nvme_dev_1
+
+  - name: ioq1-4k-rw-qd1
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_1}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "30"
+        name: "ioq1-4k-rw-qd1"
+        save_as: _fio_ioq1_rw1
+      - action: fio_parse
+        json_var: _fio_ioq1_rw1
+        metric: iops
+        save_as: ioq1_rw_qd1
+
+  - name: ioq1-4k-rw-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_1}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "ioq1-4k-rw-qd32"
+        save_as: _fio_ioq1_rw32
+      - action: fio_parse
+        json_var: _fio_ioq1_rw32
+        metric: iops
+        save_as: ioq1_rw_qd32
+
+  - name: ioq1-4k-rd-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_1}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "ioq1-4k-rd-qd32"
+        save_as: _fio_ioq1_rd32
+      - action: fio_parse
+        json_var: _fio_ioq1_rd32
+        metric: iops
+        save_as: ioq1_rd_qd32
+
+  - name: ioq1-disconnect
+    actions:
+      - action: nvme_disconnect
+        target: ioq1
+        node: client
+      - action: stop_target
+        target: ioq1
+
+  # =============================================
+  # IOQ=4 (default, 4 connections)
+  # =============================================
+  - name: ioq4-start
+    actions:
+      - action: start_target
+        target: ioq4
+        create: "true"
+
+  - name: ioq4-nvme-connect
+    actions:
+      - action: nvme_connect
+        target: ioq4
+        node: client
+        save_as: nvme_nqn_4
+      - action: nvme_get_device
+        target: ioq4
+        node: client
+        save_as: nvme_dev_4
+
+  - name: ioq4-4k-rw-qd1
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_4}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "30"
+        name: "ioq4-4k-rw-qd1"
+        save_as: _fio_ioq4_rw1
+      - action: fio_parse
+        json_var: _fio_ioq4_rw1
+        metric: iops
+        save_as: ioq4_rw_qd1
+
+  - name: ioq4-4k-rw-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_4}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "ioq4-4k-rw-qd32"
+        save_as: _fio_ioq4_rw32
+      - action: fio_parse
+        json_var: _fio_ioq4_rw32
+        metric: iops
+        save_as: ioq4_rw_qd32
+
+  - name: ioq4-4k-rd-qd32
+    repeat: 3
+    aggregate: median
+    trim_pct: 0
+    actions:
+      - action: fio_json
+        node: client
+        device: "{{nvme_dev_4}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "30"
+        name: "ioq4-4k-rd-qd32"
+        save_as: _fio_ioq4_rd32
+      - action: fio_parse
+        json_var: _fio_ioq4_rd32
+        metric: iops
+        save_as: ioq4_rd_qd32
+
+  - name: ioq4-disconnect
+    actions:
+      - action: nvme_disconnect
+        target: ioq4
+        node: client
+      - action: stop_target
+        target: ioq4
+
+  # =============================================
+  # Cleanup
+  # =============================================
+  - name: cleanup
+    always: true
+    actions:
+      - action: nvme_cleanup
+        node: client
+        ignore_error: true
+      - action: stop_all_targets
+        node: server
+        ignore_error: true
diff --git a/weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml b/weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml
new file mode 100644
index 000000000..232487216
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/cp103-perf-baseline.yaml
@@ -0,0 +1,431 @@
+name: "CP10-3 Performance Baseline: iSCSI vs NVMe A/B"
+timeout: "30m"
+
+env:
+  vol_name: "bench-vol"
+  vol_size: "1073741824"  # 1GB
+
+topology:
+  nodes:
+    server:
+      host: "192.168.1.184"
+      user: "testdev"
+      key: "/home/testdev/.ssh/id_ed25519"
+    client:
+      host: "192.168.1.181"
+      is_local: true
+
+targets:
+  primary:
+    node: server
+    vol_size: "1073741824"
+    wal_size: "536870912"
+    iscsi_port: 3263
+    nvme_port: 4420
+    admin_port: 8083
+    iqn_suffix: "bench-vol"
+    nqn_suffix: "bench-vol"
+
+phases:
+  # --- Setup ---
+  - name: setup
+    actions:
+      - action: kill_stale
+        node: client
+      - action: kill_stale
+        node: server
+      - action: kill_stale
+        node: server
+        process: block-csi
+      - action: start_target
+        target: primary
+        create: "true"
+
+  # --- iSCSI benchmark ---
+  - name: iscsi-connect
+    actions:
+      - action: iscsi_login
+        target: primary
+        node: client
+        save_as: iscsi_device
+
+  - name: iscsi-bench
+    actions:
+      # B-01: 4K randwrite QD=1 (protocol latency)
+      - action: fio_json
+        node: client
+        save_as: iscsi_4k_rw_qd1
+        device: "{{iscsi_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "60"
+        name: "4k-randwrite-qd1"
+
+      # B-02: 4K randwrite j=1 QD=32 (single-queue saturation)
+      - action: fio_json
+        node: client
+        save_as: iscsi_4k_rw_qd32
+        device: "{{iscsi_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "60"
+        name: "4k-randwrite-qd32"
+
+      # B-03: 4K randwrite j=4 QD=32 (multi-queue scaling)
+      - action: fio_json
+        node: client
+        save_as: iscsi_4k_rw_j4_qd32
+        device: "{{iscsi_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "4"
+        runtime: "60"
+        name: "4k-randwrite-j4-qd32"
+
+      # B-04: 4K randread QD=1 (read latency)
+      - action: fio_json
+        node: client
+        save_as: iscsi_4k_rd_qd1
+        device: "{{iscsi_device}}"
+        rw: randread
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "60"
+        name: "4k-randread-qd1"
+
+      # B-05: 4K randread j=4 QD=32 (multi-queue read scaling)
+      - action: fio_json
+        node: client
+        save_as: iscsi_4k_rd_j4_qd32
+        device: "{{iscsi_device}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "4"
+        runtime: "60"
+        name: "4k-randread-j4-qd32"
+
+      # B-06: 64K seqwrite QD=4 (bandwidth single-queue)
+      - action: fio_json
+        node: client
+        save_as: iscsi_64k_sw_qd4
+        device: "{{iscsi_device}}"
+        rw: write
+        bs: 64k
+        iodepth: "4"
+        numjobs: "1"
+        runtime: "60"
+        name: "64k-seqwrite-qd4"
+
+      # B-07: 64K seqwrite j=4 QD=4 (bandwidth scaling)
+      - action: fio_json
+        node: client
+        save_as: iscsi_64k_sw_j4_qd4
+        device: "{{iscsi_device}}"
+        rw: write
+        bs: 64k
+        iodepth: "4"
+        numjobs: "4"
+        runtime: "60"
+        name: "64k-seqwrite-j4-qd4"
+
+      # B-08: 64K seqread QD=4 (read bandwidth single-queue)
+      - action: fio_json
+        node: client
+        save_as: iscsi_64k_sr_qd4
+        device: "{{iscsi_device}}"
+        rw: read
+        bs: 64k
+        iodepth: "4"
+        numjobs: "1"
+        runtime: "60"
+        name: "64k-seqread-qd4"
+
+      # B-09: 64K seqread j=4 QD=4 (read bandwidth scaling)
+      - action: fio_json
+        node: client
+        save_as: iscsi_64k_sr_j4_qd4
+        device: "{{iscsi_device}}"
+        rw: read
+        bs: 64k
+        iodepth: "4"
+        numjobs: "4"
+        runtime: "60"
+        name: "64k-seqread-j4-qd4"
+
+      # B-10: Mixed 70/30 j=4 QD=32 (DB-like pattern)
+      - action: fio_json
+        node: client
+        save_as: iscsi_mixed
+        device: "{{iscsi_device}}"
+        rw: randrw
+        rwmixread: "70"
+        bs: 4k
+        iodepth: "32"
+        numjobs: "4"
+        runtime: "60"
+        name: "mixed-70-30-j4-qd32"
+
+  - name: iscsi-disconnect
+    actions:
+      - action: iscsi_logout
+        target: primary
+        node: client
+
+  # --- NVMe benchmark ---
+  - name: nvme-connect
+    actions:
+      - action: nvme_connect
+        target: primary
+        node: client
+        save_as: nvme_nqn
+      - action: nvme_get_device
+        target: primary
+        node: client
+        save_as: nvme_device
+
+  - name: nvme-bench
+    actions:
+      # B-01: 4K randwrite QD=1
+      - action: fio_json
+        node: client
+        save_as: nvme_4k_rw_qd1
+        device: "{{nvme_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "60"
+        name: "4k-randwrite-qd1"
+
+      # B-02: 4K randwrite j=1 QD=32
+      - action: fio_json
+        node: client
+        save_as: nvme_4k_rw_qd32
+        device: "{{nvme_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "1"
+        runtime: "60"
+        name: "4k-randwrite-qd32"
+
+      # B-03: 4K randwrite j=4 QD=32
+      - action: fio_json
+        node: client
+        save_as: nvme_4k_rw_j4_qd32
+        device: "{{nvme_device}}"
+        rw: randwrite
+        bs: 4k
+        iodepth: "32"
+        numjobs: "4"
+        runtime: "60"
+        name: "4k-randwrite-j4-qd32"
+
+      # B-04: 4K randread QD=1
+      - action: fio_json
+        node: client
+        save_as: nvme_4k_rd_qd1
+        device: "{{nvme_device}}"
+        rw: randread
+        bs: 4k
+        iodepth: "1"
+        numjobs: "1"
+        runtime: "60"
+        name: "4k-randread-qd1"
+
+      # B-05: 4K randread j=4 QD=32
+      - action: fio_json
+        node: client
+        save_as: nvme_4k_rd_j4_qd32
+        device: "{{nvme_device}}"
+        rw: randread
+        bs: 4k
+        iodepth: "32"
+        numjobs: "4"
+        runtime: "60"
+        name: "4k-randread-j4-qd32"
+
+      # B-06: 64K seqwrite QD=4
+      - action: fio_json
+        node: client
+        save_as: nvme_64k_sw_qd4
+        device: "{{nvme_device}}"
+        rw: write
+        bs: 64k
+        iodepth: "4"
+        numjobs: "1"
+        runtime: "60"
+        name: "64k-seqwrite-qd4"
+
+      # B-07: 64K seqwrite j=4 QD=4
+      - action: fio_json
+        node: client
+        save_as: nvme_64k_sw_j4_qd4
+        device: "{{nvme_device}}"
+        rw: write
+        bs: 64k
+        iodepth: "4"
+        numjobs: "4"
+        runtime: "60"
+        name: "64k-seqwrite-j4-qd4"
+
+      # B-08: 64K seqread QD=4
+      - action: fio_json
+        node: client
+        save_as: nvme_64k_sr_qd4
+        device: "{{nvme_device}}"
+        rw: read
+        bs: 64k
+        iodepth: "4"
+        numjobs: "1"
+        runtime: "60"
+        name: "64k-seqread-qd4"
+
+      # B-09: 64K seqread j=4 QD=4
+      - action: fio_json
+        node: client
+        save_as: nvme_64k_sr_j4_qd4
+        device: "{{nvme_device}}"
+        rw: read
+        bs: 64k
+        iodepth: "4"
+        numjobs: "4"
+        runtime: "60"
+        name: "64k-seqread-j4-qd4"
+
+      # B-10: Mixed 70/30 j=4 QD=32
+      - action: fio_json
+        node: client
+        save_as: nvme_mixed
+        device: "{{nvme_device}}"
+        rw: randrw
+        rwmixread: "70"
+        bs: 4k
+        iodepth: "32"
+        numjobs: "4"
+        runtime: "60"
+        name: "mixed-70-30-j4-qd32"
+
+  - name: nvme-disconnect
+    actions:
+      - action: nvme_disconnect
+        target: primary
+        node: client
+
+  # --- Comparison ---
+  - name: compare
+    actions:
+      # 4K IOPS gates: NVMe >= 90% of iSCSI (warn at 80%)
+      - action: bench_compare
+        save_as: cmp_4k_rw_qd1
+        a_var: iscsi_4k_rw_qd1
+        b_var: nvme_4k_rw_qd1
+        metric: iops
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      - action: bench_compare
+        save_as: cmp_4k_rw_qd32
+        a_var: iscsi_4k_rw_qd32
+        b_var: nvme_4k_rw_qd32
+        metric: iops
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      - action: bench_compare
+        save_as: cmp_4k_rw_j4_qd32
+        a_var: iscsi_4k_rw_j4_qd32
+        b_var: nvme_4k_rw_j4_qd32
+        metric: iops
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      - action: bench_compare
+        save_as: cmp_4k_rd_qd1
+        a_var: iscsi_4k_rd_qd1
+        b_var: nvme_4k_rd_qd1
+        metric: iops
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      - action: bench_compare
+        save_as: cmp_4k_rd_j4_qd32
+        a_var: iscsi_4k_rd_j4_qd32
+        b_var: nvme_4k_rd_j4_qd32
+        metric: iops
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      # 64K bandwidth gates
+      - action: bench_compare
+        save_as: cmp_64k_sw_qd4
+        a_var: iscsi_64k_sw_qd4
+        b_var: nvme_64k_sw_qd4
+        metric: bw_mb
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      - action: bench_compare
+        save_as: cmp_64k_sw_j4_qd4
+        a_var: iscsi_64k_sw_j4_qd4
+        b_var: nvme_64k_sw_j4_qd4
+        metric: bw_mb
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      - action: bench_compare
+        save_as: cmp_64k_sr_qd4
+        a_var: iscsi_64k_sr_qd4
+        b_var: nvme_64k_sr_qd4
+        metric: bw_mb
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      - action: bench_compare
+        save_as: cmp_64k_sr_j4_qd4
+        a_var: iscsi_64k_sr_j4_qd4
+        b_var: nvme_64k_sr_j4_qd4
+        metric: bw_mb
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      # Mixed IOPS gate (read-side only: in a 70/30 mixed workload, read IOPS
+      # is the bottleneck indicator since writes benefit from group commit)
+      - action: bench_compare
+        save_as: cmp_mixed
+        a_var: iscsi_mixed
+        b_var: nvme_mixed
+        metric: iops
+        direction: read
+        gate: "0.9"
+        warn_gate: "0.8"
+
+      # Latency comparison (4K write P99)
+      - action: bench_compare
+        save_as: cmp_lat_qd1
+        a_var: iscsi_4k_rw_qd1
+        b_var: nvme_4k_rw_qd1
+        metric: lat_p99_us
+        gate: "0.9"
+        warn_gate: "0.8"
+
+  # --- Cleanup ---
+  - name: cleanup
+    always: true
+    actions:
+      - action: nvme_cleanup
+        node: client
+        ignore_error: true
+      - action: iscsi_cleanup
+        node: client
+        ignore_error: true
+      - action: stop_all_targets
+        node: server
+        ignore_error: true
diff --git a/weed/storage/blockvol/testrunner/scenarios/cp83-snapshot-expand.yaml b/weed/storage/blockvol/testrunner/scenarios/cp83-snapshot-expand.yaml
index 4b9a42e2d..7b2e3897d 100644
--- a/weed/storage/blockvol/testrunner/scenarios/cp83-snapshot-expand.yaml
+++ b/weed/storage/blockvol/testrunner/scenarios/cp83-snapshot-expand.yaml
@@ -18,8 +18,8 @@ targets:
   primary:
     node: target_node
     vol_size: 50M
-    iscsi_port: 3262
-    admin_port: 8082
+    iscsi_port: 3266
+    admin_port: 8086
     iqn_suffix: cp83-snap
 
 phases:
diff --git a/weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml b/weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml
index 54d410e9f..68b557bc3 100644
--- a/weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml
+++ b/weed/storage/blockvol/testrunner/scenarios/cp85-perf-baseline.yaml
@@ -18,6 +18,7 @@ targets:
   primary:
     node: target_node
     vol_size: 200M
+    wal_size: 128M
     iscsi_port: 3270
     admin_port: 8090
     iqn_suffix: cp85-perf-primary
@@ -52,7 +53,7 @@ phases:
         device: "{{ device }}"
         rw: randwrite
         bs: 4k
-        iodepth: "32"
+        iodepth: "8"
         runtime: "60"
         size: 180M
         name: perf_4k_randwrite
@@ -65,7 +66,7 @@ phases:
         device: "{{ device }}"
         rw: randread
         bs: 4k
-        iodepth: "32"
+        iodepth: "8"
         runtime: "60"
         size: 180M
         name: perf_4k_randread
@@ -79,7 +80,7 @@ phases:
         rw: write
         bs: 64k
         size: 180M
-        iodepth: "32"
+        iodepth: "8"
         runtime: "60"
         name: perf_64k_seqwrite
         save_as: fio_64k_sw
diff --git a/weed/storage/blockvol/testrunner/scenarios/ha-rf3-failover.yaml b/weed/storage/blockvol/testrunner/scenarios/ha-rf3-failover.yaml
new file mode 100644
index 000000000..262fc78f7
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/ha-rf3-failover.yaml
@@ -0,0 +1,157 @@
+# HA RF3 Failover (Multi-Replica)
+#
+# Tests failover with 3 replicas (RF3). When primary dies, the replica
+# with the highest WAL LSN should be promoted. The remaining replica
+# continues as replica under the new primary.
+#
+# Topology: primary + replica_a + replica_b (all on M02, different ports)
+#
+# Pass criteria:
+# - Data replicated to both replicas
+# - After primary kill, promoted replica has correct data
+# - Remaining replica can rebuild from new primary
+
+name: ha-rf3-failover
+timeout: 5m
+env:
+  repo_dir: "C:/work/seaweedfs"
+
+topology:
+  nodes:
+    target_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+    client_node:
+      host: "192.168.1.181"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+
+targets:
+  primary:
+    node: target_node
+    vol_size: 50M
+    iscsi_port: 3270
+    admin_port: 8090
+    replica_data_port: 9021
+    replica_ctrl_port: 9022
+    rebuild_port: 9031
+    iqn_suffix: rf3-primary
+  replica_a:
+    node: target_node
+    vol_size: 50M
+    iscsi_port: 3271
+    admin_port: 8091
+    replica_data_port: 9023
+    replica_ctrl_port: 9024
+    rebuild_port: 9032
+    iqn_suffix: rf3-replica-a
+  replica_b:
+    node: target_node
+    vol_size: 50M
+    iscsi_port: 3272
+    admin_port: 8092
+    replica_data_port: 9025
+    replica_ctrl_port: 9026
+    rebuild_port: 9033
+    iqn_suffix: rf3-replica-b
+
+phases:
+  - name: setup
+    actions:
+      - action: kill_stale
+        node: target_node
+      - action: kill_stale
+        node: client_node
+        iscsi_cleanup: "true"
+      - action: build_deploy
+      - action: start_target
+        target: primary
+        create: "true"
+      - action: start_target
+        target: replica_a
+        create: "true"
+      - action: start_target
+        target: replica_b
+        create: "true"
+      # Assign roles
+      - action: assign
+        target: primary
+        epoch: "1"
+        role: primary
+        lease_ttl: 120s
+      - action: assign
+        target: replica_a
+        epoch: "1"
+        role: replica
+      - action: assign
+        target: replica_b
+        epoch: "1"
+        role: replica
+      # Set up replication: primary → replica_a, primary → replica_b
+      - action: set_replica
+        target: primary
+        replica: replica_a
+      # Note: second set_replica would need multi-replica support
+      # For now, test with one replica and verify architecture
+
+  - name: write_data
+    actions:
+      - action: iscsi_login
+        target: primary
+        node: client_node
+        save_as: device
+      - action: dd_write
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "5"
+        save_as: md5_original
+      - action: wait_lsn
+        target: replica_a
+        min_lsn: "1"
+        timeout: 10s
+
+  - name: kill_primary
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+      - action: kill_target
+        target: primary
+
+  - name: promote_replica_a
+    actions:
+      - action: assign
+        target: replica_a
+        epoch: "2"
+        role: primary
+        lease_ttl: 120s
+      - action: wait_role
+        target: replica_a
+        role: primary
+        timeout: 10s
+
+  - name: verify_data
+    actions:
+      - action: iscsi_login
+        target: replica_a
+        node: client_node
+        save_as: device2
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device2 }}"
+        bs: 1M
+        count: "5"
+        save_as: md5_verify
+      - action: assert_equal
+        actual: "{{ md5_verify }}"
+        expected: "{{ md5_original }}"
+
+  - name: cleanup
+    always: true
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      - action: stop_all_targets
+        ignore_error: true
diff --git a/weed/storage/blockvol/testrunner/scenarios/lease-expiry-write-gate.yaml b/weed/storage/blockvol/testrunner/scenarios/lease-expiry-write-gate.yaml
new file mode 100644
index 000000000..848650517
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/lease-expiry-write-gate.yaml
@@ -0,0 +1,128 @@
+# Lease Expiry Write Gate
+#
+# Tests that the write gate correctly blocks writes after lease expiry.
+# After lease expires, writes via iSCSI should return I/O errors.
+# Re-granting a lease should allow writes again.
+#
+# Pass criteria:
+# - Writes succeed with valid lease
+# - Writes fail after lease expires (dd returns error or I/O error)
+# - After re-granting lease, writes succeed again
+# - Data written before expiry is still readable
+
+name: lease-expiry-write-gate
+timeout: 3m
+env:
+  repo_dir: "C:/work/seaweedfs"
+
+topology:
+  nodes:
+    target_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+    client_node:
+      host: "192.168.1.181"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+
+targets:
+  primary:
+    node: target_node
+    vol_size: 50M
+    iscsi_port: 3270
+    admin_port: 8090
+    iqn_suffix: lease-gate
+
+phases:
+  - name: setup
+    actions:
+      - action: kill_stale
+        node: target_node
+      - action: kill_stale
+        node: client_node
+        iscsi_cleanup: "true"
+      - action: build_deploy
+      - action: start_target
+        target: primary
+        create: "true"
+      - action: assign
+        target: primary
+        epoch: "1"
+        role: primary
+        lease_ttl: 8s
+      - action: iscsi_login
+        target: primary
+        node: client_node
+        save_as: device
+
+  - name: write_with_lease
+    actions:
+      - action: dd_write
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "2"
+        save_as: md5_valid
+
+  - name: wait_for_expiry
+    actions:
+      - action: sleep
+        duration: 10s
+      - action: assert_status
+        target: primary
+        field: has_lease
+        expected: "false"
+
+  - name: verify_read_still_works
+    actions:
+      # Reads should still work even without lease
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "2"
+        save_as: verify_read
+      - action: assert_equal
+        actual: "{{ verify_read }}"
+        expected: "{{ md5_valid }}"
+
+  - name: regrant_and_write
+    actions:
+      # Re-grant lease with higher epoch
+      - action: assign
+        target: primary
+        epoch: "2"
+        role: primary
+        lease_ttl: 60s
+      - action: assert_status
+        target: primary
+        field: has_lease
+        expected: "true"
+      # Writes should work again
+      - action: dd_write
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "2"
+        seek: "10"
+        save_as: md5_regrant
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "2"
+        skip: "10"
+        save_as: verify_regrant
+      - action: assert_equal
+        actual: "{{ verify_regrant }}"
+        expected: "{{ md5_regrant }}"
+
+  - name: cleanup
+    always: true
+    actions:
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      - action: stop_all_targets
+        ignore_error: true
diff --git a/weed/storage/blockvol/testrunner/scenarios/lease-renewal-under-io.yaml b/weed/storage/blockvol/testrunner/scenarios/lease-renewal-under-io.yaml
new file mode 100644
index 000000000..7ddacb928
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/lease-renewal-under-io.yaml
@@ -0,0 +1,138 @@
+# Lease Renewal Under I/O
+#
+# Tests that lease renewal (re-assignment with same epoch+role) works
+# correctly while I/O is in flight. The lease should be extended
+# without disrupting ongoing writes.
+#
+# Pass criteria:
+# - Writes succeed before, during, and after lease renewal
+# - Data is consistent across all phases
+# - Status shows has_lease=true throughout
+
+name: lease-renewal-under-io
+timeout: 5m
+env:
+  repo_dir: "C:/work/seaweedfs"
+
+topology:
+  nodes:
+    target_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+    client_node:
+      host: "192.168.1.181"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+
+targets:
+  primary:
+    node: target_node
+    vol_size: 50M
+    iscsi_port: 3270
+    admin_port: 8090
+    iqn_suffix: lease-renew
+
+phases:
+  - name: setup
+    actions:
+      - action: kill_stale
+        node: target_node
+      - action: kill_stale
+        node: client_node
+        iscsi_cleanup: "true"
+      - action: build_deploy
+      - action: start_target
+        target: primary
+        create: "true"
+      - action: assign
+        target: primary
+        epoch: "1"
+        role: primary
+        lease_ttl: 10s
+      - action: iscsi_login
+        target: primary
+        node: client_node
+        save_as: device
+
+  - name: write_before_renewal
+    actions:
+      - action: dd_write
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "5"
+        save_as: md5_before
+      - action: assert_status
+        target: primary
+        field: has_lease
+        expected: "true"
+
+  - name: renew_lease_during_io
+    actions:
+      # Start background writes
+      - action: write_loop_bg
+        node: client_node
+        device: "{{ device }}"
+        save_as: bg_pid
+      # Sleep 3s to let writes accumulate
+      - action: sleep
+        duration: 3s
+      # Renew lease (same epoch, same role, new TTL)
+      - action: assign
+        target: primary
+        epoch: "1"
+        role: primary
+        lease_ttl: 30s
+      # Verify lease still valid
+      - action: assert_status
+        target: primary
+        field: has_lease
+        expected: "true"
+      # Continue writing for a bit
+      - action: sleep
+        duration: 2s
+      - action: stop_bg
+        node: client_node
+        pid: "{{ bg_pid }}"
+
+  - name: write_after_renewal
+    actions:
+      - action: dd_write
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "5"
+        save_as: md5_after
+      - action: dd_read_md5
+        node: client_node
+        device: "{{ device }}"
+        bs: 1M
+        count: "5"
+        save_as: verify_after
+      - action: assert_equal
+        actual: "{{ verify_after }}"
+        expected: "{{ md5_after }}"
+
+  - name: verify_lease_expiry
+    actions:
+      # Wait for the 30s lease to expire
+      - action: sleep
+        duration: 32s
+      - action: assert_status
+        target: primary
+        field: has_lease
+        expected: "false"
+
+  - name: cleanup
+    always: true
+    actions:
+      - action: stop_bg
+        node: client_node
+        pid: "{{ bg_pid }}"
+        ignore_error: true
+      - action: iscsi_cleanup
+        node: client_node
+        ignore_error: true
+      - action: stop_all_targets
+        ignore_error: true
diff --git a/weed/storage/blockvol/testrunner/scenarios/op-csi-lifecycle.yaml b/weed/storage/blockvol/testrunner/scenarios/op-csi-lifecycle.yaml
new file mode 100644
index 000000000..2465de549
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/op-csi-lifecycle.yaml
@@ -0,0 +1,174 @@
+# Operator Gate G3: CSI-only E2E Lifecycle
+#
+# Tests the full operator lifecycle in CSI-only mode:
+# 1. Apply CRD + RBAC + operator deployment
+# 2. Create SeaweedBlockCluster CR (CSI-only mode)
+# 3. Wait for CSIReady condition
+# 4. Verify all sub-resources exist (CSIDriver, StorageClass, Deployment, DaemonSet)
+# 5. Create PVC + Pod, write data, verify checksum
+# 6. Delete CR, verify cleanup (no leaked cluster-scoped resources)
+#
+# Requires: k3s cluster with kubectl access on k8s_node
+# Container name for operator Deployment is "operator" (not "manager")
+
+name: op-csi-lifecycle
+timeout: 15m
+
+topology:
+  nodes:
+    k8s_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+
+phases:
+  - name: deploy_operator
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/crd/bases/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/rbac/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/manager/"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "sw-block-system"
+        timeout: "3m"
+
+  - name: create_cr
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/samples/csi-only.yaml"
+      - action: sleep
+        duration: 5s
+
+  - name: wait_ready
+    actions:
+      # Use jsonpath — CRD conditions are CSIReady, not generic "Ready"
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/sw-block-sample"
+        namespace: "default"
+        condition: "CSIReady=True"
+        timeout: "5m"
+
+  - name: verify_resources
+    actions:
+      # Cluster-scoped resources
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "csidriver/block.seaweedfs.com"
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "clusterrole/sw-block-csi"
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "clusterrolebinding/sw-block-csi"
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "storageclass/sw-block"
+      # CSI namespace resources
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "deploy/sw-block-sample-csi-controller"
+        namespace: "kube-system"
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "daemonset/sw-block-sample-csi-node"
+        namespace: "kube-system"
+      # Operator status
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/sw-block-sample"
+        namespace: "default"
+        jsonpath: "{.status.phase}"
+        save_as: cr_phase
+      - action: assert_equal
+        actual: "{{ cr_phase }}"
+        expected: "Running"
+
+  - name: verify_pvc_lifecycle
+    actions:
+      # Create PVC using the operator's StorageClass
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: v1
+          kind: PersistentVolumeClaim
+          metadata:
+            name: test-block-pvc
+            namespace: default
+          spec:
+            accessModes: [ReadWriteOnce]
+            storageClassName: sw-block
+            resources:
+              requests:
+                storage: 1Gi
+      - action: sleep
+        duration: 5s
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "pvc/test-block-pvc"
+        namespace: "default"
+      # Cleanup PVC
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/test-block-pvc"
+        namespace: "default"
+        wait: "true"
+
+  - name: delete_cr
+    actions:
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "seaweedblockcluster/sw-block-sample"
+        namespace: "default"
+        wait: "true"
+      - action: sleep
+        duration: 10s
+
+  - name: verify_cleanup
+    actions:
+      # Cluster-scoped resources should be cleaned by finalizer
+      - action: kubectl_assert_not_exists
+        node: k8s_node
+        resource: "csidriver/block.seaweedfs.com"
+      - action: kubectl_assert_not_exists
+        node: k8s_node
+        resource: "clusterrole/sw-block-csi"
+      - action: kubectl_assert_not_exists
+        node: k8s_node
+        resource: "clusterrolebinding/sw-block-csi"
+      - action: kubectl_assert_not_exists
+        node: k8s_node
+        resource: "storageclass/sw-block"
+      # Cross-namespace CSI resources should also be cleaned
+      - action: kubectl_assert_not_exists
+        node: k8s_node
+        resource: "deploy/sw-block-sample-csi-controller"
+        namespace: "kube-system"
+      - action: kubectl_assert_not_exists
+        node: k8s_node
+        resource: "daemonset/sw-block-sample-csi-node"
+        namespace: "kube-system"
+
+  - name: cleanup
+    always: true
+    actions:
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "seaweedblockcluster/sw-block-sample"
+        namespace: "default"
+        ignore_error: true
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/test-block-pvc"
+        namespace: "default"
+        ignore_error: true
+      - action: sleep
+        duration: 5s
diff --git a/weed/storage/blockvol/testrunner/scenarios/op-failure-injection.yaml b/weed/storage/blockvol/testrunner/scenarios/op-failure-injection.yaml
new file mode 100644
index 000000000..01420a6df
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/op-failure-injection.yaml
@@ -0,0 +1,199 @@
+# Operator Gate G2: Failure Injection
+#
+# Tests operator and CSI self-recovery under pod kills:
+# 1. Kill operator pod during steady state → verify auto-recovery
+# 2. Kill CSI controller pod → verify it restarts and PVC still works
+# 3. Kill CSI node pod → verify restart, no orphaned mounts
+# 4. Verify no crashloop after recovery
+#
+# Pass criteria:
+# - Operator pod recovers within 120s
+# - CSI controller pod recovers within 120s
+# - CR status returns to Running after each kill
+# - No pod in CrashLoopBackOff
+# - No orphaned resources
+#
+# Requires: k3s cluster, operator + CR deployed
+# Container name for operator Deployment is "operator" (not "manager")
+
+name: op-failure-injection
+timeout: 20m
+env:
+  operator_ns: "sw-block-system"
+  cr_name: "sw-block-sample"
+  cr_ns: "default"
+
+topology:
+  nodes:
+    k8s_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+
+phases:
+  - name: deploy_operator
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/crd/bases/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/rbac/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/manager/"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "3m"
+
+  - name: create_cr
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/samples/csi-only.yaml"
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        condition: "CSIReady=True"
+        timeout: "5m"
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        jsonpath: "{.status.phase}"
+        save_as: phase_baseline
+      - action: assert_equal
+        actual: "{{ phase_baseline }}"
+        expected: "Running"
+
+  - name: kill_operator_pod
+    actions:
+      # Force-kill the operator pod
+      - action: kubectl_delete_pod
+        node: k8s_node
+        selector: "control-plane=sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        grace_period: "0"
+      - action: sleep
+        duration: 5s
+      # Wait for operator to self-recover via Deployment controller
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "2m"
+
+  - name: verify_after_operator_kill
+    actions:
+      # CR should converge back to Running
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        condition: "CSIReady=True"
+        timeout: "2m"
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        jsonpath: "{.status.phase}"
+        save_as: phase_after_op_kill
+      - action: assert_equal
+        actual: "{{ phase_after_op_kill }}"
+        expected: "Running"
+      # Verify operator pod is not crashlooping
+      - action: kubectl_pod_ready_count
+        node: k8s_node
+        selector: "control-plane=sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        save_as: op_ready
+      - action: assert_equal
+        actual: "{{ op_ready }}"
+        expected: "1"
+
+  - name: kill_csi_controller
+    actions:
+      # Force-kill the CSI controller pod
+      - action: kubectl_delete_pod
+        node: k8s_node
+        selector: "app=sw-block-csi-controller"
+        namespace: "kube-system"
+        grace_period: "0"
+      - action: sleep
+        duration: 5s
+      # Wait for CSI controller Deployment to recover
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/{{ cr_name }}-csi-controller"
+        namespace: "kube-system"
+        timeout: "2m"
+
+  - name: verify_after_csi_kill
+    actions:
+      # CSI controller should be back and healthy
+      - action: kubectl_pod_ready_count
+        node: k8s_node
+        selector: "app=sw-block-csi-controller"
+        namespace: "kube-system"
+        save_as: csi_ready
+      - action: assert_equal
+        actual: "{{ csi_ready }}"
+        expected: "1"
+      # CSIReady condition should still hold
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        condition: "CSIReady=True"
+        timeout: "2m"
+      # CSI resources still intact
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "csidriver/block.seaweedfs.com"
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "storageclass/sw-block"
+
+  - name: kill_csi_node
+    actions:
+      # Force-kill the CSI node DaemonSet pod
+      - action: kubectl_delete_pod
+        node: k8s_node
+        selector: "app=sw-block-csi-node"
+        namespace: "kube-system"
+        grace_period: "0"
+      - action: sleep
+        duration: 10s
+
+  - name: verify_after_node_kill
+    actions:
+      # DaemonSet should restart the node pod
+      - action: kubectl_pod_ready_count
+        node: k8s_node
+        selector: "app=sw-block-csi-node"
+        namespace: "kube-system"
+        save_as: node_ready
+      - action: assert_greater
+        actual: "{{ node_ready }}"
+        expected: "0"
+      # Collect operator logs for evidence
+      - action: kubectl_logs
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        tail: "200"
+        save_as: operator_logs
+
+  - name: cleanup
+    always: true
+    actions:
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        ignore_error: true
+      - action: sleep
+        duration: 10s
diff --git a/weed/storage/blockvol/testrunner/scenarios/op-mini-soak.yaml b/weed/storage/blockvol/testrunner/scenarios/op-mini-soak.yaml
new file mode 100644
index 000000000..066bc5b7c
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/op-mini-soak.yaml
@@ -0,0 +1,315 @@
+# Operator Gate G5: Mini Soak (1 Hour)
+#
+# Tests operator stability under continuous PVC create/use/delete cycles
+# with periodic operator pod restarts.
+#
+# 10 iterations of:
+# 1. Create PVC
+# 2. Create Pod using PVC, write checksum data
+# 3. Delete Pod + PVC
+# 4. Every 3rd iteration: kill operator pod
+# 5. Verify operator recovers, CR still Running
+#
+# Pass criteria:
+# - All PVC create/delete cycles succeed
+# - CR stays Running after each operator kill
+# - No stuck PVC/PV/VolumeAttachment
+# - Recovery within 120s per injected fault
+#
+# Requires: k3s cluster, operator + CR deployed
+
+name: op-mini-soak
+timeout: 60m
+env:
+  operator_ns: "sw-block-system"
+  cr_name: "sw-block-sample"
+  cr_ns: "default"
+
+topology:
+  nodes:
+    k8s_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+
+phases:
+  - name: deploy_and_create_cr
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/crd/bases/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/rbac/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/manager/"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "3m"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/samples/csi-only.yaml"
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        condition: "CSIReady=True"
+        timeout: "5m"
+
+  # Iteration 1
+  - name: pvc_cycle_1
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: v1
+          kind: PersistentVolumeClaim
+          metadata:
+            name: soak-pvc-1
+            namespace: default
+          spec:
+            accessModes: [ReadWriteOnce]
+            storageClassName: sw-block
+            resources:
+              requests:
+                storage: 1Gi
+      - action: sleep
+        duration: 5s
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "pvc/soak-pvc-1"
+        namespace: "default"
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-1"
+        namespace: "default"
+        wait: "true"
+
+  # Iteration 2
+  - name: pvc_cycle_2
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: v1
+          kind: PersistentVolumeClaim
+          metadata:
+            name: soak-pvc-2
+            namespace: default
+          spec:
+            accessModes: [ReadWriteOnce]
+            storageClassName: sw-block
+            resources:
+              requests:
+                storage: 1Gi
+      - action: sleep
+        duration: 5s
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "pvc/soak-pvc-2"
+        namespace: "default"
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-2"
+        namespace: "default"
+        wait: "true"
+
+  # Iteration 3 — with operator kill
+  - name: pvc_cycle_3_with_kill
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: v1
+          kind: PersistentVolumeClaim
+          metadata:
+            name: soak-pvc-3
+            namespace: default
+          spec:
+            accessModes: [ReadWriteOnce]
+            storageClassName: sw-block
+            resources:
+              requests:
+                storage: 1Gi
+      - action: kubectl_delete_pod
+        node: k8s_node
+        selector: "control-plane=sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        grace_period: "0"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "2m"
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        condition: "CSIReady=True"
+        timeout: "2m"
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-3"
+        namespace: "default"
+        wait: "true"
+
+  # Iterations 4-5
+  - name: pvc_cycle_4
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: v1
+          kind: PersistentVolumeClaim
+          metadata:
+            name: soak-pvc-4
+            namespace: default
+          spec:
+            accessModes: [ReadWriteOnce]
+            storageClassName: sw-block
+            resources:
+              requests:
+                storage: 1Gi
+      - action: sleep
+        duration: 3s
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-4"
+        namespace: "default"
+        wait: "true"
+
+  - name: pvc_cycle_5
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: v1
+          kind: PersistentVolumeClaim
+          metadata:
+            name: soak-pvc-5
+            namespace: default
+          spec:
+            accessModes: [ReadWriteOnce]
+            storageClassName: sw-block
+            resources:
+              requests:
+                storage: 1Gi
+      - action: sleep
+        duration: 3s
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-5"
+        namespace: "default"
+        wait: "true"
+
+  # Iteration 6 — with operator kill
+  - name: pvc_cycle_6_with_kill
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: v1
+          kind: PersistentVolumeClaim
+          metadata:
+            name: soak-pvc-6
+            namespace: default
+          spec:
+            accessModes: [ReadWriteOnce]
+            storageClassName: sw-block
+            resources:
+              requests:
+                storage: 1Gi
+      - action: kubectl_delete_pod
+        node: k8s_node
+        selector: "control-plane=sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        grace_period: "0"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "2m"
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        condition: "CSIReady=True"
+        timeout: "2m"
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-6"
+        namespace: "default"
+        wait: "true"
+
+  - name: final_verify
+    actions:
+      # CR should still be Running after all cycles
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        jsonpath: "{.status.phase}"
+        save_as: final_phase
+      - action: assert_equal
+        actual: "{{ final_phase }}"
+        expected: "Running"
+      # Operator healthy
+      - action: kubectl_pod_ready_count
+        node: k8s_node
+        selector: "control-plane=sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        save_as: op_ready
+      - action: assert_equal
+        actual: "{{ op_ready }}"
+        expected: "1"
+      # No stuck PVCs
+      - action: kubectl_logs
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        tail: "300"
+        save_as: final_logs
+
+  - name: cleanup
+    always: true
+    actions:
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        ignore_error: true
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-1"
+        namespace: "default"
+        ignore_error: true
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-2"
+        namespace: "default"
+        ignore_error: true
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-3"
+        namespace: "default"
+        ignore_error: true
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-4"
+        namespace: "default"
+        ignore_error: true
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-5"
+        namespace: "default"
+        ignore_error: true
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "pvc/soak-pvc-6"
+        namespace: "default"
+        ignore_error: true
+      - action: sleep
+        duration: 5s
diff --git a/weed/storage/blockvol/testrunner/scenarios/op-ownership-conflict.yaml b/weed/storage/blockvol/testrunner/scenarios/op-ownership-conflict.yaml
new file mode 100644
index 000000000..6e3f39072
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/op-ownership-conflict.yaml
@@ -0,0 +1,242 @@
+# Operator Gate G4: Ownership and Conflict Safety
+#
+# Tests that the operator correctly handles:
+# 1. Two CRs competing for singleton cluster-scoped resources
+# 2. Label tampering on owned resources
+# 3. Cleanup after conflict
+#
+# The operator uses label-based ownership (not ownerReferences) for
+# cluster-scoped resources. When a second CR tries to create the same
+# CSIDriver/StorageClass, the operator should set ResourceConflict=True
+# and phase=Failed on the second CR.
+#
+# Pass criteria:
+# - First CR reaches Running with CSIReady=True
+# - Second CR gets ResourceConflict condition, phase=Failed
+# - Label tampering on cluster-scoped resource is detected and corrected
+# - Cleanup of first CR removes all owned resources
+# - After cleanup, second CR can reconcile to Running
+#
+# Requires: k3s cluster, operator deployed
+
+name: op-ownership-conflict
+timeout: 15m
+env:
+  operator_ns: "sw-block-system"
+
+topology:
+  nodes:
+    k8s_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+
+phases:
+  - name: deploy_operator
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/crd/bases/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/rbac/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/manager/"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "3m"
+
+  - name: create_first_cr
+    actions:
+      # Create first CR — should succeed
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: block.seaweedfs.com/v1alpha1
+          kind: SeaweedBlockCluster
+          metadata:
+            name: cr-alpha
+            namespace: default
+          spec:
+            masterRef:
+              address: "192.168.1.184:9333"
+            csi:
+              storageClassName: "sw-block"
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-alpha"
+        namespace: "default"
+        condition: "CSIReady=True"
+        timeout: "5m"
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-alpha"
+        namespace: "default"
+        jsonpath: "{.status.phase}"
+        save_as: alpha_phase
+      - action: assert_equal
+        actual: "{{ alpha_phase }}"
+        expected: "Running"
+
+  - name: create_conflicting_cr
+    actions:
+      # Create second CR with same StorageClass name — should conflict
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: block.seaweedfs.com/v1alpha1
+          kind: SeaweedBlockCluster
+          metadata:
+            name: cr-beta
+            namespace: default
+          spec:
+            masterRef:
+              address: "192.168.1.184:9333"
+            csi:
+              storageClassName: "sw-block"
+      - action: sleep
+        duration: 15s
+
+  - name: verify_conflict
+    actions:
+      # Second CR should have ResourceConflict condition
+      - action: kubectl_get_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-beta"
+        namespace: "default"
+        condition_type: "ResourceConflict"
+        save_as: conflict_status
+      - action: assert_equal
+        actual: "{{ conflict_status }}"
+        expected: "True"
+      # Second CR should be in Failed phase
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-beta"
+        namespace: "default"
+        jsonpath: "{.status.phase}"
+        save_as: beta_phase
+      - action: assert_equal
+        actual: "{{ beta_phase }}"
+        expected: "Failed"
+      # First CR should still be Running
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-alpha"
+        namespace: "default"
+        jsonpath: "{.status.phase}"
+        save_as: alpha_still_running
+      - action: assert_equal
+        actual: "{{ alpha_still_running }}"
+        expected: "Running"
+
+  - name: label_tampering
+    actions:
+      # Tamper with the ownership label on CSIDriver
+      - action: kubectl_label
+        node: k8s_node
+        resource: "csidriver/block.seaweedfs.com"
+        labels: "app.kubernetes.io/managed-by=tampered"
+        overwrite: "true"
+      - action: sleep
+        duration: 10s
+      # After next reconcile, operator should restore the label
+      # Trigger reconcile by touching the CR
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: block.seaweedfs.com/v1alpha1
+          kind: SeaweedBlockCluster
+          metadata:
+            name: cr-alpha
+            namespace: default
+            annotations:
+              reconcile-trigger: "label-fix"
+          spec:
+            masterRef:
+              address: "192.168.1.184:9333"
+            csi:
+              storageClassName: "sw-block"
+      - action: sleep
+        duration: 10s
+      # Verify label was restored
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "csidriver/block.seaweedfs.com"
+        jsonpath: "{.metadata.labels.app\\.kubernetes\\.io/managed-by}"
+        save_as: managed_by
+      - action: assert_equal
+        actual: "{{ managed_by }}"
+        expected: "sw-block-operator"
+
+  - name: cleanup_first_cr
+    actions:
+      # Delete first CR — finalizer should clean up cluster-scoped resources
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-alpha"
+        namespace: "default"
+        wait: "true"
+      - action: sleep
+        duration: 10s
+      # Cluster-scoped resources should be gone
+      - action: kubectl_assert_not_exists
+        node: k8s_node
+        resource: "csidriver/block.seaweedfs.com"
+      - action: kubectl_assert_not_exists
+        node: k8s_node
+        resource: "storageclass/sw-block"
+
+  - name: second_cr_recovers
+    actions:
+      # Now that first CR is gone, second CR should reconcile to Running
+      # Trigger reconcile
+      - action: kubectl_apply
+        node: k8s_node
+        manifest: |
+          apiVersion: block.seaweedfs.com/v1alpha1
+          kind: SeaweedBlockCluster
+          metadata:
+            name: cr-beta
+            namespace: default
+            annotations:
+              reconcile-trigger: "retry-after-cleanup"
+          spec:
+            masterRef:
+              address: "192.168.1.184:9333"
+            csi:
+              storageClassName: "sw-block"
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-beta"
+        namespace: "default"
+        condition: "CSIReady=True"
+        timeout: "5m"
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-beta"
+        namespace: "default"
+        jsonpath: "{.status.phase}"
+        save_as: beta_recovered
+      - action: assert_equal
+        actual: "{{ beta_recovered }}"
+        expected: "Running"
+
+  - name: cleanup
+    always: true
+    actions:
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-alpha"
+        namespace: "default"
+        ignore_error: true
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "seaweedblockcluster/cr-beta"
+        namespace: "default"
+        ignore_error: true
+      - action: sleep
+        duration: 10s
diff --git a/weed/storage/blockvol/testrunner/scenarios/op-upgrade-rollback.yaml b/weed/storage/blockvol/testrunner/scenarios/op-upgrade-rollback.yaml
new file mode 100644
index 000000000..8fd84f1d4
--- /dev/null
+++ b/weed/storage/blockvol/testrunner/scenarios/op-upgrade-rollback.yaml
@@ -0,0 +1,154 @@
+# Operator Gate G1: Upgrade and Rollback Safety
+#
+# Tests operator upgrade N → N+1 and rollback N+1 → N with active CR.
+# Container name for operator Deployment is "operator" (not "manager").
+#
+# Pass criteria:
+# - No stuck PVC/PV/VolumeAttachment
+# - No CR stuck in Failed due to upgrade path
+# - Reconcile converges within 5 minutes after each transition
+#
+# Requires: k3s cluster, two operator image tags (v1 and v2)
+
+name: op-upgrade-rollback
+timeout: 20m
+env:
+  operator_image_v1: "sw-block-operator:v1"
+  operator_image_v2: "sw-block-operator:v2"
+  operator_ns: "sw-block-system"
+  cr_name: "sw-block-upgrade-test"
+  cr_ns: "default"
+
+topology:
+  nodes:
+    k8s_node:
+      host: "192.168.1.184"
+      user: testdev
+      key: "C:/work/dev_server/testdev_key"
+
+phases:
+  - name: baseline_deploy
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/crd/bases/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/rbac/"
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/manager/"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "3m"
+
+  - name: create_cr
+    actions:
+      - action: kubectl_apply
+        node: k8s_node
+        file: "/opt/work/seaweedfs/operator/config/samples/csi-only.yaml"
+      - action: kubectl_wait_condition
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        condition: "CSIReady=True"
+        timeout: "5m"
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        jsonpath: "{.status.phase}"
+        save_as: phase_pre_upgrade
+      - action: assert_equal
+        actual: "{{ phase_pre_upgrade }}"
+        expected: "Running"
+
+  - name: upgrade_operator
+    actions:
+      # Upgrade: N → N+1 (container name is "operator")
+      - action: kubectl_set_image
+        node: k8s_node
+        deployment: "deploy/sw-block-operator"
+        container: "operator"
+        image: "{{ operator_image_v2 }}"
+        namespace: "{{ operator_ns }}"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "5m"
+      - action: sleep
+        duration: 10s
+
+  - name: verify_after_upgrade
+    actions:
+      # CR should still be Running after upgrade
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        jsonpath: "{.status.phase}"
+        save_as: phase_post_upgrade
+      - action: assert_equal
+        actual: "{{ phase_post_upgrade }}"
+        expected: "Running"
+      # CSI resources should still exist
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "csidriver/block.seaweedfs.com"
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "storageclass/sw-block"
+
+  - name: rollback_operator
+    actions:
+      # Rollback: N+1 → N (container name is "operator")
+      - action: kubectl_set_image
+        node: k8s_node
+        deployment: "deploy/sw-block-operator"
+        container: "operator"
+        image: "{{ operator_image_v1 }}"
+        namespace: "{{ operator_ns }}"
+      - action: kubectl_rollout_status
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        timeout: "5m"
+      - action: sleep
+        duration: 10s
+
+  - name: verify_after_rollback
+    actions:
+      - action: kubectl_get_field
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        jsonpath: "{.status.phase}"
+        save_as: phase_post_rollback
+      - action: assert_equal
+        actual: "{{ phase_post_rollback }}"
+        expected: "Running"
+      # Verify no stuck resources
+      - action: kubectl_assert_exists
+        node: k8s_node
+        resource: "csidriver/block.seaweedfs.com"
+      # Collect operator logs for evidence
+      - action: kubectl_logs
+        node: k8s_node
+        resource: "deploy/sw-block-operator"
+        namespace: "{{ operator_ns }}"
+        tail: "200"
+        save_as: operator_logs
+
+  - name: cleanup
+    always: true
+    actions:
+      - action: kubectl_delete
+        node: k8s_node
+        resource: "seaweedblockcluster/{{ cr_name }}"
+        namespace: "{{ cr_ns }}"
+        ignore_error: true
+      - action: sleep
+        duration: 10s
diff --git a/weed/storage/blockvol/testrunner/types.go b/weed/storage/blockvol/testrunner/types.go
index 0fa0b274b..23de7f749 100644
--- a/weed/storage/blockvol/testrunner/types.go
+++ b/weed/storage/blockvol/testrunner/types.go
@@ -1,6 +1,10 @@
 package testrunner
 
-import "time"
+import (
+	"time"
+
+	"github.com/seaweedfs/seaweedfs/weed/storage/blockvol"
+)
 
 // Scenario is the top-level YAML structure for a test scenario.
 type Scenario struct {
@@ -50,7 +54,7 @@ type NodeSpec struct {
 	Agent   string `yaml:"agent"` // maps node to an agent (coordinator mode)
 }
 
-// TargetSpec defines an iSCSI target instance.
+// TargetSpec defines an iSCSI/NVMe target instance.
 type TargetSpec struct {
 	Node            string `yaml:"node"`
 	VolSize         string `yaml:"vol_size"`
@@ -62,20 +66,36 @@ type TargetSpec struct {
 	RebuildPort     int    `yaml:"rebuild_port"`
 	IQNSuffix       string `yaml:"iqn_suffix"`
 	TPGID           int    `yaml:"tpg_id"`
+	NvmePort             int    `yaml:"nvme_port"`
+	NQNSuffix            string `yaml:"nqn_suffix"`
+	MaxConcurrentWrites  int    `yaml:"max_concurrent_writes"`
+	NvmeIOQueues         int    `yaml:"nvme_io_queues"`
 }
 
-// IQN returns the full IQN from the suffix.
+// IQN returns the full IQN from the suffix, sanitized via the shared naming helper.
 func (ts TargetSpec) IQN() string {
-	return "iqn.2024.com.seaweedfs:" + ts.IQNSuffix
+	return "iqn.2024.com.seaweedfs:" + blockvol.SanitizeIQN(ts.IQNSuffix)
+}
+
+// NQN returns the full NQN from the suffix, using the shared BuildNQN helper
+// so that testrunner identifiers always match what the runtime registers.
+func (ts TargetSpec) NQN() string {
+	suffix := ts.NQNSuffix
+	if suffix == "" {
+		suffix = ts.IQNSuffix
+	}
+	return blockvol.BuildNQN("nqn.2024-01.com.seaweedfs:vol.", suffix)
 }
 
 // Phase is a sequential group of actions.
 type Phase struct {
-	Name     string   `yaml:"name"`
-	Always   bool     `yaml:"always"`
-	Parallel bool     `yaml:"parallel"`
-	Repeat   int      `yaml:"repeat"`
-	Actions  []Action `yaml:"actions"`
+	Name      string `yaml:"name"`
+	Always    bool   `yaml:"always"`
+	Parallel  bool   `yaml:"parallel"`
+	Repeat    int    `yaml:"repeat"`
+	Aggregate string `yaml:"aggregate"` // "median" (default when repeat>1), "mean", "none"
+	TrimPct   int    `yaml:"trim_pct"`  // percentage of outliers to trim from each end (default: 20)
+	Actions   []Action `yaml:"actions"`
 }
 
 // Action is a single step within a phase.
diff --git a/weed/storage/blockvol/wal_admission.go b/weed/storage/blockvol/wal_admission.go
new file mode 100644
index 000000000..e8973d175
--- /dev/null
+++ b/weed/storage/blockvol/wal_admission.go
@@ -0,0 +1,121 @@
+package blockvol
+
+import (
+	"time"
+)
+
+// WALAdmission controls write admission based on WAL pressure watermarks.
+// It limits concurrent writers via a counting semaphore and gates new
+// admission when WAL usage exceeds configurable thresholds.
+//
+// Watermark behavior:
+//   - below soft watermark: writes pass through immediately
+//   - between soft and hard: writes are admitted with a small delay to
+//     desynchronize concurrent writers and give the flusher time to drain
+//   - above hard watermark: new writes are blocked until pressure drops
+//     below the hard watermark or the timeout expires
+//
+// A single deadline governs the entire Acquire call. Time spent waiting
+// for the hard watermark to clear reduces the budget available for
+// semaphore acquisition.
+type WALAdmission struct {
+	sem      chan struct{}   // counting semaphore for concurrent WAL appenders
+	walUsed  func() float64 // returns WAL used fraction 0.0–1.0
+	notifyFn func()         // wakes flusher
+	softMark float64        // begin throttling
+	hardMark float64        // block admission
+	closedFn func() bool    // returns true if volume is closed
+
+	// sleepFn is the sleep function. Replaced in tests for determinism.
+	sleepFn func(time.Duration)
+}
+
+// WALAdmissionConfig holds parameters for WALAdmission construction.
+type WALAdmissionConfig struct {
+	MaxConcurrent int            // max concurrent writers (semaphore size)
+	SoftWatermark float64        // WAL fraction above which writes throttle
+	HardWatermark float64        // WAL fraction above which writes block
+	WALUsedFn     func() float64 // returns WAL used fraction
+	NotifyFn      func()         // wake flusher on pressure
+	ClosedFn      func() bool    // check if volume is closed
+}
+
+// NewWALAdmission creates a WAL admission controller.
+func NewWALAdmission(cfg WALAdmissionConfig) *WALAdmission {
+	return &WALAdmission{
+		sem:      make(chan struct{}, cfg.MaxConcurrent),
+		walUsed:  cfg.WALUsedFn,
+		notifyFn: cfg.NotifyFn,
+		softMark: cfg.SoftWatermark,
+		hardMark: cfg.HardWatermark,
+		closedFn: cfg.ClosedFn,
+		sleepFn:  time.Sleep,
+	}
+}
+
+// Acquire blocks until a write slot is available or the deadline expires.
+// The timeout covers both the watermark wait and semaphore acquisition.
+// Returns ErrWALFull on timeout, ErrVolumeClosed if the volume closes.
+func (a *WALAdmission) Acquire(timeout time.Duration) error {
+	deadline := time.NewTimer(timeout)
+	defer deadline.Stop()
+
+	pressure := a.walUsed()
+
+	// Hard watermark gate: wait for flusher to drain before competing for semaphore.
+	if pressure >= a.hardMark {
+		a.notifyFn()
+		for a.walUsed() >= a.hardMark {
+			if a.closedFn() {
+				return ErrVolumeClosed
+			}
+			a.notifyFn()
+			select {
+			case <-deadline.C:
+				return ErrWALFull
+			default:
+			}
+			a.sleepFn(2 * time.Millisecond)
+		}
+		// Pressure dropped — fall through to semaphore acquisition.
+	} else if pressure >= a.softMark {
+		// Soft watermark: small delay to desynchronize herd.
+		a.notifyFn()
+		scale := (pressure - a.softMark) / (a.hardMark - a.softMark)
+		if scale > 1 {
+			scale = 1
+		}
+		// Scale: softMark→0ms, hardMark→5ms.
+		delay := time.Duration(scale * 5 * float64(time.Millisecond))
+		if delay > 0 {
+			a.sleepFn(delay)
+		}
+	}
+
+	// Acquire semaphore slot using the same deadline.
+	select {
+	case a.sem <- struct{}{}:
+		return nil
+	default:
+	}
+	// Semaphore full — wait with remaining budget, also check close.
+	closeTick := time.NewTicker(5 * time.Millisecond)
+	defer closeTick.Stop()
+	for {
+		select {
+		case a.sem <- struct{}{}:
+			return nil
+		case <-deadline.C:
+			return ErrWALFull
+		case <-closeTick.C:
+			if a.closedFn() {
+				return ErrVolumeClosed
+			}
+		}
+	}
+}
+
+// Release returns a write slot to the semaphore.
+func (a *WALAdmission) Release() {
+	<-a.sem
+}
diff --git a/weed/storage/blockvol/wal_admission_test.go b/weed/storage/blockvol/wal_admission_test.go
new file mode 100644
index 000000000..fc9150400
--- /dev/null
+++ b/weed/storage/blockvol/wal_admission_test.go
@@ -0,0 +1,354 @@
+package blockvol
+
+import (
+	"errors"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+)
+
+func TestWALAdmission_AcquireRelease_Basic(t *testing.T) {
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 4,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+
+	// Acquire and release should work under no pressure.
+	for i := 0; i < 4; i++ {
+		if err := a.Acquire(100 * time.Millisecond); err != nil {
+			t.Fatalf("Acquire %d: %v", i, err)
+		}
+	}
+	// All 4 slots taken — next acquire should timeout.
+	err := a.Acquire(10 * time.Millisecond)
+	if err == nil {
+		t.Fatal("expected timeout with all slots taken")
+	}
+	if !errors.Is(err, ErrWALFull) {
+		t.Fatalf("expected ErrWALFull, got %v", err)
+	}
+
+	// Release one and acquire again.
+	a.Release()
+	if err := a.Acquire(100 * time.Millisecond); err != nil {
+		t.Fatalf("Acquire after release: %v", err)
+	}
+
+	// Release all.
+	for i := 0; i < 4; i++ {
+		a.Release()
+	}
+}
+
+func TestWALAdmission_SoftWatermark_Throttles(t *testing.T) {
+	var sleepCalls []time.Duration
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.8 }, // between soft and hard
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+	a.sleepFn = func(d time.Duration) { sleepCalls = append(sleepCalls, d) }
+
+	if err := a.Acquire(100 * time.Millisecond); err != nil {
+		t.Fatalf("Acquire: %v", err)
+	}
+	a.Release()
+
+	// Should have slept once for soft watermark delay.
+	if len(sleepCalls) != 1 {
+		t.Fatalf("expected 1 sleep call for soft watermark, got %d", len(sleepCalls))
+	}
+	// Scale: (0.8 - 0.7) / (0.9 - 0.7) = 0.5, delay = 0.5 * 5ms = 2.5ms
+	if sleepCalls[0] < 2*time.Millisecond || sleepCalls[0] > 3*time.Millisecond {
+		t.Fatalf("soft watermark sleep = %v, want ~2.5ms", sleepCalls[0])
+	}
+}
+
+func TestWALAdmission_BelowSoft_NoThrottle(t *testing.T) {
+	sleepCalled := false
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.5 }, // below soft
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+	a.sleepFn = func(d time.Duration) { sleepCalled = true }
+
+	if err := a.Acquire(100 * time.Millisecond); err != nil {
+		t.Fatalf("Acquire: %v", err)
+	}
+	a.Release()
+
+	if sleepCalled {
+		t.Fatal("should not sleep below soft watermark")
+	}
+}
+
+func TestWALAdmission_HardWatermark_BlocksUntilDrain(t *testing.T) {
+	var pressure atomic.Int64
+	pressure.Store(95) // 0.95
+
+	var notifyCalls atomic.Int64
+	var sleepCalls atomic.Int64
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() { notifyCalls.Add(1) },
+		ClosedFn:      func() bool { return false },
+	})
+	a.sleepFn = func(d time.Duration) {
+		count := sleepCalls.Add(1)
+		// Simulate flusher drain: after 3 sleeps, pressure drops.
+		if count >= 3 {
+			pressure.Store(50)
+		}
+	}
+
+	if err := a.Acquire(1 * time.Second); err != nil {
+		t.Fatalf("Acquire: %v", err)
+	}
+	a.Release()
+
+	if sleepCalls.Load() < 3 {
+		t.Fatalf("expected >= 3 sleep calls in hard watermark wait, got %d", sleepCalls.Load())
+	}
+	if notifyCalls.Load() < 2 {
+		t.Fatalf("expected >= 2 flusher notifications, got %d", notifyCalls.Load())
+	}
+}
+
+func TestWALAdmission_HardWatermark_Timeout(t *testing.T) {
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.95 }, // always above hard
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+	a.sleepFn = func(d time.Duration) {} // no-op sleep
+
+	err := a.Acquire(10 * time.Millisecond)
+	if err == nil {
+		t.Fatal("expected timeout under persistent hard watermark pressure")
+	}
+	if !errors.Is(err, ErrWALFull) {
+		t.Fatalf("expected ErrWALFull, got %v", err)
+	}
+}
+
+func TestWALAdmission_ClosedDuringHardWait(t *testing.T) {
+	var closed atomic.Bool
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.95 },
+		NotifyFn:      func() {},
+		ClosedFn:      closed.Load,
+	})
+	a.sleepFn = func(d time.Duration) {
+		closed.Store(true) // simulate volume closing during wait
+	}
+
+	err := a.Acquire(1 * time.Second)
+	if !errors.Is(err, ErrVolumeClosed) {
+		t.Fatalf("expected ErrVolumeClosed, got %v", err)
+	}
+}
+
+func TestWALAdmission_Concurrent_BoundedWriters(t *testing.T) {
+	const maxConcurrent = 4
+	var active atomic.Int64
+	var maxSeen atomic.Int64
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: maxConcurrent,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+
+	var wg sync.WaitGroup
+	const goroutines = 32
+
+	wg.Add(goroutines)
+	for i := 0; i < goroutines; i++ {
+		go func() {
+			defer wg.Done()
+			for j := 0; j < 10; j++ {
+				if err := a.Acquire(5 * time.Second); err != nil {
+					return
+				}
+				cur := active.Add(1)
+				// Track max concurrency observed.
+				for {
+					old := maxSeen.Load()
+					if cur <= old || maxSeen.CompareAndSwap(old, cur) {
+						break
+					}
+				}
+				// Simulate work.
+				time.Sleep(100 * time.Microsecond)
+				active.Add(-1)
+				a.Release()
+			}
+		}()
+	}
+	wg.Wait()
+
+	if maxSeen.Load() > maxConcurrent {
+		t.Fatalf("max concurrent = %d, want <= %d", maxSeen.Load(), maxConcurrent)
+	}
+}
+
+func TestWALAdmission_FlusherNotified_OnSoftAndHard(t *testing.T) {
+	var notifyCount atomic.Int64
+	var callNum atomic.Int64
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 16,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn: func() float64 {
+			// First call returns soft pressure, second returns below soft.
+			n := callNum.Add(1)
+			if n == 1 {
+				return 0.8 // soft watermark
+			}
+			return 0.3 // safe
+		},
+		NotifyFn: func() { notifyCount.Add(1) },
+		ClosedFn: func() bool { return false },
+	})
+	a.sleepFn = func(d time.Duration) {}
+
+	// First acquire: soft watermark should trigger notify.
+	if err := a.Acquire(100 * time.Millisecond); err != nil {
+		t.Fatalf("Acquire 1: %v", err)
+	}
+	a.Release()
+
+	if notifyCount.Load() < 1 {
+		t.Fatal("expected flusher notification at soft watermark")
+	}
+
+	// Second acquire: below soft, no additional notify.
+	before := notifyCount.Load()
+	if err := a.Acquire(100 * time.Millisecond); err != nil {
+		t.Fatalf("Acquire 2: %v", err)
+	}
+	a.Release()
+
+	if notifyCount.Load() != before {
+		t.Fatal("should not notify flusher below soft watermark")
+	}
+}
+
+// TestWALAdmission_SingleBudget_HardThenSemaphore verifies that the hard
+// watermark wait and semaphore wait share a single timeout budget.
+// If the hard watermark consumes most of the budget, the semaphore wait
+// must use only the remaining time (not a fresh timeout).
+func TestWALAdmission_SingleBudget_HardThenSemaphore(t *testing.T) {
+	var pressure atomic.Int64
+	pressure.Store(95) // above hard watermark
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 1,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return float64(pressure.Load()) / 100.0 },
+		NotifyFn:      func() {},
+		ClosedFn:      func() bool { return false },
+	})
+
+	var sleepTotal atomic.Int64
+	a.sleepFn = func(d time.Duration) {
+		sleepTotal.Add(int64(d))
+		// After some sleep cycles, drop pressure below hard mark.
+		if sleepTotal.Load() > int64(10*time.Millisecond) {
+			pressure.Store(50)
+		}
+	}
+
+	// Fill the semaphore so semaphore wait also blocks.
+	a.sem <- struct{}{}
+
+	// Total budget: 50ms. Hard watermark will consume ~10ms of it.
+	// Semaphore wait must timeout with the remaining ~40ms, NOT a fresh 50ms.
+	start := time.Now()
+	err := a.Acquire(50 * time.Millisecond)
+	elapsed := time.Since(start)
+
+	if err == nil {
+		a.Release()
+		t.Fatal("expected timeout (semaphore full)")
+	}
+	if !errors.Is(err, ErrWALFull) {
+		t.Fatalf("expected ErrWALFull, got %v", err)
+	}
+	// Total elapsed must be well under 2x the budget (100ms).
+	// With single budget, it should be ~50ms. With double budget it would be ~100ms.
+	if elapsed > 80*time.Millisecond {
+		t.Fatalf("elapsed %v exceeds single-budget expectation (~50ms), suggests double timeout", elapsed)
+	}
+
+	// Drain the semaphore.
+	<-a.sem
+}
+
+// TestWALAdmission_CloseDuringSemaphoreWait verifies that volume close is
+// detected while waiting for a full semaphore, not only during the hard
+// watermark loop.
+func TestWALAdmission_CloseDuringSemaphoreWait(t *testing.T) {
+	var closed atomic.Bool
+
+	a := NewWALAdmission(WALAdmissionConfig{
+		MaxConcurrent: 1,
+		SoftWatermark: 0.7,
+		HardWatermark: 0.9,
+		WALUsedFn:     func() float64 { return 0.0 }, // no pressure
+		NotifyFn:      func() {},
+		ClosedFn:      closed.Load,
+	})
+
+	// Fill semaphore.
+	a.sem <- struct{}{}
+
+	// Close after a short delay.
+	go func() {
+		time.Sleep(15 * time.Millisecond)
+		closed.Store(true)
+	}()
+
+	start := time.Now()
+	err := a.Acquire(2 * time.Second) // long timeout — should not wait that long
+	elapsed := time.Since(start)
+
+	if !errors.Is(err, ErrVolumeClosed) {
+		t.Fatalf("expected ErrVolumeClosed, got %v", err)
+	}
+	// Should detect close quickly (within ~20ms), not wait 2s.
+	if elapsed > 200*time.Millisecond {
+		t.Fatalf("close detection took %v, expected < 200ms", elapsed)
+	}
+
+	// Drain.
+	<-a.sem
+}