mirror of
https://github.com/seaweedfs/seaweedfs.git
synced 2026-05-29 21:20:21 +00:00
A non-owner filer forwards the whole transaction to the ring owner of route_key, so the owner's per-path lock stays the single serialization point even when the caller's ring view is stale. is_moved bounds forwarding to one hop. The gateway stamps route_key on every routed builder via the shared objectRouteKey helper. Completes taking S3 object mutations off the distributed lock.
189 lines
8.4 KiB
Go
189 lines
8.4 KiB
Go
package s3api
|
|
|
|
import (
|
|
"strconv"
|
|
"time"
|
|
|
|
"github.com/seaweedfs/seaweedfs/weed/glog"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/pb/filer_pb"
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3_constants"
|
|
"github.com/seaweedfs/seaweedfs/weed/s3api/s3err"
|
|
"github.com/seaweedfs/seaweedfs/weed/util"
|
|
)
|
|
|
|
// objectWriteOwner resolves the filer that owns all of an object's writes,
|
|
// regardless of versioning state, or "" when no ring view is available. Normal,
|
|
// suspended, and versioned writes to the same object hash to one owner and
|
|
// serialize on its per-path lock.
|
|
func (s3a *S3ApiServer) objectWriteOwner(bucket, object string) pb.ServerAddress {
|
|
if s3a.objectWriteLockClient == nil {
|
|
return ""
|
|
}
|
|
return s3a.objectWriteLockClient.PrimaryForKey(s3a.objectRouteKey(bucket, object))
|
|
}
|
|
|
|
// latestPointerRecompute builds the RECOMPUTE_LATEST mutation that re-derives an
|
|
// object's .versions pointer. excludeName, when set, omits a version about to be
|
|
// deleted (so the pointer is repointed before the blob is removed); demote, when
|
|
// set, stamps the displaced prior latest with NoncurrentSinceNs.
|
|
func (s3a *S3ApiServer) latestPointerRecompute(bucket, object string, useInvertedFormat bool, excludeName string, demote bool) *filer_pb.ObjectMutation {
|
|
versionsPath := s3a.toFilerPath(bucket, object+s3_constants.VersionsFolder)
|
|
vdir, vname := util.FullPath(versionsPath).DirAndName()
|
|
rc := &filer_pb.Recompute{
|
|
ScanDir: versionsPath,
|
|
// Inverted ids sort newest-first, so the newest is the first ascending
|
|
// entry; legacy ids sort oldest-first (scan to the last).
|
|
Descending: !useInvertedFormat,
|
|
NameToKey: s3_constants.ExtLatestVersionFileNameKey,
|
|
SizeToKey: s3_constants.ExtLatestVersionSizeKey,
|
|
MtimeToKey: s3_constants.ExtLatestVersionMtimeKey,
|
|
CopyExtended: map[string]string{
|
|
s3_constants.ExtLatestVersionIdKey: s3_constants.ExtVersionIdKey,
|
|
s3_constants.ExtLatestVersionETagKey: s3_constants.ExtETagKey,
|
|
s3_constants.ExtLatestVersionOwnerKey: s3_constants.ExtAmzOwnerKey,
|
|
s3_constants.ExtLatestVersionIsDeleteMarker: s3_constants.ExtDeleteMarkerKey,
|
|
},
|
|
ExcludeName: excludeName,
|
|
}
|
|
if demote {
|
|
rc.DemoteKey = s3_constants.ExtNoncurrentSinceNsKey
|
|
rc.DemoteValue = []byte(strconv.FormatInt(time.Now().UnixNano(), 10))
|
|
}
|
|
return &filer_pb.ObjectMutation{
|
|
Type: filer_pb.ObjectMutation_RECOMPUTE_LATEST,
|
|
Directory: vdir,
|
|
Name: vname,
|
|
Recompute: rc,
|
|
}
|
|
}
|
|
|
|
// routedVersionedFinalize flips the .versions pointer to the newest version and
|
|
// demotes the prior latest, atomically under the object's per-path lock on the
|
|
// owner filer, via a single RECOMPUTE_LATEST. The version file is already
|
|
// written; the owner re-derives the pointer by scanning the directory.
|
|
func (s3a *S3ApiServer) routedVersionedFinalize(owner pb.ServerAddress, bucket, object string, useInvertedFormat bool) s3err.ErrorCode {
|
|
req := &filer_pb.ObjectTransactionRequest{
|
|
LockKey: s3a.toFilerPath(bucket, object),
|
|
RouteKey: s3a.objectRouteKey(bucket, object),
|
|
Mutations: []*filer_pb.ObjectMutation{s3a.latestPointerRecompute(bucket, object, useInvertedFormat, "", true)},
|
|
}
|
|
resp, err := s3a.objectTxnOnFiler(owner, req)
|
|
switch {
|
|
case err != nil:
|
|
glog.Errorf("routedVersionedFinalize: %s/%s on %s: %v", bucket, object, owner, err)
|
|
return s3err.ErrInternalError
|
|
case resp.Error != "":
|
|
glog.Errorf("routedVersionedFinalize: %s/%s: %s", bucket, object, resp.Error)
|
|
return s3err.ErrInternalError
|
|
default:
|
|
return s3err.ErrNone
|
|
}
|
|
}
|
|
|
|
// wormDeleteCondition returns the object-lock guards for a delete, or nil when
|
|
// the bucket has no object lock. Legal hold always blocks. Retention blocks
|
|
// while not elapsed; with governance bypass the retention guard is gated to
|
|
// COMPLIANCE mode, so a governance-mode version becomes deletable while a
|
|
// compliance-mode one stays protected — the filer decides from the version's
|
|
// mode under the lock, so the gateway never has to read it.
|
|
func wormDeleteCondition(worm, bypass bool) *filer_pb.WriteCondition {
|
|
if !worm {
|
|
return nil
|
|
}
|
|
retention := &filer_pb.WriteCondition_Clause{
|
|
Kind: filer_pb.WriteCondition_IF_EXTENDED_TIME_ELAPSED,
|
|
ExtKey: s3_constants.ExtRetentionUntilDateKey,
|
|
}
|
|
if bypass {
|
|
retention.GateKey = s3_constants.ExtObjectLockModeKey
|
|
retention.GateValue = s3_constants.RetentionModeCompliance
|
|
}
|
|
return &filer_pb.WriteCondition{Clauses: []*filer_pb.WriteCondition_Clause{
|
|
{Kind: filer_pb.WriteCondition_IF_EXTENDED_NOT_EQUAL, ExtKey: s3_constants.ExtLegalHoldKey, ExtValue: s3_constants.LegalHoldOn},
|
|
retention,
|
|
}}
|
|
}
|
|
|
|
// routedDeleteSpecificVersion deletes one version off the distributed lock: in a
|
|
// single transaction on the owner it recomputes the .versions pointer excluding
|
|
// the version (repoint-before-delete, so a crash leaves a recoverable orphan
|
|
// rather than a dangling pointer) and deletes the version file. lock_key is the
|
|
// object (serializing the pointer recompute); for object-lock buckets the
|
|
// condition gates the delete on the version's WORM guards evaluated on the owner.
|
|
func (s3a *S3ApiServer) routedDeleteSpecificVersion(owner pb.ServerAddress, bucket, object, versionId string, worm, bypass bool) s3err.ErrorCode {
|
|
versionFileName := s3a.getVersionFileName(versionId)
|
|
versionsPath := s3a.toFilerPath(bucket, object+s3_constants.VersionsFolder)
|
|
cond := wormDeleteCondition(worm, bypass)
|
|
req := &filer_pb.ObjectTransactionRequest{
|
|
LockKey: s3a.toFilerPath(bucket, object),
|
|
RouteKey: s3a.objectRouteKey(bucket, object),
|
|
ConditionKey: versionsPath + "/" + versionFileName,
|
|
Condition: cond,
|
|
Mutations: []*filer_pb.ObjectMutation{
|
|
s3a.latestPointerRecompute(bucket, object, isNewFormatVersionId(versionId), versionFileName, false),
|
|
{Type: filer_pb.ObjectMutation_DELETE, Directory: versionsPath, Name: versionFileName, IsDeleteData: true},
|
|
},
|
|
}
|
|
resp, err := s3a.objectTxnOnFiler(owner, req)
|
|
switch {
|
|
case err != nil:
|
|
glog.Errorf("routedDeleteSpecificVersion: %s/%s %s on %s: %v", bucket, object, versionId, owner, err)
|
|
return s3err.ErrInternalError
|
|
case resp.ErrorCode == filer_pb.FilerError_PRECONDITION_FAILED:
|
|
// Legal hold or retention in force on the version.
|
|
return s3err.ErrAccessDenied
|
|
case resp.Error != "":
|
|
glog.Errorf("routedDeleteSpecificVersion: %s/%s %s: %s", bucket, object, versionId, resp.Error)
|
|
return s3err.ErrInternalError
|
|
default:
|
|
return s3err.ErrNone
|
|
}
|
|
}
|
|
|
|
// routedDeleteNullVersion deletes the null version (the regular object entry, not
|
|
// a .versions file) off the distributed lock. There is no pointer to recompute;
|
|
// the WORM guards, when present, gate the delete on the object entry itself
|
|
// (condition defaults to lock_key).
|
|
func (s3a *S3ApiServer) routedDeleteNullVersion(owner pb.ServerAddress, bucket, object string, worm, bypass bool) s3err.ErrorCode {
|
|
fullpath := util.NewFullPath(s3a.bucketDir(bucket), object)
|
|
dir, name := fullpath.DirAndName()
|
|
resp, err := s3a.objectTxnOnFiler(owner, &filer_pb.ObjectTransactionRequest{
|
|
LockKey: string(fullpath),
|
|
RouteKey: s3a.objectRouteKey(bucket, object),
|
|
Condition: wormDeleteCondition(worm, bypass),
|
|
Mutations: []*filer_pb.ObjectMutation{
|
|
{Type: filer_pb.ObjectMutation_DELETE, Directory: dir, Name: name, IsDeleteData: true},
|
|
},
|
|
})
|
|
switch {
|
|
case err != nil:
|
|
glog.Errorf("routedDeleteNullVersion: %s/%s on %s: %v", bucket, object, owner, err)
|
|
return s3err.ErrInternalError
|
|
case resp.ErrorCode == filer_pb.FilerError_PRECONDITION_FAILED:
|
|
return s3err.ErrAccessDenied
|
|
case resp.Error != "":
|
|
glog.Errorf("routedDeleteNullVersion: %s/%s: %s", bucket, object, resp.Error)
|
|
return s3err.ErrInternalError
|
|
default:
|
|
return s3err.ErrNone
|
|
}
|
|
}
|
|
|
|
// versionedAfterCreate returns the putToFiler hook that finalizes a versioned
|
|
// write: the routed RECOMPUTE_LATEST when the owner is known, else the existing
|
|
// lock-free updateLatestVersionInDirectory.
|
|
func (s3a *S3ApiServer) versionedAfterCreate(bucket, object, versionId, versionFileName string, useInvertedFormat bool) func(*filer_pb.Entry) s3err.ErrorCode {
|
|
owner := s3a.objectWriteOwner(bucket, object)
|
|
return func(versionEntry *filer_pb.Entry) s3err.ErrorCode {
|
|
if owner != "" {
|
|
return s3a.routedVersionedFinalize(owner, bucket, object, useInvertedFormat)
|
|
}
|
|
if err := s3a.updateLatestVersionInDirectory(bucket, object, versionId, versionFileName, versionEntry); err != nil {
|
|
glog.Errorf("putVersionedObject: failed to update latest version in directory: %v", err)
|
|
return s3err.ErrInternalError
|
|
}
|
|
return s3err.ErrNone
|
|
}
|
|
}
|