mirror of
https://github.com/versity/scoutfs-go.git
synced 2026-01-08 12:41:11 +00:00
scoutfs now adds a new ioctl to get the parent id and some extra info given an inode within the filesystem. This is less expensive than the full path resolution for an inode, so can speed up cases when the parent is all thats needed.
1137 lines
26 KiB
Go
1137 lines
26 KiB
Go
// Copyright (c) 2018 Versity Software, Inc.
|
|
//
|
|
// Use of this source code is governed by a BSD-3-Clause license
|
|
// that can be found in the LICENSE file in the root of the source
|
|
// tree.
|
|
|
|
package scoutfs
|
|
|
|
import (
|
|
"bufio"
|
|
"bytes"
|
|
"encoding/binary"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"math"
|
|
"os"
|
|
"path/filepath"
|
|
"strconv"
|
|
"strings"
|
|
"syscall"
|
|
"time"
|
|
"unsafe"
|
|
)
|
|
|
|
const (
	// max64 is the largest uint64, used as an inclusive "no upper bound" marker.
	max64 = 0xffffffffffffffff
	// max32 is the largest uint32.
	max32 = 0xffffffff
	// pathmax is the maximum path length returned by the ino-to-path ioctls.
	pathmax = 4096
	// sysscoutfs is the sysfs root directory for scoutfs mounts.
	sysscoutfs = "/sys/fs/scoutfs/"
	// statusfile is the per-mount quorum status file under sysscoutfs.
	statusfile = "quorum/status"
	// listattrBufsize is the default buffer size for listing xattrs.
	listattrBufsize = 256 * 1024
	// getparentBufsize is the default buffer size for GetParents.
	getparentBufsize = 4096 * 1024
	// scoutfsBS is the scoutfs block size in bytes.
	scoutfsBS = 4096
	//leaderfile = "quorum/is_leader"
)
|
|
|
|
// Query to keep track of in-process query
type Query struct {
	first InodesEntry // next entry position to return (inclusive)
	last  InodesEntry // final entry position (inclusive)
	index uint8       // which index to walk (meta seq or data seq)
	batch uint32      // max entries returned per Next() call
	fsfd  *os.File    // open file within scoutfs used for ioctls
	buf   []byte      // reusable result buffer sized for one batch
}
|
|
|
|
// Time represents a time value in seconds and nanoseconds
type Time struct {
	Sec  uint64 // seconds since the epoch
	Nsec uint32 // nanosecond remainder
}
|
|
|
|
// NewQuery creates a new scoutfs Query
|
|
// Specify query type with By*() option
|
|
// (only 1 allowed, last one wins)
|
|
// and specify batching with WithBatchSize()
|
|
// An open file within scoutfs is supplied for ioctls
|
|
// (usually just the base mount point directory)
|
|
func NewQuery(f *os.File, opts ...Option) *Query {
|
|
q := &Query{
|
|
//default batch size is 128
|
|
batch: 128,
|
|
fsfd: f,
|
|
}
|
|
|
|
for _, opt := range opts {
|
|
opt(q)
|
|
}
|
|
|
|
q.buf = make([]byte, int(unsafe.Sizeof(InodesEntry{}))*int(q.batch))
|
|
|
|
return q
|
|
}
|
|
|
|
// Option sets various options for NewQuery
type Option func(*Query)
|
|
|
|
// ByMSeq gets inodes in range of metadata sequence from, to inclusive
|
|
func ByMSeq(from, to InodesEntry) Option {
|
|
return func(q *Query) {
|
|
q.first = from
|
|
q.last = to
|
|
q.index = QUERYINODESMETASEQ
|
|
}
|
|
}
|
|
|
|
// ByDSeq gets inodes in range of data sequence from, to inclusive
|
|
func ByDSeq(from, to InodesEntry) Option {
|
|
return func(q *Query) {
|
|
q.first = from
|
|
q.last = to
|
|
q.index = QUERYINODESDATASEQ
|
|
}
|
|
}
|
|
|
|
// WithBatchSize sets the max number of inodes to be returned at a time
|
|
func WithBatchSize(size uint32) Option {
|
|
return func(q *Query) {
|
|
q.batch = size
|
|
}
|
|
}
|
|
|
|
// Next gets the next batch of inodes
// Returns nil, nil when the query range is exhausted.
func (q *Query) Next() ([]InodesEntry, error) {
	// Build the ioctl request pointing the kernel at our reusable buffer.
	query := queryInodes{
		First:       q.first,
		Last:        q.last,
		Entries_ptr: uint64(uintptr(unsafe.Pointer(&q.buf[0]))),
		Nr_entries:  q.batch,
		Index:       q.index,
	}

	// n is the number of entries the kernel wrote into q.buf.
	n, err := scoutfsctl(q.fsfd, IOCQUERYINODES, unsafe.Pointer(&query))
	if err != nil {
		return nil, err
	}

	if n == 0 {
		return nil, nil
	}

	// Decode the packed little-endian entries out of the raw buffer.
	rbuf := bytes.NewReader(q.buf)
	inodes := make([]InodesEntry, n)

	var e InodesEntry
	for i := 0; i < n; i++ {
		err := binary.Read(rbuf, binary.LittleEndian, &e)
		if err != nil {
			return nil, err
		}

		inodes[i] = e
	}

	// Resume the next call just past the last entry returned.
	q.first = e.Increment()
	return inodes, nil
}
|
|
|
|
// SetLast updates the sequence stopping point
|
|
func (q *Query) SetLast(l InodesEntry) {
|
|
q.last = l
|
|
}
|
|
|
|
// Increment returns the next seq entry position
|
|
func (i InodesEntry) Increment() InodesEntry {
|
|
i.Ino++
|
|
if i.Ino == 0 {
|
|
i.Minor++
|
|
if i.Minor == 0 {
|
|
i.Major++
|
|
}
|
|
}
|
|
return i
|
|
}
|
|
|
|
// String returns the string representation of InodesEntry
// Only Major (as "seq") and Ino are shown; Minor is omitted.
func (i InodesEntry) String() string {
	return fmt.Sprintf("{seq: %v, ino: %v}", i.Major, i.Ino)
}
|
|
|
|
// StatMore returns scoutfs specific metadata for path
|
|
func StatMore(path string) (Stat, error) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return Stat{}, err
|
|
}
|
|
defer f.Close()
|
|
|
|
return FStatMore(f)
|
|
}
|
|
|
|
// FStatMore returns scoutfs specific metadata for file handle
|
|
func FStatMore(f *os.File) (Stat, error) {
|
|
s := Stat{}
|
|
|
|
_, err := scoutfsctl(f, IOCSTATMORE, unsafe.Pointer(&s))
|
|
if err != nil {
|
|
return Stat{}, err
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
// SetAttrMore sets special scoutfs attributes
|
|
func SetAttrMore(path string, version, size, flags uint64, ctime time.Time, crtime time.Time) error {
|
|
f, err := os.OpenFile(path, os.O_RDWR, 0600)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
return FSetAttrMore(f, version, size, flags, ctime, crtime)
|
|
}
|
|
|
|
// FSetAttrMore sets special scoutfs attributes for file handle
|
|
func FSetAttrMore(f *os.File, version, size, flags uint64, ctime time.Time, crtime time.Time) error {
|
|
var cnsec int32
|
|
var crnsec int32
|
|
if ctime.Nanosecond() == int(int32(ctime.Nanosecond())) {
|
|
cnsec = int32(ctime.Nanosecond())
|
|
}
|
|
if crtime.Nanosecond() == int(int32(crtime.Nanosecond())) {
|
|
crnsec = int32(crtime.Nanosecond())
|
|
}
|
|
s := setattrMore{
|
|
Data_version: version,
|
|
I_size: size,
|
|
Flags: flags,
|
|
Ctime_sec: uint64(ctime.Unix()),
|
|
Ctime_nsec: uint32(cnsec),
|
|
Crtime_sec: uint64(crtime.Unix()),
|
|
Crtime_nsec: uint32(crnsec),
|
|
}
|
|
|
|
_, err := scoutfsctl(f, IOCSETATTRMORE, unsafe.Pointer(&s))
|
|
return err
|
|
}
|
|
|
|
// inoPathResult is the result record filled in by the ino-to-path ioctl:
// the parent directory inode, the entry position within it, and a
// NUL-padded path of up to pathmax bytes.
type inoPathResult struct {
	DirIno   uint64
	DirPos   uint64
	PathSize uint16
	_        [6]uint8 // padding to align Path
	Path     [pathmax]byte
}
|
|
|
|
// InoToPath converts an inode number to a path in the filesystem
// An open file within scoutfs is supplied for ioctls
// (usually just the base mount point directory)
// Only one (arbitrary) link is returned; see InoToPaths for all links.
func InoToPath(dirfd *os.File, ino uint64) (string, error) {
	var res inoPathResult
	ip := inoPath{
		Ino:          ino,
		Result_ptr:   uint64(uintptr(unsafe.Pointer(&res))),
		Result_bytes: uint16(unsafe.Sizeof(res)),
	}

	_, err := scoutfsctl(dirfd, IOCINOPATH, unsafe.Pointer(&ip))
	if err != nil {
		return "", err
	}

	// Strip NUL padding from the fixed-size path buffer.
	b := bytes.Trim(res.Path[:res.PathSize], "\x00")

	return string(b), nil
}
|
|
|
|
// InoToPaths converts an inode number to all paths in the filesystem
// An open file within scoutfs is supplied for ioctls
// (usually just the base mount point directory)
// Iterates the ioctl, advancing past each returned (dir inode, dir pos)
// link until the kernel reports ENOENT (no more links).
func InoToPaths(dirfd *os.File, ino uint64) ([]string, error) {
	var res inoPathResult
	ip := inoPath{
		Ino:          ino,
		Result_ptr:   uint64(uintptr(unsafe.Pointer(&res))),
		Result_bytes: uint16(unsafe.Sizeof(res)),
	}

	var paths []string
	for {
		_, err := scoutfsctl(dirfd, IOCINOPATH, unsafe.Pointer(&ip))
		if err == syscall.ENOENT {
			// No further links for this inode.
			break
		}
		if err != nil {
			return nil, err
		}

		// Strip NUL padding from the fixed-size path buffer.
		b := bytes.Trim(res.Path[:res.PathSize], "\x00")
		paths = append(paths, string(b))

		// Resume just past the link we were given, carrying the
		// position wraparound into the directory inode.
		ip.Dir_ino = res.DirIno
		ip.Dir_pos = res.DirPos
		ip.Dir_pos++
		if ip.Dir_pos == 0 {
			ip.Dir_ino++
			if ip.Dir_ino == 0 {
				// Both counters wrapped: the keyspace is exhausted.
				break
			}
		}
	}

	return paths, nil
}
|
|
|
|
// OpenByID will open a file by inode returning a typical *os.File
|
|
// An open file within scoutfs is supplied for ioctls
|
|
// (usually just the base mount point directory)
|
|
// The filename supplied is used for the *os.File info, but can be "" if
|
|
// not known or needed
|
|
func OpenByID(dirfd *os.File, ino uint64, flags int, name string) (*os.File, error) {
|
|
fd, err := OpenByHandle(dirfd, ino, flags)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
return os.NewFile(fd, name), nil
|
|
}
|
|
|
|
// ReleaseFile sets file offline by freeing associated extents
|
|
func ReleaseFile(path string, version uint64) error {
|
|
f, err := os.OpenFile(path, os.O_WRONLY, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
return FReleaseFile(f, version)
|
|
}
|
|
|
|
// FReleaseFile set file offline by freeing associated extents
|
|
func FReleaseFile(f *os.File, version uint64) error {
|
|
fi, err := f.Stat()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
r := iocRelease{
|
|
Length: divRoundUp(uint64(fi.Size()), scoutfsBS),
|
|
Version: version,
|
|
}
|
|
|
|
_, err = scoutfsctl(f, IOCRELEASE, unsafe.Pointer(&r))
|
|
return err
|
|
}
|
|
|
|
// divRoundUp rounds size up to the next multiple of bs
// (size already on a bs boundary is returned unchanged).
func divRoundUp(size, bs uint64) uint64 {
	if r := size % bs; r != 0 {
		return size - r + bs
	}
	return size
}
|
|
|
|
// ReleaseBlocks marks blocks offline and frees associated extents
|
|
// offset/length must be 4k aligned
|
|
func ReleaseBlocks(path string, offset, length, version uint64) error {
|
|
f, err := os.OpenFile(path, os.O_WRONLY, 0)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer f.Close()
|
|
|
|
return FReleaseBlocks(f, offset, length, version)
|
|
}
|
|
|
|
// FReleaseBlocks marks blocks offline and frees associated extents
|
|
// offset/length must be 4k aligned
|
|
func FReleaseBlocks(f *os.File, offset, length, version uint64) error {
|
|
r := iocRelease{
|
|
Offset: offset,
|
|
Length: length,
|
|
Version: version,
|
|
}
|
|
|
|
_, err := scoutfsctl(f, IOCRELEASE, unsafe.Pointer(&r))
|
|
return err
|
|
}
|
|
|
|
// StageFile rehydrates offline file
|
|
func StageFile(path string, version, offset uint64, b []byte) (int, error) {
|
|
f, err := os.OpenFile(path, os.O_WRONLY, 0)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
defer f.Close()
|
|
|
|
return FStageFile(f, version, offset, b)
|
|
}
|
|
|
|
// FStageFile rehydrates offline file
// Writes len(b) bytes from b at the given offset of the offline region.
// NOTE(review): panics if b is empty (&b[0]) and truncates lengths
// beyond int32 range — callers are expected to pass non-empty, <2GiB
// buffers; confirm against the ioctl contract.
func FStageFile(f *os.File, version, offset uint64, b []byte) (int, error) {
	r := iocStage{
		Data_version: version,
		Buf_ptr:      uint64(uintptr(unsafe.Pointer(&b[0]))),
		Offset:       offset,
		Length:       int32(len(b)),
	}

	return scoutfsctl(f, IOCSTAGE, unsafe.Pointer(&r))
}
|
|
|
|
// Waiters to keep track of data waiters
type Waiters struct {
	ino    uint64   // resume position: inode of last entry returned
	iblock uint64   // resume position: block of last entry returned
	batch  uint16   // max entries returned per Next() call
	fsfd   *os.File // open file within scoutfs used for ioctls
	buf    []byte   // reusable result buffer sized for one batch
}
|
|
|
|
// NewWaiters creates a new scoutfs Waiters
|
|
// An open file within scoutfs is supplied for ioctls
|
|
// (usually just the base mount point directory)
|
|
func NewWaiters(f *os.File, opts ...WOption) *Waiters {
|
|
w := &Waiters{
|
|
//default batch size is 128
|
|
batch: 128,
|
|
fsfd: f,
|
|
}
|
|
|
|
for _, opt := range opts {
|
|
opt(w)
|
|
}
|
|
|
|
w.buf = make([]byte, int(unsafe.Sizeof(DataWaitingEntry{}))*int(w.batch))
|
|
|
|
return w
|
|
}
|
|
|
|
// WOption sets various options for NewWaiters
type WOption func(*Waiters)
|
|
|
|
// WithWaitersCount sets the max number of inodes to be returned at a time
|
|
func WithWaitersCount(size uint16) WOption {
|
|
return func(w *Waiters) {
|
|
w.batch = size
|
|
}
|
|
}
|
|
|
|
// Next gets the next batch of data waiters, returns nil, nil if no waiters
func (w *Waiters) Next() ([]DataWaitingEntry, error) {
	// Ask for entries strictly after the last (ino, iblock) we returned.
	dataWaiting := dataWaiting{
		After_ino:    w.ino,
		After_iblock: w.iblock,
		Ents_ptr:     uint64(uintptr(unsafe.Pointer(&w.buf[0]))),
		Ents_nr:      w.batch,
	}

	// n is the number of entries the kernel wrote into w.buf.
	n, err := scoutfsctl(w.fsfd, IOCDATAWAITING, unsafe.Pointer(&dataWaiting))
	if err != nil {
		return nil, err
	}

	if n == 0 {
		return nil, nil
	}

	// Decode the packed little-endian entries out of the raw buffer.
	rbuf := bytes.NewReader(w.buf)
	inodes := make([]DataWaitingEntry, n)

	var e DataWaitingEntry
	for i := 0; i < n; i++ {
		err := binary.Read(rbuf, binary.LittleEndian, &e)
		if err != nil {
			return nil, err
		}

		inodes[i] = e
	}

	// Remember the last entry so the next call resumes after it.
	w.ino = inodes[n-1].Ino
	w.iblock = inodes[n-1].Iblock

	return inodes, nil
}
|
|
|
|
// Reset sets the data waiters query back to inode 0, iblock 0
|
|
func (w *Waiters) Reset() {
|
|
w.ino = 0
|
|
w.iblock = 0
|
|
}
|
|
|
|
// SendDataWaitErr sends an error to the data waiter task indicating that
|
|
// the data is no longer aviable.
|
|
// An open file within scoutfs is supplied for ioctls
|
|
// (usually just the base mount point directory)
|
|
func SendDataWaitErr(dirfd *os.File, ino, version, offset, op, count uint64, errno int64) error {
|
|
derr := dataWaitErr{
|
|
Ino: ino,
|
|
Version: version,
|
|
Offset: offset,
|
|
Count: count,
|
|
Op: op,
|
|
Err: errno,
|
|
}
|
|
|
|
_, err := scoutfsctl(dirfd, IOCDATAWAITERR, unsafe.Pointer(&derr))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// XattrQuery to keep track of in-process xattr query
type XattrQuery struct {
	next  uint64   // next inode to query (inclusive)
	batch uint64   // max inodes returned per Next() call
	key   string   // xattr name being searched for
	fsfd  *os.File // open file within scoutfs used for ioctls
	buf   []byte   // reusable result buffer (8 bytes per inode)
	done  bool     // set once the kernel signals the end of results
}
|
|
|
|
// NewXattrQuery creates a new scoutfs Xattr Query
|
|
// Specify query xattr key
|
|
// and specify optinally batching with WithXBatchSize()
|
|
// An open file within scoutfs is supplied for ioctls
|
|
// (usually just the base mount point directory)
|
|
func NewXattrQuery(f *os.File, key string, opts ...XOption) *XattrQuery {
|
|
q := &XattrQuery{
|
|
// default batch size is 131072 for a nice round 1MB allocation.
|
|
// making this too small risks multiple calls into Next() wich
|
|
// has significant overhead per call.
|
|
batch: (128 * 1024),
|
|
key: key,
|
|
fsfd: f,
|
|
}
|
|
|
|
for _, opt := range opts {
|
|
opt(q)
|
|
}
|
|
|
|
q.buf = make([]byte, 8*int(q.batch))
|
|
|
|
return q
|
|
}
|
|
|
|
// XOption sets various options for NewXattrQuery
type XOption func(*XattrQuery)
|
|
|
|
// WithXBatchSize sets the max number of inodes to be returned at a time
|
|
func WithXBatchSize(size uint64) XOption {
|
|
return func(q *XattrQuery) {
|
|
q.batch = size
|
|
}
|
|
}
|
|
|
|
// WithXStartIno starts query at speficied inode
|
|
func WithXStartIno(ino uint64) XOption {
|
|
return func(q *XattrQuery) {
|
|
q.next = ino
|
|
}
|
|
}
|
|
|
|
// Next gets the next batch of inodes
// Returns nil, nil once the search has been exhausted.
func (q *XattrQuery) Next() ([]uint64, error) {
	// name must stay referenced while the ioctl runs since only its
	// raw address is passed to the kernel.
	name := []byte(q.key)
	query := searchXattrs{
		Next_ino:   q.next,
		Last_ino:   max64,
		Name_ptr:   uint64(uintptr(unsafe.Pointer(&name[0]))),
		Inodes_ptr: uint64(uintptr(unsafe.Pointer(&q.buf[0]))),
		Name_bytes: uint16(len(name)),
		Nr_inodes:  q.batch,
	}

	if q.done {
		return nil, nil
	}

	// n is the number of inode numbers the kernel wrote into q.buf.
	n, err := scoutfsctl(q.fsfd, IOCSEARCHXATTRS, unsafe.Pointer(&query))
	if err != nil {
		return nil, err
	}

	// The kernel sets this output flag when the search is complete.
	if query.Output_flags&SEARCHXATTRSOFLAGEND != 0 {
		q.done = true
	}

	if n == 0 {
		return nil, nil
	}

	// Decode the packed little-endian inode numbers.
	rbuf := bytes.NewReader(q.buf)
	inodes := make([]uint64, n)

	var e uint64
	for i := 0; i < n; i++ {
		err := binary.Read(rbuf, binary.LittleEndian, &e)
		if err != nil {
			return nil, err
		}

		inodes[i] = e
	}

	// Resume the next call just past the last inode returned.
	q.next = e
	q.next++

	return inodes, nil
}
|
|
|
|
// ListXattrHidden holds info for iterating on xattrs
type ListXattrHidden struct {
	lxr *listXattrHidden // kernel-side cursor state, updated in place per call
	f   *os.File         // file whose xattrs are listed
	buf []byte           // result buffer of NUL-separated names
}
|
|
|
|
// NewListXattrHidden will list all scoutfs xattrs (including hidden) for file.
|
|
// If passed in buffer is nil, call will allocate its own buffer.
|
|
func NewListXattrHidden(f *os.File, b []byte) *ListXattrHidden {
|
|
if b == nil {
|
|
b = make([]byte, listattrBufsize)
|
|
}
|
|
return &ListXattrHidden{
|
|
f: f,
|
|
lxr: &listXattrHidden{},
|
|
buf: b,
|
|
}
|
|
}
|
|
|
|
// Next gets next set of results, complete when string slice is nil
func (l *ListXattrHidden) Next() ([]string, error) {
	// Point the cursor struct at our buffer; the kernel advances the
	// cursor fields inside l.lxr between calls.
	l.lxr.Buf_bytes = uint32(len(l.buf))
	l.lxr.Buf_ptr = uint64(uintptr(unsafe.Pointer(&l.buf[0])))

	// n is the number of bytes of NUL-separated names written.
	n, err := scoutfsctl(l.f, IOCLISTXATTRHIDDEN, unsafe.Pointer(l.lxr))
	if err != nil {
		return nil, err
	}

	if n == 0 {
		return nil, nil
	}

	return bufToStrings(l.buf[:n]), nil
}
|
|
|
|
// bufToStrings splits a buffer of NUL-terminated strings into a slice.
// Any trailing bytes after the final NUL are discarded.
func bufToStrings(b []byte) []string {
	var out []string
	for i := bytes.IndexByte(b, 0); i >= 0; i = bytes.IndexByte(b, 0) {
		out = append(out, string(b[:i]))
		b = b[i+1:]
	}
	return out
}
|
|
|
|
// FSID contains the statfs more info for mounted scoutfs filesystem
type FSID struct {
	FSID         uint64 // filesystem id
	RandomID     uint64 // per-mount random id
	ShortID      string // "f.<6 hex>.r.<6 hex>" form used in sysfs paths
	CommittedSeq uint64 // last committed sequence number
}
|
|
|
|
// GetIDs gets the statfs more filesystem and random id from file handle within
|
|
// scoutfs filesystem
|
|
func GetIDs(f *os.File) (FSID, error) {
|
|
stfs := statfsMore{}
|
|
|
|
_, err := scoutfsctl(f, IOCSTATFSMORE, unsafe.Pointer(&stfs))
|
|
if err != nil {
|
|
return FSID{}, fmt.Errorf("statfs more: %v", err)
|
|
}
|
|
|
|
short := fmt.Sprintf("f.%v.r.%v",
|
|
fmt.Sprintf("%016x", stfs.Fsid)[:][:6], fmt.Sprintf("%016x", stfs.Rid)[:][:6])
|
|
|
|
return FSID{
|
|
FSID: stfs.Fsid,
|
|
RandomID: stfs.Rid,
|
|
ShortID: short,
|
|
CommittedSeq: stfs.Committed_seq,
|
|
}, nil
|
|
}
|
|
|
|
// QuorumInfo holds info for current mount quorum
type QuorumInfo struct {
	Slot int64  // quorum slot number for this mount
	Term int64  // current quorum term
	Role string // role string from sysfs, e.g. "(leader)"
}
|
|
|
|
// IsLeader returns true if quorum status is a leader role
|
|
func (q QuorumInfo) IsLeader() bool {
|
|
return q.Role == "(leader)"
|
|
}
|
|
|
|
// GetQuorumInfo returns quorum info for curren mount
|
|
func GetQuorumInfo(path string) (QuorumInfo, error) {
|
|
f, err := os.Open(path)
|
|
if err != nil {
|
|
return QuorumInfo{}, fmt.Errorf("open: %v", err)
|
|
}
|
|
defer f.Close()
|
|
|
|
id, err := GetIDs(f)
|
|
if err != nil {
|
|
return QuorumInfo{}, fmt.Errorf("error GetIDs: %v", err)
|
|
}
|
|
|
|
sfspath := filepath.Join(sysscoutfs, id.ShortID, statusfile)
|
|
sfs, err := os.Open(sfspath)
|
|
if err != nil {
|
|
return QuorumInfo{}, fmt.Errorf("open %q: %v", sfspath, err)
|
|
}
|
|
defer sfs.Close()
|
|
|
|
qi := QuorumInfo{}
|
|
scanner := bufio.NewScanner(sfs)
|
|
for scanner.Scan() {
|
|
fields := strings.Fields(scanner.Text())
|
|
if len(fields) < 2 {
|
|
return QuorumInfo{}, fmt.Errorf("parse line (%q): %q",
|
|
sfspath, scanner.Text())
|
|
}
|
|
switch fields[0] {
|
|
case "quorum_slot_nr":
|
|
qi.Slot, err = strconv.ParseInt(fields[1], 10, 64)
|
|
if err != nil {
|
|
return QuorumInfo{}, fmt.Errorf("parse quorum_slot_nr %q: %v",
|
|
fields[1], err)
|
|
}
|
|
case "term":
|
|
qi.Term, err = strconv.ParseInt(fields[1], 10, 64)
|
|
if err != nil {
|
|
return QuorumInfo{}, fmt.Errorf("term %q: %v",
|
|
fields[1], err)
|
|
}
|
|
case "role":
|
|
if len(fields) < 3 {
|
|
return QuorumInfo{}, fmt.Errorf("parse line (%q): %q",
|
|
sfspath, scanner.Text())
|
|
}
|
|
qi.Role = fields[2]
|
|
}
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
return QuorumInfo{}, fmt.Errorf("parse %q: %v", sfspath, err)
|
|
}
|
|
|
|
return qi, nil
|
|
}
|
|
|
|
// DiskUsage holds usage information reported by the filesystem
type DiskUsage struct {
	TotalMetaBlocks uint64 // total metadata blocks in the filesystem
	FreeMetaBlocks  uint64 // free metadata blocks
	TotalDataBlocks uint64 // total data blocks in the filesystem
	FreeDataBlocks  uint64 // free data blocks
}
|
|
|
|
// dfBatchCount is the initial number of alloc detail entries requested;
// GetDF doubles it on EOVERFLOW until the results fit.
var dfBatchCount uint64 = 4096

// metaFlag marks an alloc detail entry as metadata (vs data).
var metaFlag uint8 = 0x1
|
|
|
|
// GetDF returns usage data for the filesystem
// Totals come from the statfs-more ioctl; free counts are summed from
// the per-allocator detail entries.
func GetDF(f *os.File) (DiskUsage, error) {
	stfs := statfsMore{}

	_, err := scoutfsctl(f, IOCSTATFSMORE, unsafe.Pointer(&stfs))
	if err != nil {
		return DiskUsage{}, fmt.Errorf("statfs more: %v", err)
	}

	// Fetch allocator details, doubling the buffer on EOVERFLOW until
	// all entries fit in one call.
	nr := dfBatchCount
	buf := make([]byte, int(unsafe.Sizeof(allocDetailEntry{}))*int(nr))
	var ret int
	for {
		ad := allocDetail{
			Nr:  nr,
			Ptr: uint64(uintptr(unsafe.Pointer(&buf[0]))),
		}
		ret, err = scoutfsctl(f, IOCALLOCDETAIL, unsafe.Pointer(&ad))
		if err == syscall.EOVERFLOW {
			nr = nr * 2
			buf = make([]byte, int(unsafe.Sizeof(allocDetailEntry{}))*int(nr))
			continue
		}
		if err != nil {
			return DiskUsage{}, fmt.Errorf("alloc detail: %v", err)
		}
		break
	}

	// Sum free blocks, splitting entries into metadata vs data by flag.
	rbuf := bytes.NewReader(buf)
	var ade allocDetailEntry
	var metaFree, dataFree uint64
	for i := 0; i < ret; i++ {
		err := binary.Read(rbuf, binary.LittleEndian, &ade)
		if err != nil {
			return DiskUsage{}, fmt.Errorf("parse alloc detail results: %v", err)
		}
		if ade.Flags&metaFlag != 0 {
			metaFree += ade.Blocks
		} else {
			dataFree += ade.Blocks
		}
	}

	return DiskUsage{
		TotalMetaBlocks: stfs.Total_meta_blocks,
		FreeMetaBlocks:  metaFree,
		TotalDataBlocks: stfs.Total_data_blocks,
		FreeDataBlocks:  dataFree,
	}, nil
}
|
|
|
|
// MoveData will move all of the extents in "from" file handle
|
|
// and append to the end of "to" file handle.
|
|
// The end of "to" must be 4KB aligned boundary.
|
|
// errors this can return:
|
|
// EINVAL: from_off, len, or to_off aren't a multiple of 4KB; the source
|
|
//
|
|
// and destination files are the same inode; either the source or
|
|
// destination is not a regular file; the destination file has
|
|
// an existing overlapping extent.
|
|
//
|
|
// EOVERFLOW: either from_off + len or to_off + len exceeded 64bits.
|
|
// EBADF: from_fd isn't a valid open file descriptor.
|
|
// EXDEV: the source and destination files are in different filesystems.
|
|
// EISDIR: either the source or destination is a directory.
|
|
// ENODATA: either the source or destination file have offline extents.
|
|
func MoveData(from, to *os.File) error {
|
|
ffi, err := from.Stat()
|
|
if err != nil {
|
|
return fmt.Errorf("stat from: %v", err)
|
|
}
|
|
tfi, err := to.Stat()
|
|
if err != nil {
|
|
return fmt.Errorf("stat to: %v", err)
|
|
}
|
|
|
|
mb := moveBlocks{
|
|
From_fd: uint64(from.Fd()),
|
|
From_off: 0,
|
|
Len: uint64(ffi.Size()),
|
|
To_off: uint64(tfi.Size()),
|
|
}
|
|
|
|
_, err = scoutfsctl(to, IOCMOVEBLOCKS, unsafe.Pointer(&mb))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
from.Truncate(0)
|
|
from.Seek(0, io.SeekStart)
|
|
|
|
return nil
|
|
}
|
|
|
|
// StageMove will move all of the extents in "from" file handle
|
|
// and stage the offline extents at offset "offset" in "to" file handle.
|
|
// The size of from and offset of "to" must be 4KB aligned boundary.
|
|
// errors this can return:
|
|
// EINVAL: from_off, len, or to_off aren't a multiple of 4KB; the source
|
|
//
|
|
// and destination files are the same inode; either the source or
|
|
// destination is not a regular file; the destination file has
|
|
// an existing overlapping extent.
|
|
//
|
|
// EOVERFLOW: either from_off + len or to_off + len exceeded 64bits.
|
|
// EBADF: from_fd isn't a valid open file descriptor.
|
|
// EXDEV: the source and destination files are in different filesystems.
|
|
// EISDIR: either the source or destination is a directory.
|
|
// ENODATA: either the source or destination file have offline extents.
|
|
func StageMove(from, to *os.File, offset, version uint64) error {
|
|
ffi, err := from.Stat()
|
|
if err != nil {
|
|
return fmt.Errorf("stat from: %v", err)
|
|
}
|
|
|
|
mb := moveBlocks{
|
|
From_fd: uint64(from.Fd()),
|
|
From_off: 0,
|
|
Len: uint64(ffi.Size()),
|
|
To_off: offset,
|
|
Data_version: version,
|
|
Flags: MBSTAGEFLG,
|
|
}
|
|
|
|
_, err = scoutfsctl(to, IOCMOVEBLOCKS, unsafe.Pointer(&mb))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
from.Truncate(0)
|
|
from.Seek(0, io.SeekStart)
|
|
|
|
return nil
|
|
}
|
|
|
|
// StageMoveAt will move the extents (based on len) in "from" file handle at
|
|
// given offset to the "to" file handle at given offset.
|
|
// All offsets must be 4KB aligned boundary.
|
|
// All destination offsets must be offline extents.
|
|
// EINVAL: from_off, len, or to_off aren't a multiple of 4KB; the source
|
|
//
|
|
// and destination files are the same inode; either the source or
|
|
// destination is not a regular file; the destination file has
|
|
// an existing overlapping extent.
|
|
//
|
|
// EOVERFLOW: either from_off + len or to_off + len exceeded 64bits.
|
|
// EBADF: from_fd isn't a valid open file descriptor.
|
|
// EXDEV: the source and destination files are in different filesystems.
|
|
// EISDIR: either the source or destination is a directory.
|
|
// ENODATA: either the source or destination file have offline extents.
|
|
func StageMoveAt(from, to *os.File, len, fromOffset, toOffset, version uint64) error {
|
|
mb := moveBlocks{
|
|
From_fd: uint64(from.Fd()),
|
|
From_off: fromOffset,
|
|
Len: len,
|
|
To_off: toOffset,
|
|
Data_version: version,
|
|
Flags: MBSTAGEFLG,
|
|
}
|
|
|
|
_, err := scoutfsctl(to, IOCMOVEBLOCKS, unsafe.Pointer(&mb))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// XattrTotal has the total values matching id triple
type XattrTotal struct {
	// Total is sum of all xattr values matching ids
	Total uint64
	// Count is number of xattrs matching ids
	Count uint64
	// ID is the id for this total
	ID [3]uint64
}
|
|
|
|
// ReadXattrTotals returns the XattrTotal for the given id
// Returns a zero XattrTotal if no total exists for exactly (id1, id2, id3).
func ReadXattrTotals(f *os.File, id1, id2, id3 uint64) (XattrTotal, error) {
	// One result slot; the kernel fills it starting at the position name.
	totls := make([]xattrTotal, 1)

	query := readXattrTotals{
		Pos_name:     [3]uint64{id1, id2, id3},
		Totals_ptr:   uint64(uintptr(unsafe.Pointer(&totls[0]))),
		Totals_bytes: sizeofxattrTotal,
	}

	n, err := scoutfsctl(f, IOCREADXATTRTOTALS, unsafe.Pointer(&query))
	if err != nil {
		return XattrTotal{}, err
	}
	// The ioctl returns the next total at-or-after the position, so
	// verify it is exactly the requested id triple.
	if n == 0 ||
		totls[0].Name[0] != id1 ||
		totls[0].Name[1] != id2 ||
		totls[0].Name[2] != id3 {
		return XattrTotal{}, nil
	}

	return XattrTotal{
		Total: totls[0].Total,
		Count: totls[0].Count,
	}, nil
}
|
|
|
|
// TotalsGroup iterates the xattr totals whose first two ids match
// (id1, id2), in batches of count entries per Next() call.
type TotalsGroup struct {
	totls []xattrTotal // reusable result buffer of count entries
	pos   [3]uint64    // next position to read from
	id1   uint64       // group id, first component
	id2   uint64       // group id, second component
	count int          // max entries per Next() call
	f     *os.File     // open file within scoutfs used for ioctls
	done  bool         // set when the group has been fully iterated
}
|
|
|
|
// NewTotalsGroup creates a query to get the totals values for a defined
|
|
// group of totls (group is defined to match first 2 identifiers). Count
|
|
// specifies max number returned for each Next() call.
|
|
func NewTotalsGroup(f *os.File, id1, id2 uint64, count int) *TotalsGroup {
|
|
totls := make([]xattrTotal, count)
|
|
|
|
return &TotalsGroup{
|
|
totls: totls,
|
|
f: f,
|
|
id1: id1,
|
|
id2: id2,
|
|
count: count,
|
|
pos: [3]uint64{id1, id2, 0},
|
|
}
|
|
}
|
|
|
|
// Next returns next set of total values for the group
// Returns nil, nil once the group is exhausted.
func (t *TotalsGroup) Next() ([]XattrTotal, error) {
	if t.done {
		return nil, nil
	}

	query := readXattrTotals{
		Pos_name:     t.pos,
		Totals_ptr:   uint64(uintptr(unsafe.Pointer(&t.totls[0]))),
		Totals_bytes: sizeofxattrTotal * uint64(t.count),
	}

	// n is the number of totals the kernel wrote into t.totls.
	n, err := scoutfsctl(t.f, IOCREADXATTRTOTALS, unsafe.Pointer(&query))
	if err != nil {
		return nil, err
	}
	if n == 0 {
		t.done = true
		return nil, nil
	}

	// Resume after the last entry returned; if its third id component
	// is already at the max, incrementing would wrap, so mark done
	// first (the increment below is then harmless).
	t.pos = t.totls[n-1].Name
	if t.pos[2] == math.MaxUint64 {
		t.done = true
	}
	t.pos[2]++

	// Copy out results, stopping early if the ioctl strayed past the
	// (id1, id2) group boundary.
	ret := make([]XattrTotal, n)
	for i := range ret {
		if t.totls[i].Name[0] != t.id1 || t.totls[i].Name[1] != t.id2 {
			t.done = true
			// id sequence we want is done
			return ret[:i], nil
		}
		ret[i].Count = t.totls[i].Count
		ret[i].Total = t.totls[i].Total
		ret[i].ID = t.totls[i].Name
	}
	return ret, nil
}
|
|
|
|
// Reset resets the totl query to the start of the group id again
|
|
func (t *TotalsGroup) Reset() {
|
|
t.done = false
|
|
t.pos[0] = t.id1
|
|
t.pos[1] = t.id2
|
|
t.pos[2] = 0
|
|
}
|
|
|
|
// Parent contains inode of parent and what the child inode is named within
// this parent
type Parent struct {
	Ino  uint64 // Parent inode
	Pos  uint64 // Entry directory position in parent
	Type uint8  // Entry inode type matching DT_ enum values in readdir(3)
	Ent  string // Entry name as known by parent
}
|
|
|
|
// GetParents returns all parents for the given inode
// An open file within scoutfs is supplied for ioctls
// (usually just the base mount point directory)
// If passed in buffer is nil, call will allocate its own buffer.
func GetParents(dirfd *os.File, ino uint64, b []byte) ([]Parent, error) {
	if b == nil {
		b = make([]byte, getparentBufsize)
	}

	gre := getReferringEntries{}

	gre.Entries_bytes = uint64(len(b))
	gre.Entries_ptr = uint64(uintptr(unsafe.Pointer(&b[0])))
	gre.Ino = ino

	var parents []Parent

	// NOTE(review): repeated calls pass the same gre struct; this loop
	// assumes the kernel updates gre in place to advance the position
	// between calls when the buffer fills — confirm against the
	// GETREFERRINGENTRIES ioctl definition.
	for {
		// n is the number of bytes of dirent records written into b.
		n, err := scoutfsctl(dirfd, IOCGETREFERRINGENTRIES, unsafe.Pointer(&gre))
		if err != nil {
			return nil, err
		}
		if n == 0 {
			break
		}

		ents, isLast, err := parseDents(b)
		if err != nil {
			return nil, err
		}

		parents = append(parents, ents...)
		// The last entry of the final batch carries DIRENTFLAGLAST.
		if isLast {
			break
		}
	}

	return parents, nil
}
|
|
|
|
// parseDents decodes a buffer of packed dirent records into Parents.
// The bool result reports whether the last record seen carried the
// DIRENTFLAGLAST flag (i.e. the overall listing is complete).
func parseDents(b []byte) ([]Parent, bool, error) {
	r := bytes.NewReader(b)
	var parents []Parent
	var isLast bool
	for {
		var err error
		var parent Parent
		parent, isLast, err = parseDent(r)
		// EOF here means we've cleanly consumed the buffer.
		if errors.Is(err, io.EOF) {
			break
		}
		if err != nil {
			return nil, false, err
		}
		parents = append(parents, parent)
		if isLast {
			break
		}
		if r.Len() == 0 {
			break
		}
	}
	return parents, isLast, nil
}
|
|
|
|
// dirent mirrors the fixed-size header of the kernel's referring-entry
// record; it is followed in the buffer by Name_len bytes of name and
// padding out to Entry_bytes.
type dirent struct {
	Dir_ino     uint64 // parent directory inode
	Dir_pos     uint64 // entry position within the parent
	Ino         uint64 // child inode
	Entry_bytes uint16 // total on-wire size of this record
	Flags       uint8  // DIRENTFLAG* bits
	D_type      uint8  // DT_* type of the child inode
	Name_len    uint8  // length of the name that follows
}
|
|
|
|
// parseDent reads one dirent record (header, name, padding) from r and
// returns it as a Parent. The bool result is true when the record's
// DIRENTFLAGLAST flag is set, marking the final entry of the listing.
func parseDent(r *bytes.Reader) (Parent, bool, error) {
	var dent dirent
	err := binary.Read(r, binary.LittleEndian, &dent)
	if err != nil {
		return Parent{}, false, err
	}

	// Read the name that immediately follows the header.
	b := new(strings.Builder)
	_, err = io.CopyN(b, r, int64(dent.Name_len))
	if err != nil {
		return Parent{}, false, err
	}

	// Skip padding up to the start of the next record.
	// NOTE(review): binary.Read above consumes binary.Size(dent) bytes
	// while this math uses unsafe.Sizeof(dent) (which includes Go
	// struct padding) and adds Name_len — confirm this matches the
	// kernel's on-wire record layout.
	pad := int(dent.Entry_bytes) - int(unsafe.Sizeof(dent)) + int(dent.Name_len)
	for i := 0; i < pad; i++ {
		_, err = r.ReadByte()
		if err != nil {
			return Parent{}, false, err
		}
	}

	return Parent{
		Ino:  dent.Dir_ino,
		Pos:  dent.Dir_pos,
		Type: dent.D_type,
		Ent:  b.String(),
	}, dent.Flags&DIRENTFLAGLAST == DIRENTFLAGLAST, nil
}
|