feat: Implement transparent compression based on GZip

This commit is contained in:
Felicitas Pojtinger
2021-11-30 17:51:09 +01:00
parent 8f93dcfef9
commit 82621af612
7 changed files with 196 additions and 25 deletions

View File

@@ -2,15 +2,19 @@ package cmd
import (
"archive/tar"
"compress/gzip"
"context"
"io"
"io/fs"
"os"
"path/filepath"
"strconv"
"github.com/pojntfx/stfs/pkg/adapters"
"github.com/pojntfx/stfs/pkg/controllers"
"github.com/pojntfx/stfs/pkg/counters"
"github.com/pojntfx/stfs/pkg/formatting"
"github.com/pojntfx/stfs/pkg/pax"
"github.com/pojntfx/stfs/pkg/persisters"
"github.com/spf13/cobra"
"github.com/spf13/viper"
@@ -58,6 +62,7 @@ var archiveCmd = &cobra.Command{
viper.GetInt(recordSizeFlag),
viper.GetString(srcFlag),
viper.GetBool(overwriteFlag),
viper.GetString(compressionFlag),
); err != nil {
return err
}
@@ -69,6 +74,7 @@ var archiveCmd = &cobra.Command{
int(lastIndexedRecord),
int(lastIndexedBlock),
viper.GetBool(overwriteFlag),
viper.GetString(compressionFlag),
)
},
}
@@ -78,6 +84,7 @@ func archive(
recordSize int,
src string,
overwrite bool,
compressionFormat string,
) error {
dirty := false
tw, isRegular, cleanup, err := openTapeWriter(tape)
@@ -162,6 +169,47 @@ func archive(
hdr.Name = path
hdr.Format = tar.FormatPAX
if info.Mode().IsRegular() {
switch compressionFormat {
case compressionFormatGZipKey:
// Get the compressed size for the header
file, err := os.Open(path)
if err != nil {
return err
}
fileSizeCounter := counters.CounterWriter{
Writer: io.Discard,
}
gz := gzip.NewWriter(&fileSizeCounter)
if _, err := io.Copy(gz, file); err != nil {
return err
}
if err := gz.Flush(); err != nil {
return err
}
if err := gz.Close(); err != nil {
return err
}
if err := file.Close(); err != nil {
return err
}
if hdr.PAXRecords == nil {
hdr.PAXRecords = map[string]string{}
}
hdr.PAXRecords[pax.STFSRecordUncompressedSize] = strconv.Itoa(int(hdr.Size))
hdr.Size = int64(fileSizeCounter.BytesRead)
hdr.Name += compressionFormatGZipSuffix
case compressionFormatNoneKey:
default:
return errUnsupportedCompressionFormat
}
}
if first {
if err := formatting.PrintCSV(formatting.TARHeaderCSV); err != nil {
return err
@@ -182,21 +230,59 @@ func archive(
return nil
}
file, err := os.Open(path)
if err != nil {
return err
}
defer file.Close()
switch compressionFormat {
case compressionFormatGZipKey:
// Compress and write the file
file, err := os.Open(path)
if err != nil {
return err
}
if isRegular {
if _, err := io.Copy(tw, file); err != nil {
gz := gzip.NewWriter(tw)
if isRegular {
if _, err := io.Copy(gz, file); err != nil {
return err
}
} else {
buf := make([]byte, controllers.BlockSize*recordSize)
if _, err := io.CopyBuffer(gz, file, buf); err != nil {
return err
}
}
if err := gz.Flush(); err != nil {
return err
}
} else {
buf := make([]byte, controllers.BlockSize*recordSize)
if _, err := io.CopyBuffer(tw, file, buf); err != nil {
if err := gz.Close(); err != nil {
return err
}
if err := file.Close(); err != nil {
return err
}
case compressionFormatNoneKey:
// Write the file
file, err := os.Open(path)
if err != nil {
return err
}
if isRegular {
if _, err := io.Copy(tw, file); err != nil {
return err
}
} else {
buf := make([]byte, controllers.BlockSize*recordSize)
if _, err := io.CopyBuffer(tw, file, buf); err != nil {
return err
}
}
if err := file.Close(); err != nil {
return err
}
default:
return errUnsupportedCompressionFormat
}
dirty = true

View File

@@ -3,6 +3,7 @@ package cmd
import (
"archive/tar"
"bufio"
"compress/gzip"
"io"
"os"
"path/filepath"
@@ -41,6 +42,7 @@ var recoveryFetchCmd = &cobra.Command{
viper.GetString(dstFlag),
viper.GetBool(previewFlag),
true,
viper.GetString(compressionFlag),
)
},
}
@@ -53,6 +55,7 @@ func restoreFromRecordAndBlock(
dst string,
preview bool,
showHeader bool,
compressionFormat string,
) error {
f, isRegular, err := openTapeReadOnly(tape)
if err != nil {
@@ -116,8 +119,32 @@ func restoreFromRecordAndBlock(
return err
}
if _, err := io.Copy(dstFile, tr); err != nil {
return err
// Don't decompress non-regular files
if !hdr.FileInfo().Mode().IsRegular() {
if _, err := io.Copy(dstFile, tr); err != nil {
return err
}
return nil
}
switch compressionFormat {
case compressionFormatGZipKey:
gz, err := gzip.NewReader(tr)
if err != nil {
return err
}
defer gz.Close()
if _, err := io.Copy(dstFile, gz); err != nil {
return err
}
case compressionFormatNoneKey:
if _, err := io.Copy(dstFile, tr); err != nil {
return err
}
default:
return errUnsupportedCompressionFormat
}
}

View File

@@ -8,6 +8,8 @@ import (
"io/ioutil"
"math"
"os"
"strconv"
"strings"
"github.com/pojntfx/stfs/pkg/controllers"
"github.com/pojntfx/stfs/pkg/converters"
@@ -40,6 +42,7 @@ var recoveryIndexCmd = &cobra.Command{
viper.GetInt(recordFlag),
viper.GetInt(blockFlag),
viper.GetBool(overwriteFlag),
viper.GetString(compressionFlag),
)
},
}
@@ -51,6 +54,7 @@ func index(
record int,
block int,
overwrite bool,
compressionFormat string,
) error {
if overwrite {
f, err := os.OpenFile(metadata, os.O_WRONLY|os.O_CREATE, 0600)
@@ -131,7 +135,7 @@ func index(
}
}
if err := indexHeader(record, block, hdr, metadataPersister); err != nil {
if err := indexHeader(record, block, hdr, metadataPersister, compressionFormat); err != nil {
return nil
}
@@ -204,7 +208,7 @@ func index(
}
}
if err := indexHeader(record, block, hdr, metadataPersister); err != nil {
if err := indexHeader(record, block, hdr, metadataPersister, compressionFormat); err != nil {
return nil
}
@@ -241,13 +245,38 @@ func init() {
recoveryCmd.AddCommand(recoveryIndexCmd)
}
func indexHeader(record, block int64, hdr *tar.Header, metadataPersister *persisters.MetadataPersister) error {
func indexHeader(
record, block int64,
hdr *tar.Header,
metadataPersister *persisters.MetadataPersister,
compressionFormat string,
) error {
if record == 0 && block == 0 {
if err := formatting.PrintCSV(formatting.TARHeaderCSV); err != nil {
return err
}
}
uncompressedSize, ok := hdr.PAXRecords[pax.STFSRecordUncompressedSize]
if ok {
size, err := strconv.Atoi(uncompressedSize)
if err != nil {
return err
}
hdr.Size = int64(size)
}
switch compressionFormat {
case compressionFormatGZipKey:
if hdr.FileInfo().Mode().IsRegular() {
hdr.Name = strings.TrimSuffix(hdr.Name, compressionFormatGZipSuffix)
}
case compressionFormatNoneKey:
default:
return errUnsupportedCompressionFormat
}
if err := formatting.PrintCSV(formatting.GetTARHeaderAsCSV(record, block, hdr)); err != nil {
return err
}

View File

@@ -103,6 +103,7 @@ var restoreCmd = &cobra.Command{
dst,
false,
false,
viper.GetString(compressionFlag),
); err != nil {
return err
}

View File

@@ -1,6 +1,7 @@
package cmd
import (
"errors"
"os"
"path/filepath"
"strings"
@@ -10,9 +11,21 @@ import (
)
const (
tapeFlag = "tape"
metadataFlag = "metadata"
verboseFlag = "verbose"
tapeFlag = "tape"
metadataFlag = "metadata"
verboseFlag = "verbose"
compressionFlag = "compression"
compressionFormatNoneKey = ""
compressionFormatGZipKey = "gzip"
compressionFormatGZipSuffix = ".gz"
)
// Compression format validation set and the sentinel errors reported when a
// compression format cannot be used.
var (
// knownCompressionFormats lists every value accepted for the compression
// flag; the root command's PersistentPreRunE compares the chosen value
// against each entry.
knownCompressionFormats = []string{compressionFormatNoneKey, compressionFormatGZipKey}
// errUnknownCompressionFormat is returned by the root command's
// PersistentPreRunE when the chosen value matches no entry of
// knownCompressionFormats.
errUnknownCompressionFormat = errors.New("unknown compression format")
// errUnsupportedCompressionFormat is returned from the default branch of the
// switches on the compression format (archive, restoreFromRecordAndBlock,
// indexHeader) when the value has no dedicated case.
errUnsupportedCompressionFormat = errors.New("unsupported compression format")
)
var rootCmd = &cobra.Command{
@@ -22,9 +35,24 @@ var rootCmd = &cobra.Command{
Find more information at:
https://github.com/pojntfx/stfs`,
PersistentPreRun: func(cmd *cobra.Command, args []string) {
PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
viper.SetEnvPrefix("stbak")
viper.SetEnvKeyReplacer(strings.NewReplacer("-", "_", ".", "_"))
compressionIsKnown := false
chosenCompression := viper.GetString(compressionFlag)
for _, candidate := range knownCompressionFormats {
if chosenCompression == candidate {
compressionIsKnown = true
}
}
if !compressionIsKnown {
return errUnknownCompressionFormat
}
return nil
},
}
@@ -39,6 +67,7 @@ func Execute() {
rootCmd.PersistentFlags().StringP(tapeFlag, "t", "/dev/nst0", "Tape or tar file to use")
rootCmd.PersistentFlags().StringP(metadataFlag, "m", metadataPath, "Metadata database to use")
rootCmd.PersistentFlags().BoolP(verboseFlag, "v", false, "Enable verbose logging")
rootCmd.PersistentFlags().StringP(compressionFlag, "c", "", "Compression format to use (default none, available are none, gzip)")
if err := viper.BindPFlags(rootCmd.PersistentFlags()); err != nil {
panic(err)

View File

@@ -18,10 +18,6 @@ import (
"github.com/volatiletech/sqlboiler/v4/boil"
)
const (
contentFlag = "content"
)
var updateCmd = &cobra.Command{
Use: "update",
Aliases: []string{"upd", "u"},
@@ -49,7 +45,7 @@ var updateCmd = &cobra.Command{
viper.GetString(tapeFlag),
viper.GetInt(recordSizeFlag),
viper.GetString(srcFlag),
viper.GetBool(contentFlag),
viper.GetBool(overwriteFlag),
); err != nil {
return err
}
@@ -61,6 +57,7 @@ var updateCmd = &cobra.Command{
int(lastIndexedRecord),
int(lastIndexedBlock),
false,
viper.GetString(compressionFlag),
)
},
}
@@ -168,7 +165,7 @@ func update(
func init() {
updateCmd.PersistentFlags().IntP(recordSizeFlag, "e", 20, "Amount of 512-bit blocks per record")
updateCmd.PersistentFlags().StringP(srcFlag, "s", "", "Path of the file or directory to update")
updateCmd.PersistentFlags().BoolP(contentFlag, "c", false, "Replace the content on the tape or tar file")
updateCmd.PersistentFlags().BoolP(overwriteFlag, "o", false, "Replace the content on the tape or tar file")
viper.AutomaticEnv()

View File

@@ -16,6 +16,8 @@ const (
STFSRecordReplacesContentFalse = "false"
STFSRecordReplacesName = "STFS.ReplacesName"
STFSRecordUncompressedSize = "STFS.UncompressedSize"
)
var (