Use SEEK_HOLE for hole detection
Based on a patch by Pavel Raiskup. Use the SEEK_HOLE/SEEK_DATA feature of lseek on systems that support it. This can make archiving of sparse files much faster. Implement the --hole-detection option to allow users to select the hole-detection method. * src/common.h (hole_detection_method): New enum. (hole_detection): New global. * src/sparse.c (sparse_scan_file_wholesparse): New function as a method for detecting sparse files without any data. (sparse_scan_file_raw): Rename from sparse_scan_file; with edits. (sparse_scan_file_seek): New function. (sparse_scan_file): Reimplement function. * src/tar.c: New option --hole-detection. * tests/checkseekhole.c: New file. * tests/.gitignore: Mention two test binaries. * tests/Makefile.am: Add new tests. * tests/testsuite.at (AT_SEEKHOLE_PREREQ): New macro. Include sparse06.at. * tests/sparse06.at: New test case. * tests/sparse02.at: Force raw hole-detection method. * tests/sparsemv.at: Likewise. * tests/sparsemvp.at: Likewise. * doc/tar.1: Document --hole-detection option. * doc/tar.texi: Document hole-detection algorithms and command-line options. * NEWS: Document hole-detection.
This commit is contained in:
@@ -280,6 +280,15 @@ GLOBAL bool sparse_option;
|
||||
GLOBAL unsigned tar_sparse_major;
|
||||
GLOBAL unsigned tar_sparse_minor;
|
||||
|
||||
enum hole_detection_method
|
||||
{
|
||||
HOLE_DETECTION_DEFAULT,
|
||||
HOLE_DETECTION_RAW,
|
||||
HOLE_DETECTION_SEEK
|
||||
};
|
||||
|
||||
GLOBAL enum hole_detection_method hole_detection;
|
||||
|
||||
GLOBAL bool starting_file_option;
|
||||
|
||||
/* Specified maximum byte length of each tape volume (multiple of 1024). */
|
||||
|
||||
173
src/sparse.c
173
src/sparse.c
@@ -1,6 +1,6 @@
|
||||
/* Functions for dealing with sparse files
|
||||
|
||||
Copyright 2003-2007, 2010, 2013-2014 Free Software Foundation, Inc.
|
||||
Copyright 2003-2007, 2010, 2013-2015 Free Software Foundation, Inc.
|
||||
|
||||
This program is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU General Public License as published by the
|
||||
@@ -208,9 +208,9 @@ sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp)
|
||||
st->sparse_map_avail = avail + 1;
|
||||
}
|
||||
|
||||
/* Scan the sparse file and create its map */
|
||||
/* Scan the sparse file byte-by-byte and create its map. */
|
||||
static bool
|
||||
sparse_scan_file (struct tar_sparse_file *file)
|
||||
sparse_scan_file_raw (struct tar_sparse_file *file)
|
||||
{
|
||||
struct tar_stat_info *st = file->stat_info;
|
||||
int fd = file->fd;
|
||||
@@ -221,41 +221,38 @@ sparse_scan_file (struct tar_sparse_file *file)
|
||||
|
||||
st->archive_file_size = 0;
|
||||
|
||||
if (ST_NBLOCKS (st->stat) == 0)
|
||||
offset = st->stat.st_size;
|
||||
else
|
||||
if (!tar_sparse_scan (file, scan_begin, NULL))
|
||||
return false;
|
||||
|
||||
while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
|
||||
&& count != SAFE_READ_ERROR)
|
||||
{
|
||||
if (!tar_sparse_scan (file, scan_begin, NULL))
|
||||
return false;
|
||||
/* Analyze the block. */
|
||||
if (zero_block_p (buffer, count))
|
||||
{
|
||||
if (sp.numbytes)
|
||||
{
|
||||
sparse_add_map (st, &sp);
|
||||
sp.numbytes = 0;
|
||||
if (!tar_sparse_scan (file, scan_block, NULL))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (sp.numbytes == 0)
|
||||
sp.offset = offset;
|
||||
sp.numbytes += count;
|
||||
st->archive_file_size += count;
|
||||
if (!tar_sparse_scan (file, scan_block, buffer))
|
||||
return false;
|
||||
}
|
||||
|
||||
while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
|
||||
&& count != SAFE_READ_ERROR)
|
||||
{
|
||||
/* Analyze the block. */
|
||||
if (zero_block_p (buffer, count))
|
||||
{
|
||||
if (sp.numbytes)
|
||||
{
|
||||
sparse_add_map (st, &sp);
|
||||
sp.numbytes = 0;
|
||||
if (!tar_sparse_scan (file, scan_block, NULL))
|
||||
return false;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if (sp.numbytes == 0)
|
||||
sp.offset = offset;
|
||||
sp.numbytes += count;
|
||||
st->archive_file_size += count;
|
||||
if (!tar_sparse_scan (file, scan_block, buffer))
|
||||
return false;
|
||||
}
|
||||
|
||||
offset += count;
|
||||
}
|
||||
offset += count;
|
||||
}
|
||||
|
||||
/* save one more sparse segment of length 0 to indicate that
|
||||
the file ends with a hole */
|
||||
if (sp.numbytes == 0)
|
||||
sp.offset = offset;
|
||||
|
||||
@@ -264,6 +261,114 @@ sparse_scan_file (struct tar_sparse_file *file)
|
||||
return tar_sparse_scan (file, scan_end, NULL);
|
||||
}
|
||||
|
||||
static bool
|
||||
sparse_scan_file_wholesparse (struct tar_sparse_file *file)
|
||||
{
|
||||
struct tar_stat_info *st = file->stat_info;
|
||||
struct sp_array sp = {0, 0};
|
||||
|
||||
/* Note that this function is called only for truly sparse files of size >= 1
|
||||
block size (checked via ST_IS_SPARSE before). See the thread
|
||||
http://www.mail-archive.com/bug-tar@gnu.org/msg04209.html for more info */
|
||||
if (ST_NBLOCKS (st->stat) == 0)
|
||||
{
|
||||
st->archive_file_size = 0;
|
||||
sp.offset = st->stat.st_size;
|
||||
sparse_add_map (st, &sp);
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#ifdef SEEK_HOLE
/* Try to engage SEEK_HOLE/SEEK_DATA feature.

   Walk FILE with lseek, appending one sparse-map entry per data extent
   to FILE->stat_info and adding each extent's length to its
   archive_file_size.  When no more data is found, a final zero-length
   entry located at st_size is appended.

   Return true if the map was built this way.  Return false if lseek
   failed, returned an unusable result, or appears to be only a
   wrapper around the classic call; in that case the descriptor is
   rewound to the beginning of the file so that the caller can try
   another hole-detection method.

   NOTE(review): map entries appended before a failure in the middle
   of the scan are not removed here; confirm that the fallback scanner
   tolerates a partially filled map.  */
static bool
sparse_scan_file_seek (struct tar_sparse_file *file)
{
  struct tar_stat_info *st = file->stat_info;
  int fd = file->fd;
  struct sp_array sp = {0, 0};
  off_t offset = 0;

  st->archive_file_size = 0;

  for (;;)
    {
      off_t data_offset;
      off_t hole_offset;

      /* Locate the next chunk of data.  */
      data_offset = lseek (fd, offset, SEEK_DATA);

      if (data_offset == (off_t) -1)
        {
          /* ENXIO == EOF; error otherwise.  */
          if (errno != ENXIO)
            break;

          /* File ends with hole, add one more empty chunk of data.  */
          sp.numbytes = 0;
          sp.offset = st->stat.st_size;
          sparse_add_map (st, &sp);
          return true;
        }

      hole_offset = lseek (fd, data_offset, SEEK_HOLE);

      /* A failed call yields -1, which would otherwise be recorded as
         an extent of negative length.  A result equal to DATA_OFFSET
         would make this loop spin forever adding empty extents.
         Treat both as "method not usable".  */
      if (hole_offset <= data_offset)
        break;

      /* According to specs, if FS does not fully support
         SEEK_DATA/SEEK_HOLE it may just implement kind of "wrapper"
         around classic lseek() call.  We must detect it here and try
         to use other hole-detection methods.  */
      if (offset == 0 /* first loop */
          && data_offset == 0
          && hole_offset == st->stat.st_size)
        break;

      sp.offset = data_offset;
      sp.numbytes = hole_offset - data_offset;
      sparse_add_map (st, &sp);

      st->archive_file_size += sp.numbytes;
      offset = hole_offset;
    }

  /* Common failure exit: undo the seeks done above.  */
  lseek (fd, 0, SEEK_SET);
  return false;
}
#endif
|
||||
|
||||
static bool
|
||||
sparse_scan_file (struct tar_sparse_file *file)
|
||||
{
|
||||
/* always check for completely sparse files */
|
||||
if (sparse_scan_file_wholesparse (file))
|
||||
return true;
|
||||
|
||||
switch (hole_detection)
|
||||
{
|
||||
case HOLE_DETECTION_DEFAULT:
|
||||
case HOLE_DETECTION_SEEK:
|
||||
#ifdef SEEK_HOLE
|
||||
if (sparse_scan_file_seek (file))
|
||||
return true;
|
||||
#else
|
||||
if (hole_detection == HOLE_DETECTION_SEEK)
|
||||
WARN((0, 0,
|
||||
_("\"seek\" hole detection is not supported, using \"raw\".")));
|
||||
/* fall back to "raw" for this and all other files */
|
||||
hole_detection = HOLE_DETECTION_RAW;
|
||||
#endif
|
||||
case HOLE_DETECTION_RAW:
|
||||
if (sparse_scan_file_raw (file))
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static struct tar_sparse_optab const oldgnu_optab;
|
||||
static struct tar_sparse_optab const star_optab;
|
||||
static struct tar_sparse_optab const pax_optab;
|
||||
|
||||
23
src/tar.c
23
src/tar.c
@@ -362,6 +362,7 @@ enum
|
||||
SHOW_TRANSFORMED_NAMES_OPTION,
|
||||
SKIP_OLD_FILES_OPTION,
|
||||
SORT_OPTION,
|
||||
HOLE_DETECTION_OPTION,
|
||||
SPARSE_VERSION_OPTION,
|
||||
STRIP_COMPONENTS_OPTION,
|
||||
SUFFIX_OPTION,
|
||||
@@ -451,6 +452,8 @@ static struct argp_option options[] = {
|
||||
|
||||
{"sparse", 'S', 0, 0,
|
||||
N_("handle sparse files efficiently"), GRID+1 },
|
||||
{"hole-detection", HOLE_DETECTION_OPTION, N_("TYPE"), 0,
|
||||
N_("technique to detect holes"), GRID+1 },
|
||||
{"sparse-version", SPARSE_VERSION_OPTION, N_("MAJOR[.MINOR]"), 0,
|
||||
N_("set version of the sparse format to use (implies --sparse)"), GRID+1},
|
||||
{"incremental", 'G', 0, 0,
|
||||
@@ -1464,6 +1467,19 @@ static int sort_mode_flag[] = {
|
||||
};
|
||||
|
||||
ARGMATCH_VERIFY (sort_mode_arg, sort_mode_flag);
|
||||
|
||||
static char const *const hole_detection_args[] =
|
||||
{
|
||||
"raw", "seek", NULL
|
||||
};
|
||||
|
||||
static int const hole_detection_types[] =
|
||||
{
|
||||
HOLE_DETECTION_RAW, HOLE_DETECTION_SEEK
|
||||
};
|
||||
|
||||
ARGMATCH_VERIFY (hole_detection_args, hole_detection_types);
|
||||
|
||||
|
||||
static void
|
||||
set_old_files_option (int code, struct option_locus *loc)
|
||||
@@ -1753,6 +1769,12 @@ parse_opt (int key, char *arg, struct argp_state *state)
|
||||
set_old_files_option (SKIP_OLD_FILES, args->loc);
|
||||
break;
|
||||
|
||||
case HOLE_DETECTION_OPTION:
|
||||
hole_detection = XARGMATCH ("--hole-detection", arg,
|
||||
hole_detection_args, hole_detection_types);
|
||||
sparse_option = true;
|
||||
break;
|
||||
|
||||
case SPARSE_VERSION_OPTION:
|
||||
sparse_option = true;
|
||||
{
|
||||
@@ -2523,6 +2545,7 @@ decode_options (int argc, char **argv)
|
||||
blocking_factor = DEFAULT_BLOCKING;
|
||||
record_size = DEFAULT_BLOCKING * BLOCKSIZE;
|
||||
excluded = new_exclude ();
|
||||
hole_detection = HOLE_DETECTION_DEFAULT;
|
||||
|
||||
newer_mtime_option.tv_sec = TYPE_MINIMUM (time_t);
|
||||
newer_mtime_option.tv_nsec = -1;
|
||||
|
||||
Reference in New Issue
Block a user