Use SEEK_HOLE for hole detection

Based on patch by Pavel Raiskup.

Use SEEK_HOLE/SEEK_DATA feature of lseek on systems that support
it.  This can make archiving of sparse files much faster.

Implement the --hole-detection option to allow users to select
hole-detection method.

* src/common.h (hole_detection_method): New enum.
(hole_detection): New global.
* src/sparse.c  (sparse_scan_file_wholesparse): New function as a
method for detecting sparse files without any data.
(sparse_scan_file_raw): Rename from sparse_scan_file; with edits.
(sparse_scan_file_seek): New function.
(sparse_scan_file): Reimplement function.
* src/tar.c: New option --hole-detection

* tests/checkseekhole.c: New file.
* tests/.gitignore: Mention two test binaries.
* tests/Makefile.am: Add new tests.
* tests/testsuite.at (AT_SEEKHOLE_PREREQ): New macro.
Include sparse06.at.
* tests/sparse06.at: New test case.
* tests/sparse02.at: Force raw hole-detection method.
* tests/sparsemv.at: Likewise.
* tests/sparsemvp.at: Likewise.

* doc/tar.1: Document --hole-detection option.
* doc/tar.texi: Document hole-detection algorithms and
command-line options.
* NEWS: Document hole-detection.
This commit is contained in:
Sergey Poznyakoff
2015-12-05 23:36:22 +02:00
parent 589ba77faf
commit b684326e69
14 changed files with 425 additions and 78 deletions

View File

@@ -1,6 +1,6 @@
/* Functions for dealing with sparse files
Copyright 2003-2007, 2010, 2013-2014 Free Software Foundation, Inc.
Copyright 2003-2007, 2010, 2013-2015 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -208,9 +208,9 @@ sparse_add_map (struct tar_stat_info *st, struct sp_array const *sp)
st->sparse_map_avail = avail + 1;
}
/* Scan the sparse file and create its map */
/* Scan the sparse file byte-by-byte and create its map. */
static bool
sparse_scan_file (struct tar_sparse_file *file)
sparse_scan_file_raw (struct tar_sparse_file *file)
{
struct tar_stat_info *st = file->stat_info;
int fd = file->fd;
@@ -221,41 +221,38 @@ sparse_scan_file (struct tar_sparse_file *file)
st->archive_file_size = 0;
if (ST_NBLOCKS (st->stat) == 0)
offset = st->stat.st_size;
else
if (!tar_sparse_scan (file, scan_begin, NULL))
return false;
while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
&& count != SAFE_READ_ERROR)
{
if (!tar_sparse_scan (file, scan_begin, NULL))
return false;
/* Analyze the block. */
if (zero_block_p (buffer, count))
{
if (sp.numbytes)
{
sparse_add_map (st, &sp);
sp.numbytes = 0;
if (!tar_sparse_scan (file, scan_block, NULL))
return false;
}
}
else
{
if (sp.numbytes == 0)
sp.offset = offset;
sp.numbytes += count;
st->archive_file_size += count;
if (!tar_sparse_scan (file, scan_block, buffer))
return false;
}
while ((count = blocking_read (fd, buffer, sizeof buffer)) != 0
&& count != SAFE_READ_ERROR)
{
/* Analyze the block. */
if (zero_block_p (buffer, count))
{
if (sp.numbytes)
{
sparse_add_map (st, &sp);
sp.numbytes = 0;
if (!tar_sparse_scan (file, scan_block, NULL))
return false;
}
}
else
{
if (sp.numbytes == 0)
sp.offset = offset;
sp.numbytes += count;
st->archive_file_size += count;
if (!tar_sparse_scan (file, scan_block, buffer))
return false;
}
offset += count;
}
offset += count;
}
/* save one more sparse segment of length 0 to indicate that
the file ends with a hole */
if (sp.numbytes == 0)
sp.offset = offset;
@@ -264,6 +261,114 @@ sparse_scan_file (struct tar_sparse_file *file)
return tar_sparse_scan (file, scan_end, NULL);
}
static bool
sparse_scan_file_wholesparse (struct tar_sparse_file *file)
{
struct tar_stat_info *st = file->stat_info;
struct sp_array sp = {0, 0};
/* Note that this function is called only for truly sparse files of size >= 1
block size (checked via ST_IS_SPARSE before). See the thread
http://www.mail-archive.com/bug-tar@gnu.org/msg04209.html for more info */
if (ST_NBLOCKS (st->stat) == 0)
{
st->archive_file_size = 0;
sp.offset = st->stat.st_size;
sparse_add_map (st, &sp);
return true;
}
return false;
}
#ifdef SEEK_HOLE
/* Try to engage SEEK_HOLE/SEEK_DATA feature. */
static bool
sparse_scan_file_seek (struct tar_sparse_file *file)
{
struct tar_stat_info *st = file->stat_info;
int fd = file->fd;
struct sp_array sp = {0, 0};
off_t offset = 0;
off_t data_offset;
off_t hole_offset;
st->archive_file_size = 0;
for (;;)
{
/* locate first chunk of data */
data_offset = lseek (fd, offset, SEEK_DATA);
if (data_offset == (off_t)-1)
/* ENXIO == EOF; error otherwise */
{
if (errno == ENXIO)
{
/* file ends with hole, add one more empty chunk of data */
sp.numbytes = 0;
sp.offset = st->stat.st_size;
sparse_add_map (st, &sp);
return true;
}
return false;
}
hole_offset = lseek (fd, data_offset, SEEK_HOLE);
/* according to specs, if FS does not fully support
SEEK_DATA/SEEK_HOLE it may just implement kind of "wrapper" around
classic lseek() call. We must detect it here and try to use other
hole-detection methods. */
if (offset == 0 /* first loop */
&& data_offset == 0
&& hole_offset == st->stat.st_size)
{
lseek (fd, 0, SEEK_SET);
return false;
}
sp.offset = data_offset;
sp.numbytes = hole_offset - data_offset;
sparse_add_map (st, &sp);
st->archive_file_size += sp.numbytes;
offset = hole_offset;
}
return true;
}
#endif
static bool
sparse_scan_file (struct tar_sparse_file *file)
{
/* always check for completely sparse files */
if (sparse_scan_file_wholesparse (file))
return true;
switch (hole_detection)
{
case HOLE_DETECTION_DEFAULT:
case HOLE_DETECTION_SEEK:
#ifdef SEEK_HOLE
if (sparse_scan_file_seek (file))
return true;
#else
if (hole_detection == HOLE_DETECTION_SEEK)
WARN((0, 0,
_("\"seek\" hole detection is not supported, using \"raw\".")));
/* fall back to "raw" for this and all other files */
hole_detection = HOLE_DETECTION_RAW;
#endif
case HOLE_DETECTION_RAW:
if (sparse_scan_file_raw (file))
return true;
}
return false;
}
static struct tar_sparse_optab const oldgnu_optab;
static struct tar_sparse_optab const star_optab;
static struct tar_sparse_optab const pax_optab;