Files
tar/src/sparse.c
2003-11-17 11:04:16 +00:00

643 lines
17 KiB
C
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* Functions for dealing with sparse files
Copyright (C) 2003 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any later
version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
#include "system.h"
#include <quotearg.h>
#include "common.h"
/* Forward declaration: the method table declared next takes pointers to
   this structure before the structure itself is defined.  */
struct tar_sparse_file;
/* Phase passed to the optional scan_block hook while a file is being
   scanned for holes.  */
enum sparse_scan_state
  {
    scan_begin,		/* sent once, before the first block is read */
    scan_block,		/* sent for every non-zero block, and once more
			   (with a null block) when a data run ends */
    scan_end		/* sent once, after the last block was read */
  };
/* Method table of one sparse archive format.  A null entry means the
   format does not provide that operation; the tar_sparse_* wrappers
   decide whether a missing entry counts as success or as failure.  */
struct tar_sparse_optab
  {
    /* Optional set-up and tear-down around one member.  */
    bool (*init) (struct tar_sparse_file *file);
    bool (*done) (struct tar_sparse_file *file);

    /* Write, respectively parse, the header that carries the map.  */
    bool (*dump_header) (struct tar_sparse_file *file);
    bool (*decode_header) (struct tar_sparse_file *file);

    /* Optional observer called while the file is scanned for holes.  */
    bool (*scan_block) (struct tar_sparse_file *file,
			enum sparse_scan_state state,
			void *block);

    /* Transfer the data of map entry INDEX to or from the archive.  */
    bool (*dump_region) (struct tar_sparse_file *file, size_t index);
    bool (*extract_region) (struct tar_sparse_file *file, size_t index);
  };
/* Working state for one sparse member while it is dumped, extracted
   or compared.  */
struct tar_sparse_file
  {
    /* Descriptor of the file on disk.  */
    int fd;

    /* Bytes of member data transferred so far: written to the archive
       when dumping, taken from it when extracting or comparing.  */
    size_t dumped_size;

    /* Information about the file, including its sparse map.  */
    struct tar_stat_info *stat_info;

    /* Methods of the archive format in use.  */
    struct tar_sparse_optab *optab;

    /* Any additional data the optab methods might require.  */
    void *closure;
  };
/* Reset FILE's transferred-byte counter and run the format's init hook.
   Return the hook's result, or true when the format has no init hook.  */
static bool
tar_sparse_init (struct tar_sparse_file *file)
{
  file->dumped_size = 0;
  return file->optab->init ? file->optab->init (file) : true;
}
/* Run the format's done hook on FILE.  Return the hook's result, or
   true when the format has no done hook.  */
static bool
tar_sparse_done (struct tar_sparse_file *file)
{
  if (!file->optab->done)
    return true;

  return file->optab->done (file);
}
/* Notify the format's scan_block hook that the scan of FILE reached
   STATE, handing it BLOCK.  Formats without the hook always succeed.  */
static bool
tar_sparse_scan (struct tar_sparse_file *file, enum sparse_scan_state state,
		 void *block)
{
  if (!file->optab->scan_block)
    return true;

  return file->optab->scan_block (file, state, block);
}
/* Dump map entry INDEX of FILE through the format's dump_region method.
   A format without that method cannot dump, so the result is false.  */
static bool
tar_sparse_dump_region (struct tar_sparse_file *file, size_t index)
{
  return file->optab->dump_region
	 ? file->optab->dump_region (file, index)
	 : false;
}
/* Extract map entry INDEX of FILE through the format's extract_region
   method.  A format without that method yields false.  */
static bool
tar_sparse_extract_region (struct tar_sparse_file *file, size_t index)
{
  if (!file->optab->extract_region)
    return false;

  return file->optab->extract_region (file, index);
}
/* Write the header of FILE through the format's dump_header method.
   A format without that method yields false.  */
static bool
tar_sparse_dump_header (struct tar_sparse_file *file)
{
  return file->optab->dump_header
	 ? file->optab->dump_header (file)
	 : false;
}
/* Parse the header of FILE through the format's decode_header method.
   A format without that method yields false.  */
static bool
tar_sparse_decode_header (struct tar_sparse_file *file)
{
  if (!file->optab->decode_header)
    return false;

  return file->optab->decode_header (file);
}
/* Reposition FILE's descriptor with lseek (OFFSET, WHENCE).  On failure
   issue a seek diagnostic naming the file and OFFSET and return false;
   return true otherwise.  */
static bool
lseek_or_error (struct tar_sparse_file *file, off_t offset, int whence)
{
  bool moved = lseek (file->fd, offset, whence) >= 0;

  if (!moved)
    seek_diag_details (file->stat_info->orig_file_name, offset);
  return moved;
}
/* Return true if the SIZE bytes starting at BUFFER are all zero, and
   false as soon as a non-zero byte, i.e. useful data, is found.  An
   empty range (SIZE == 0) counts as all-zero.  BUFFER is only read.  */
static bool
zero_block_p (const char *buffer, size_t size)
{
  size_t i;

  for (i = 0; i < size; i++)
    if (buffer[i] != 0)
      return false;
  return true;
}
/* Zero-fill the BLOCKSIZE-byte buffer P.  The expansion is a single
   expression without a trailing semicolon, so the macro behaves like a
   function call and is safe as the unbraced body of an if/else.  */
#define clear_block(p) memset ((p), 0, BLOCKSIZE)
/* Number of entries a sparse map is first allocated with.  */
#define SPARSES_INIT_COUNT SPARSES_IN_SPARSE_HEADER
/* Append a copy of *SP to the sparse map of FILE, allocating or
   enlarging the map as needed.

   The map is (re)sized whenever it has no room for one more entry, not
   only when the entry count exactly equals the recorded capacity.  In
   particular a map whose recorded capacity is zero is given
   SPARSES_INIT_COUNT slots instead of being "doubled" to zero, and a
   null map is treated as holding no entries.  */
static void
sparse_add_map (struct tar_sparse_file *file, struct sp_array *sp)
{
  if (file->stat_info->sparse_map == NULL)
    {
      file->stat_info->sparse_map =
	xmalloc (SPARSES_INIT_COUNT * sizeof file->stat_info->sparse_map[0]);
      file->stat_info->sparse_map_size = SPARSES_INIT_COUNT;
      /* A map that does not exist cannot contain entries.  */
      file->stat_info->sparse_map_avail = 0;
    }
  else if (file->stat_info->sparse_map_avail
	   >= file->stat_info->sparse_map_size)
    {
      size_t new_size = SPARSES_INIT_COUNT;

      if (file->stat_info->sparse_map_size > 0)
	new_size = 2 * file->stat_info->sparse_map_size;
      /* Guarantee room for the entry stored below.  */
      while (new_size <= file->stat_info->sparse_map_avail)
	new_size *= 2;

      file->stat_info->sparse_map =
	xrealloc (file->stat_info->sparse_map,
		  new_size * sizeof file->stat_info->sparse_map[0]);
      file->stat_info->sparse_map_size = new_size;
    }

  file->stat_info->sparse_map[file->stat_info->sparse_map_avail++] = *sp;
}
/* Scan the sparse file FILE from its beginning and create its map.

   The file is read one block at a time.  Each run of consecutive
   blocks containing non-zero data becomes one map entry, and the size
   of that data is added to archive_file_size.  If the file ends in a
   hole, a final one-byte entry covering the last byte of the file is
   added (and counted in archive_file_size, so that the archived size
   always equals the sum of the map entries); it lets the extractor
   restore the full length.  An empty file yields an empty map.

   The format's scan_block hook, if any, is told about the start of the
   scan, every non-zero block, the end of every data run, and the end of
   the scan.

   Return false if seeking or reading fails or a hook reports failure.  */
static bool
sparse_scan_file (struct tar_sparse_file *file)
{
  static char buffer[BLOCKSIZE];
  size_t count;
  off_t offset = 0;		/* file offsets may not fit in size_t */
  struct sp_array sp = {0, 0};

  if (!lseek_or_error (file, 0, SEEK_SET))
    return false;
  clear_block (buffer);

  /* Start a fresh map.  Entries are appended at index sparse_map_avail,
     so that is the field to reset; the recorded capacity is left alone
     so that it keeps describing whatever buffer sparse_map points to.  */
  file->stat_info->sparse_map_avail = 0;
  file->stat_info->archive_file_size = 0;

  if (!tar_sparse_scan (file, scan_begin, NULL))
    return false;

  for (;;)
    {
      count = safe_read (file->fd, buffer, sizeof buffer);

      /* Seen through a size_t, a failed read is (size_t) -1 whether
	 safe_read is declared to return a signed or an unsigned type.
	 It must not be mistaken for a very large block.  */
      if (count == (size_t) -1)
	{
	  read_diag_details (file->stat_info->orig_file_name,
			     offset, sizeof buffer);
	  return false;
	}
      if (count == 0)
	break;

      /* Analyze the block */
      if (zero_block_p (buffer, count))
	{
	  if (sp.numbytes)
	    {
	      /* A hole starts here: close the current data run.  */
	      sparse_add_map (file, &sp);
	      sp.numbytes = 0;
	      if (!tar_sparse_scan (file, scan_block, NULL))
		return false;
	    }
	}
      else
	{
	  if (sp.numbytes == 0)
	    sp.offset = offset;
	  sp.numbytes += count;
	  file->stat_info->archive_file_size += count;
	  if (!tar_sparse_scan (file, scan_block, buffer))
	    return false;
	}

      offset += count;
      clear_block (buffer);
    }

  if (sp.numbytes == 0 && offset > 0)
    {
      /* The file ends in a hole: archive its last byte.  */
      sp.offset = offset - 1;
      sp.numbytes = 1;
      file->stat_info->archive_file_size += sp.numbytes;
    }
  if (sp.numbytes != 0)
    sparse_add_map (file, &sp);

  return tar_sparse_scan (file, scan_end, NULL);
}
/* Tentative definition of the old GNU format method table, so that
   sparse_select_optab can take its address; the initialized definition
   comes after the functions the table refers to.  */
static struct tar_sparse_optab oldgnu_optab;
/* Choose the method table for FILE according to the global
   archive_format.  Return true and set FILE->optab if the format can
   store sparse files; return false, leaving FILE->optab untouched, for
   every other format, including values not listed here, so that callers
   never go on to use an unset method table.  */
static bool
sparse_select_optab (struct tar_sparse_file *file)
{
  switch (archive_format)
    {
    case OLDGNU_FORMAT:
    case GNU_FORMAT: /*FIXME: This one should disappear? */
      file->optab = &oldgnu_optab;
      return true;

    case V7_FORMAT:
    case USTAR_FORMAT:
      return false;

    case POSIX_FORMAT:
    case STAR_FORMAT:
      /* FIXME: Add methods */
      return false;

    default:
      /* Unknown or not yet decided format: no methods available.  */
      return false;
    }
}
static bool
sparse_dump_region (struct tar_sparse_file *file, size_t index)
{
union block *blk;
off_t bytes_left = file->stat_info->sparse_map[index].numbytes;
if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
SEEK_SET))
return false;
do
{
size_t bufsize = (bytes_left > BLOCKSIZE) ? BLOCKSIZE : bytes_left;
off_t bytes_read;
blk = find_next_block ();
memset (blk->buffer, 0, BLOCKSIZE);
bytes_read = safe_read (file->fd, blk->buffer, bufsize);
if (bytes_read < 0)
{
read_diag_details (file->stat_info->orig_file_name,
file->stat_info->sparse_map[index].offset
+ file->stat_info->sparse_map[index].numbytes
- bytes_left,
bufsize);
return false;
}
bytes_left -= bytes_read;
file->dumped_size += bytes_read;
set_next_block_after (blk);
}
while (bytes_left > 0);
return true;
}
/* Write the data of map entry INDEX of FILE from the archive to the
   file on disk, at the offset recorded in the map.  Every archive block
   consumed supplies at most BLOCKSIZE bytes; the bytes written are
   added to FILE->dumped_size.  Return false on a seek failure, on a
   premature end of the archive, or on a short write.  */
static bool
sparse_extract_region (struct tar_sparse_file *file, size_t index)
{
  size_t remaining;

  if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
		       SEEK_SET))
    return false;

  for (remaining = file->stat_info->sparse_map[index].numbytes;
       remaining > 0; )
    {
      size_t written;
      size_t chunk = (remaining > BLOCKSIZE) ? BLOCKSIZE : remaining;
      union block *blk = find_next_block ();

      if (blk == NULL)
	{
	  ERROR ((0, 0, _("Unexpected EOF in archive")));
	  return false;
	}
      set_next_block_after (blk);

      written = full_write (file->fd, blk->buffer, chunk);
      remaining -= written;
      file->dumped_size += written;

      if (written != chunk)
	{
	  write_error_details (file->stat_info->orig_file_name,
			       written, chunk);
	  return false;
	}
    }

  return true;
}
/* Interface functions */
/* Archive the sparse file open on FD and described by STAT.

   Return dump_status_not_implemented if the current archive format has
   no sparse methods (or their init hook fails); nothing is written in
   that case.  Otherwise scan the file, write the sparse header and the
   data of every map entry, and return dump_status_ok on success or
   dump_status_short on failure.

   The archive is padded up to the size announced in the header only
   once that header has really been written: padding after a failed
   scan or a failed header dump would insert blocks that belong to no
   member.  */
enum dump_status
sparse_dump_file (int fd, struct tar_stat_info *stat)
{
  bool rc;
  struct tar_sparse_file file;

  file.stat_info = stat;
  file.fd = fd;

  if (!sparse_select_optab (&file)
      || !tar_sparse_init (&file))
    return dump_status_not_implemented;

  rc = sparse_scan_file (&file);

  /* A format that cannot dump regions cannot archive the member, and
     a header that could not be written must not be followed by data.  */
  if (rc)
    rc = file.optab->dump_region != NULL && tar_sparse_dump_header (&file);

  if (rc)
    {
      if (fd >= 0)
	{
	  size_t i;

	  for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
	    rc = tar_sparse_dump_region (&file, i);
	}

      /* Make up for whatever a failed region left unwritten.  */
      pad_archive (file.stat_info->archive_file_size - file.dumped_size);
    }

  return (tar_sparse_done (&file) && rc) ? dump_status_ok : dump_status_short;
}
/* Return true if the file represented by STAT is a sparse one, that is,
   if the number of ST_NBLOCKSIZE units needed to hold st_size bytes
   (rounded up) exceeds what ST_NBLOCKS reports for the file.  */
bool
sparse_file_p (struct tar_stat_info *stat)
{
  return ((stat->stat.st_size / ST_NBLOCKSIZE
	   + (stat->stat.st_size % ST_NBLOCKSIZE != 0 ? 1 : 0))
	  > ST_NBLOCKS (stat->stat));
}
/* Extract the sparse member described by STAT into the file open on FD.

   Return dump_status_not_implemented, without touching *SIZE, if the
   archive format has no sparse methods or their init hook fails.
   Otherwise decode the sparse header, extract the map entries in order
   until one fails, store in *SIZE the archived size minus the bytes
   already consumed, and return dump_status_ok or dump_status_short.  */
enum dump_status
sparse_extract_file (int fd, struct tar_stat_info *stat, off_t *size)
{
  struct tar_sparse_file file;
  size_t i = 0;
  bool rc;
  bool finished;

  file.stat_info = stat;
  file.fd = fd;

  if (!sparse_select_optab (&file))
    return dump_status_not_implemented;
  if (!tar_sparse_init (&file))
    return dump_status_not_implemented;

  rc = tar_sparse_decode_header (&file);
  while (rc && i < file.stat_info->sparse_map_avail)
    {
      rc = tar_sparse_extract_region (&file, i);
      i++;
    }

  *size = file.stat_info->archive_file_size - file.dumped_size;

  /* The done hook runs whether or not the extraction succeeded.  */
  finished = tar_sparse_done (&file);
  return (finished && rc) ? dump_status_ok : dump_status_short;
}
/* Scratch block into which check_sparse_region and check_data_region
   read the contents of the file on disk.  */
static char diff_buffer[BLOCKSIZE];
/* Verify that bytes BEG (inclusive) to END (exclusive) of the file on
   disk form a hole, i.e. read back as zeros.

   Return true if they do.  Return false after a diagnostic if seeking
   or reading fails, or after reporting a difference if a non-zero byte
   is found or the file ends before END.  */
static bool
check_sparse_region (struct tar_sparse_file *file, off_t beg, off_t end)
{
  if (!lseek_or_error (file, beg, SEEK_SET))
    return false;

  while (beg < end)
    {
      size_t bytes_read;
      size_t rdsize = BLOCKSIZE;

      /* Clamp while still in off_t, so that a large distance is not
	 truncated when it is narrowed to size_t.  */
      if (end - beg < BLOCKSIZE)
	rdsize = end - beg;

      clear_block (diff_buffer);
      bytes_read = safe_read (file->fd, diff_buffer, rdsize);

      /* Seen through a size_t, a failed read is (size_t) -1 whether
	 safe_read is declared to return a signed or an unsigned type;
	 testing a size_t for "< 0" would never detect it.  */
      if (bytes_read == (size_t) -1)
	{
	  read_diag_details (file->stat_info->orig_file_name,
			     beg,
			     rdsize);
	  return false;
	}

      /* End of file before END: the file on disk is too short.  Without
	 this check the loop would never make progress.  */
      if (bytes_read == 0)
	{
	  report_difference (file->stat_info, _("Size differs"));
	  return false;
	}

      if (!zero_block_p (diff_buffer, bytes_read))
	{
	  /* The cast makes the argument match the %lu conversion.  */
	  report_difference (file->stat_info,
			     _("File fragment at %lu is not a hole"),
			     (unsigned long) beg);
	  return false;
	}

      beg += bytes_read;
    }

  return true;
}
/* Compare the data of map entry INDEX of FILE, as stored in the
   archive, with the same range of the file on disk.

   Every archive block consumed is matched against exactly as many file
   bytes as it carries, and is counted in FILE->dumped_size as soon as
   it is consumed, so the counter reflects the archive position even
   when the comparison stops early.

   Return true if the data is identical.  Return false after a
   diagnostic on a seek or read failure or a premature end of the
   archive, or after reporting a difference if the contents differ or
   the file ends inside the region.  */
static bool
check_data_region (struct tar_sparse_file *file, size_t index)
{
  size_t size_left;

  if (!lseek_or_error (file, file->stat_info->sparse_map[index].offset,
		       SEEK_SET))
    return false;

  size_left = file->stat_info->sparse_map[index].numbytes;
  while (size_left > 0)
    {
      size_t filled = 0;
      size_t rdsize = (size_left > BLOCKSIZE) ? BLOCKSIZE : size_left;
      union block *blk = find_next_block ();

      if (!blk)
	{
	  ERROR ((0, 0, _("Unexpected EOF in archive")));
	  return false;
	}
      set_next_block_after (blk);
      file->dumped_size += rdsize;

      /* Collect RDSIZE file bytes, tolerating short reads.  */
      while (filled < rdsize)
	{
	  size_t bytes_read = safe_read (file->fd, diff_buffer + filled,
					 rdsize - filled);

	  /* Seen through a size_t, a failed read is (size_t) -1 whether
	     safe_read is declared to return a signed or an unsigned
	     type; testing a size_t for "< 0" would never detect it.  */
	  if (bytes_read == (size_t) -1)
	    {
	      read_diag_details (file->stat_info->orig_file_name,
				 file->stat_info->sparse_map[index].offset
				 + file->stat_info->sparse_map[index].numbytes
				 - size_left
				 + filled,
				 rdsize - filled);
	      return false;
	    }

	  /* End of file inside a data region: the file is too short.  */
	  if (bytes_read == 0)
	    {
	      report_difference (file->stat_info, _("Size differs"));
	      return false;
	    }

	  filled += bytes_read;
	}

      size_left -= rdsize;
      if (memcmp (blk->buffer, diff_buffer, rdsize))
	{
	  report_difference (file->stat_info, _("Contents differ"));
	  return false;
	}
    }

  return true;
}
/* Compare the sparse member described by STAT with the file open on FD.

   Return true only if the member could be checked and every hole and
   every data region matches.  Return false if the archive format has no
   sparse methods (the comparison cannot be made, so no claim of
   equality is returned), if the header cannot be decoded, or if a
   region differs.

   When the comparison stops early, the member data not yet consumed is
   skipped so that the archive is left positioned after this member;
   after a complete comparison there is nothing left to skip.  */
bool
sparse_diff_file (int fd, struct tar_stat_info *stat)
{
  bool rc = true;
  struct tar_sparse_file file;
  size_t i;
  off_t offset = 0;

  file.stat_info = stat;
  file.fd = fd;

  /* This function returns bool, so it must not return an enum
     dump_status constant here: any non-zero one would read as true.  */
  if (!sparse_select_optab (&file)
      || !tar_sparse_init (&file))
    return false;

  rc = tar_sparse_decode_header (&file);
  for (i = 0; rc && i < file.stat_info->sparse_map_avail; i++)
    {
      /* The gap before entry I must be a hole, and the entry itself
	 must match the archived data.  */
      rc = check_sparse_region (&file,
				offset, file.stat_info->sparse_map[i].offset)
	   && check_data_region (&file, i);
      offset = file.stat_info->sparse_map[i].offset
	       + file.stat_info->sparse_map[i].numbytes;
    }

  if (!rc)
    skip_file (file.stat_info->archive_file_size - file.dumped_size);

  tar_sparse_done (&file);
  return rc;
}
/* Old GNU Format. The sparse file information is stored in the
oldgnu_header in the following manner:
The header is marked with type 'S'. Its `size' field contains
the cumulative size of all non-empty blocks of the file. The
actual file size is stored in `realsize' member of oldgnu_header.
The map of the file is stored in a list of `struct sparse'.
Each struct contains offset to the block of data and its
size (both as octal numbers). The first file header contains
at most 4 such structs (SPARSES_IN_OLDGNU_HEADER). If the map
contains more structs, then the field `isextended' of the main
header is set to 1 (binary) and the `struct sparse_header'
header follows, containing at most 21 following structs
(SPARSES_IN_SPARSE_HEADER). If more structs follow, `isextended'
field of the extended header is set and the next extension header
follows, etc... */
/* Outcome of converting one sparse entry of an old GNU header.  */
enum oldgnu_add_status
  {
    add_ok,		/* the entry was valid and was added to the map */
    add_finish,		/* the entry is unused: the map ends here */
    add_fail		/* the entry is invalid */
  };
/* Convert the header sparse entry S and append it to the map of FILE.

   Return add_finish, adding nothing, if S is unused (its numbytes field
   starts with a NUL).  Return add_fail if the entry does not lie inside
   the file: negative offset, offset past the end, or more bytes than
   remain after the offset; or if the archived size is negative.
   Return add_ok once the entry has been added.

   The extent test subtracts instead of adding offset and length: the
   values come from the archive and are untrusted, and their sum can
   wrap around and slip past a "size < offset + length" comparison.
   NOTE(review): this relies on sp.numbytes being unsigned, as the
   SIZE_FROM_HEADER conversion suggests -- confirm against struct
   sp_array.  */
static enum oldgnu_add_status
oldgnu_add_sparse (struct tar_sparse_file *file, struct sparse *s)
{
  struct sp_array sp;

  if (s->numbytes[0] == '\0')
    return add_finish;

  sp.offset = OFF_FROM_HEADER (s->offset);
  sp.numbytes = SIZE_FROM_HEADER (s->numbytes);

  if (sp.offset < 0
      || file->stat_info->stat.st_size < sp.offset
      || file->stat_info->stat.st_size - sp.offset < sp.numbytes
      || file->stat_info->archive_file_size < 0)
    return add_fail;

  sparse_add_map (file, &sp);
  return add_ok;
}
/* Convert old GNU format sparse data to internal representation
FIXME: Clubbers current_header! */
static bool
oldgnu_get_sparse_info (struct tar_sparse_file *file)
{
size_t i;
union block *h = current_header;
int ext_p;
static enum oldgnu_add_status rc;
/* FIXME: note this! st_size was initialized from the header
which actually contains archived size. The following fixes it */
file->stat_info->archive_file_size = file->stat_info->stat.st_size;
file->stat_info->stat.st_size =
OFF_FROM_HEADER (current_header->oldgnu_header.realsize);
file->stat_info->sparse_map_size = 0;
for (i = 0; i < SPARSES_IN_OLDGNU_HEADER; i++)
{
rc = oldgnu_add_sparse (file, &h->oldgnu_header.sp[i]);
if (rc != add_ok)
break;
}
for (ext_p = h->oldgnu_header.isextended;
rc == add_ok && ext_p; ext_p = h->sparse_header.isextended)
{
h = find_next_block ();
if (!h)
{
ERROR ((0, 0, _("Unexpected EOF in archive")));
return false;
}
set_next_block_after (h);
for (i = 0; i < SPARSES_IN_SPARSE_HEADER && rc == add_ok; i++)
rc = oldgnu_add_sparse (file, &h->sparse_header.sp[i]);
}
if (rc == add_fail)
{
ERROR ((0, 0, _("%s: invalid sparse archive member"),
file->stat_info->orig_file_name));
return false;
}
return true;
}
/* Encode map entries of FILE, starting with entry *PINDEX, into the
   header array SP, which has room for SPARSE_SIZE entries.  Stop when
   SP is full or the map is exhausted; *PINDEX is advanced past the
   entries that were stored.  */
static void
oldgnu_store_sparse_info (struct tar_sparse_file *file, size_t *pindex,
			  struct sparse *sp, size_t sparse_size)
{
  while (sparse_size > 0
	 && *pindex < file->stat_info->sparse_map_avail)
    {
      OFF_TO_CHARS (file->stat_info->sparse_map[*pindex].offset,
		    sp->offset);
      SIZE_TO_CHARS (file->stat_info->sparse_map[*pindex].numbytes,
		     sp->numbytes);

      sparse_size--;
      sp++;
      ++*pindex;
    }
}
/* Write the old GNU sparse header of FILE: a member header of type
   GNUTYPE_SPARSE holding the real size, the archived size and the first
   SPARSES_IN_OLDGNU_HEADER map entries, followed by as many extension
   blocks of SPARSES_IN_SPARSE_HEADER entries as the rest of the map
   needs.  Each header whose successor holds more entries has its
   isextended flag set.  Always returns true.  */
static bool
oldgnu_dump_header (struct tar_sparse_file *file)
{
  off_t block_ordinal = current_block_ordinal ();
  union block *blk;
  size_t i;

  blk = start_header (file->stat_info);
  blk->header.typeflag = GNUTYPE_SPARSE;
  /* Provisional value; the flag is assigned again below, once the
     entries of the main header have been stored.  */
  if (file->stat_info->sparse_map_avail > SPARSES_IN_OLDGNU_HEADER)
    blk->oldgnu_header.isextended = 1;

  /* Store the real file size */
  OFF_TO_CHARS (file->stat_info->stat.st_size, blk->oldgnu_header.realsize);
  /* Store the effective (shrunken) file size */
  OFF_TO_CHARS (file->stat_info->archive_file_size, blk->header.size);

  i = 0;
  oldgnu_store_sparse_info (file, &i,
			    blk->oldgnu_header.sp,
			    SPARSES_IN_OLDGNU_HEADER);
  blk->oldgnu_header.isextended = i < file->stat_info->sparse_map_avail;
  finish_header (file->stat_info, blk, block_ordinal);

  /* One extension block per iteration until the map is exhausted.  */
  while (i < file->stat_info->sparse_map_avail)
    {
      blk = find_next_block ();
      memset (blk->buffer, 0, BLOCKSIZE);
      oldgnu_store_sparse_info (file, &i,
				blk->sparse_header.sp,
				SPARSES_IN_SPARSE_HEADER);
      set_next_block_after (blk);
      if (i < file->stat_info->sparse_map_avail)
	blk->sparse_header.isextended = 1;
    }

  return true;
}
/* Method table for the old GNU sparse format.  The initializers are
   positional and must stay in the member order of struct
   tar_sparse_optab: init, done, dump_header, decode_header, scan_block,
   dump_region, extract_region.  */
static struct tar_sparse_optab oldgnu_optab = {
NULL, /* No init function */
NULL, /* No done function */
oldgnu_dump_header,
oldgnu_get_sparse_info,
NULL, /* No scan_block function */
sparse_dump_region,
sparse_extract_region,
};