Port more code to UBSan, and fix alignment bug
Problem with extract_file reported by Kirill Furman in: https://lists.gnu.org/r/bug-tar/2025-07/msg00003.html Since the UBSan thing seems to be a recurring issue, I fixed other instances of the problem that I found. Also, I noticed that the same line of code had another failure to conform to C23’s rules for pointers (an alignment issue not caught by UBSan), so I fixed that too. None of these issues matter on practical production hosts. * src/common.h (charptr): New function. * src/buffer.c (available_space_after, short_read, flush_archive) (backspace_output, try_new_volume, simple_flush_read) (_gnu_flush_read, _gnu_flush_write): * src/compare.c (read_and_process): * src/create.c (write_eot, write_gnu_long_link) (dump_regular_file, dump_dir0): * src/extract.c (extract_file): * src/incremen.c (get_gnu_dumpdir): * src/list.c (read_header): * src/sparse.c (sparse_dump_region, sparse_extract_region): * src/system.c (sys_write_archive_buffer) (sys_child_open_for_compress, sys_child_open_for_uncompress): * src/update.c (append_file, update_archive): Use it. * src/buffer.c (set_next_block_after): Arg is now void *, not union block *, since it need not be a valid union block * pointer and this can matter on unusual or debugging implementations. Turn a loop into an if so that the code is O(1) not O(N).
This commit is contained in:
5
NEWS
5
NEWS
@@ -1,4 +1,4 @@
|
||||
GNU tar NEWS - User visible changes. 2025-05-06
|
||||
GNU tar NEWS - User visible changes. 2025-07-25
|
||||
Please send GNU tar bug reports to <bug-tar@gnu.org>
|
||||
|
||||
version TBD
|
||||
@@ -45,6 +45,9 @@ option.
|
||||
** Transformations that change case (e.g., --transform='s/.*/\L&/')
|
||||
now work correctly with multi-byte characters.
|
||||
|
||||
** tar now works better in strict debugging environments that do not
|
||||
allow pointer arithmetic to escape from a sub-element of an array.
|
||||
|
||||
* Performance improvements
|
||||
|
||||
** Sparse files are now read and written with larger blocksizes.
|
||||
|
||||
41
src/buffer.c
41
src/buffer.c
@@ -626,12 +626,16 @@ find_next_block (void)
|
||||
return current_block;
|
||||
}
|
||||
|
||||
/* Indicate that we have used all blocks up thru BLOCK. */
|
||||
/* Indicate that we have used all blocks up thru the block addressed by PTR,
|
||||
which may point to any of the bytes in the addressed block.
|
||||
This does nothing if PTR points before the current block. */
|
||||
void
|
||||
set_next_block_after (union block *block)
|
||||
set_next_block_after (void *ptr)
|
||||
{
|
||||
while (block >= current_block)
|
||||
current_block++;
|
||||
char *p = ptr;
|
||||
ptrdiff_t offset = p - charptr (current_block);
|
||||
if (0 <= offset)
|
||||
current_block += (offset >> LG_BLOCKSIZE) + 1;
|
||||
|
||||
/* Do *not* flush the archive here. If we do, the same argument to
|
||||
set_next_block_after could mean the next block (if the input record
|
||||
@@ -648,7 +652,7 @@ set_next_block_after (union block *block)
|
||||
idx_t
|
||||
available_space_after (union block *pointer)
|
||||
{
|
||||
return record_end->buffer - pointer->buffer;
|
||||
return charptr (record_end) - charptr (pointer);
|
||||
}
|
||||
|
||||
/* Close file having descriptor FD, and abort if close unsuccessful. */
|
||||
@@ -960,7 +964,7 @@ static void
|
||||
short_read (idx_t status)
|
||||
{
|
||||
idx_t left = record_size - status; /* bytes left to read */
|
||||
char *more = (char *) record_start + status; /* address of next read */
|
||||
char *more = charptr (record_start) + status; /* address of next read */
|
||||
|
||||
if (left && left % BLOCKSIZE == 0
|
||||
&& (warning_option & WARN_RECORD_SIZE)
|
||||
@@ -1029,7 +1033,7 @@ flush_archive (void)
|
||||
}
|
||||
}
|
||||
|
||||
buffer_level = current_block->buffer - record_start->buffer;
|
||||
buffer_level = charptr (current_block) - charptr (record_start);
|
||||
record_start_block += record_end - record_start;
|
||||
current_block = record_start;
|
||||
record_end = record_start + blocking_factor;
|
||||
@@ -1063,7 +1067,7 @@ backspace_output (void)
|
||||
|
||||
/* Seek back to the beginning of this record and start writing there. */
|
||||
|
||||
position -= record_end->buffer - record_start->buffer;
|
||||
position -= charptr (record_end) - charptr (record_start);
|
||||
if (position < 0)
|
||||
position = 0;
|
||||
if (rmtlseek (archive, position, SEEK_SET) != position)
|
||||
@@ -1076,8 +1080,7 @@ backspace_output (void)
|
||||
/* Replace the first part of the record with NULs. */
|
||||
|
||||
if (record_start->buffer != output_start)
|
||||
memset (record_start->buffer, 0,
|
||||
output_start - record_start->buffer);
|
||||
memset (record_start, 0, output_start - charptr (record_start));
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1444,7 +1447,7 @@ try_new_volume (void)
|
||||
if (!new_volume (acc))
|
||||
return true;
|
||||
|
||||
while ((status = rmtread (archive, record_start->buffer, record_size))
|
||||
while ((status = rmtread (archive, charptr (record_start), record_size))
|
||||
< 0)
|
||||
archive_read_error ();
|
||||
|
||||
@@ -1809,7 +1812,7 @@ simple_flush_read (void)
|
||||
}
|
||||
|
||||
ptrdiff_t nread;
|
||||
while ((nread = rmtread (archive, record_start->buffer, record_size)) < 0)
|
||||
while ((nread = rmtread (archive, charptr (record_start), record_size)) < 0)
|
||||
archive_read_error ();
|
||||
short_read_slop = 0;
|
||||
if (nread == record_size)
|
||||
@@ -1856,7 +1859,7 @@ _gnu_flush_read (void)
|
||||
}
|
||||
|
||||
ptrdiff_t nread;
|
||||
while ((nread = rmtread (archive, record_start->buffer, record_size)) < 0
|
||||
while ((nread = rmtread (archive, charptr (record_start), record_size)) < 0
|
||||
&& ! (errno == ENOSPC && multi_volume_option))
|
||||
archive_read_error ();
|
||||
/* The condition below used to include
|
||||
@@ -1934,7 +1937,7 @@ _gnu_flush_write (idx_t buffer_level)
|
||||
prev_written += bytes_written;
|
||||
bytes_written = 0;
|
||||
|
||||
copy_ptr = record_start->buffer + status;
|
||||
copy_ptr = charptr (record_start) + status;
|
||||
copy_size = buffer_level - status;
|
||||
|
||||
/* Switch to the next buffer */
|
||||
@@ -1960,16 +1963,16 @@ _gnu_flush_write (idx_t buffer_level)
|
||||
inhibit_map = false;
|
||||
while (bufsize < copy_size)
|
||||
{
|
||||
memcpy (header->buffer, copy_ptr, bufsize);
|
||||
memcpy (header, copy_ptr, bufsize);
|
||||
copy_ptr += bufsize;
|
||||
copy_size -= bufsize;
|
||||
set_next_block_after (header + ((bufsize - 1) >> LG_BLOCKSIZE));
|
||||
set_next_block_after (charptr (header) + bufsize - 1);
|
||||
header = find_next_block ();
|
||||
bufsize = available_space_after (header);
|
||||
}
|
||||
memcpy (header->buffer, copy_ptr, copy_size);
|
||||
memset (header->buffer + copy_size, 0, bufsize - copy_size);
|
||||
set_next_block_after (header + ((copy_size - 1) >> LG_BLOCKSIZE));
|
||||
memcpy (header, copy_ptr, copy_size);
|
||||
memset (charptr (header) + copy_size, 0, bufsize - copy_size);
|
||||
set_next_block_after (charptr (header) + copy_size - 1);
|
||||
find_next_block ();
|
||||
}
|
||||
|
||||
|
||||
18
src/common.h
18
src/common.h
@@ -412,6 +412,22 @@ extern enum access_mode access_mode;
|
||||
|
||||
/* Module buffer.c. */
|
||||
|
||||
/* Return BLOCK, but as a char * pointer that can be used to address
|
||||
any byte in the array of blocks containing *BLOCK, as opposed to
|
||||
BLOCK->buffer which can address only the bytes in *BLOCK itself.
|
||||
The distinction can matter in strict debugging environments.
|
||||
|
||||
Callers should use this function only when possibly needing access
|
||||
to storage outside of *BLOCK, or when the resulting pointer needs
|
||||
to be compared to other pointers that point outside of *BLOCK.
|
||||
Code not needing such access should use BLOCK->buffer instead, as
|
||||
some debugging environments can catch some subscript errors that way. */
|
||||
COMMON_INLINE char *
|
||||
charptr (union block *block)
|
||||
{
|
||||
return (char *) block;
|
||||
}
|
||||
|
||||
/* File descriptor for archive file. */
|
||||
extern int archive;
|
||||
|
||||
@@ -456,7 +472,7 @@ void init_volume_number (void);
|
||||
void open_archive (enum access_mode mode);
|
||||
void print_total_stats (void);
|
||||
void reset_eof (void);
|
||||
void set_next_block_after (union block *block);
|
||||
void set_next_block_after (void *);
|
||||
void clear_read_error_count (void);
|
||||
void xclose (int fd);
|
||||
_Noreturn void archive_write_error (ssize_t status);
|
||||
|
||||
@@ -136,10 +136,9 @@ read_and_process (struct tar_stat_info *st, bool (*processor) (idx_t, char *))
|
||||
idx_t data_size = available_space_after (data_block);
|
||||
if (data_size > size)
|
||||
data_size = size;
|
||||
if (!processor (data_size, data_block->buffer))
|
||||
if (!processor (data_size, charptr (data_block)))
|
||||
processor = process_noop;
|
||||
set_next_block_after ((union block *)
|
||||
(data_block->buffer + data_size - 1));
|
||||
set_next_block_after (charptr (data_block) + data_size - 1);
|
||||
size -= data_size;
|
||||
mv_size_left (size);
|
||||
}
|
||||
|
||||
22
src/create.c
22
src/create.c
@@ -473,7 +473,7 @@ write_eot (void)
|
||||
memset (pointer->buffer, 0, BLOCKSIZE);
|
||||
set_next_block_after (pointer);
|
||||
pointer = find_next_block ();
|
||||
memset (pointer->buffer, 0, available_space_after (pointer));
|
||||
memset (charptr (pointer), 0, available_space_after (pointer));
|
||||
set_next_block_after (pointer);
|
||||
}
|
||||
|
||||
@@ -540,16 +540,16 @@ write_gnu_long_link (struct tar_stat_info *st, const char *p, char type)
|
||||
|
||||
while (bufsize < size)
|
||||
{
|
||||
memcpy (header->buffer, p, bufsize);
|
||||
memcpy (charptr (header), p, bufsize);
|
||||
p += bufsize;
|
||||
size -= bufsize;
|
||||
set_next_block_after (header + ((bufsize - 1) >> LG_BLOCKSIZE));
|
||||
set_next_block_after (charptr (header) + bufsize - 1);
|
||||
header = find_next_block ();
|
||||
bufsize = available_space_after (header);
|
||||
}
|
||||
memcpy (header->buffer, p, size);
|
||||
memset (header->buffer + size, 0, bufsize - size);
|
||||
set_next_block_after (header + ((size - 1) >> LG_BLOCKSIZE));
|
||||
memcpy (charptr (header), p, size);
|
||||
memset (charptr (header) + size, 0, bufsize - size);
|
||||
set_next_block_after (charptr (header) + size - 1);
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -1047,16 +1047,16 @@ dump_regular_file (int fd, struct tar_stat_info *st)
|
||||
}
|
||||
|
||||
idx_t count = (fd <= 0 ? bufsize
|
||||
: blocking_read (fd, blk->buffer, bufsize));
|
||||
: blocking_read (fd, charptr (blk), bufsize));
|
||||
size_left -= count;
|
||||
set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE));
|
||||
set_next_block_after (charptr (blk) + bufsize - 1);
|
||||
|
||||
if (count != bufsize)
|
||||
{
|
||||
if (errno)
|
||||
read_diag_details (st->orig_file_name,
|
||||
st->stat.st_size - size_left, bufsize);
|
||||
memset (blk->buffer + count, 0, bufsize - count);
|
||||
memset (charptr (blk) + count, 0, bufsize - count);
|
||||
warnopt (WARN_FILE_SHRANK, 0,
|
||||
ngettext (("%s: File shrank by %jd byte;"
|
||||
" padding with zeros"),
|
||||
@@ -1134,10 +1134,10 @@ dump_dir0 (struct tar_stat_info *st, char const *directory)
|
||||
if (count)
|
||||
memset (blk->buffer + size_left, 0, BLOCKSIZE - count);
|
||||
}
|
||||
memcpy (blk->buffer, p_buffer, bufsize);
|
||||
memcpy (charptr (blk), p_buffer, bufsize);
|
||||
size_left -= bufsize;
|
||||
p_buffer += bufsize;
|
||||
set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE));
|
||||
set_next_block_after (charptr (blk) + bufsize - 1);
|
||||
}
|
||||
}
|
||||
return;
|
||||
|
||||
@@ -1377,11 +1377,10 @@ extract_file (char *file_name, char typeflag)
|
||||
if (written > size)
|
||||
written = size;
|
||||
errno = 0;
|
||||
idx_t count = blocking_write (fd, data_block->buffer, written);
|
||||
idx_t count = blocking_write (fd, charptr (data_block), written);
|
||||
size -= written;
|
||||
|
||||
set_next_block_after ((union block *)
|
||||
(data_block->buffer + written - 1));
|
||||
set_next_block_after (charptr (data_block) + written - 1);
|
||||
if (count != written)
|
||||
{
|
||||
if (!to_command_option)
|
||||
|
||||
@@ -1521,10 +1521,9 @@ get_gnu_dumpdir (struct tar_stat_info *stat_info)
|
||||
copied = available_space_after (data_block);
|
||||
if (copied > size)
|
||||
copied = size;
|
||||
memcpy (to, data_block->buffer, copied);
|
||||
memcpy (to, charptr (data_block), copied);
|
||||
to += copied;
|
||||
set_next_block_after ((union block *)
|
||||
(data_block->buffer + copied - 1));
|
||||
set_next_block_after (charptr (data_block) + copied - 1);
|
||||
}
|
||||
|
||||
mv_end ();
|
||||
|
||||
13
src/list.c
13
src/list.c
@@ -393,7 +393,7 @@ tar_checksum (union block *header, bool silent)
|
||||
read_header_x_global when a POSIX global header is read,
|
||||
decode it and return HEADER_SUCCESS_EXTENDED.
|
||||
|
||||
You must always set_next_block_after(*return_block) to skip past
|
||||
You must always set_next_block_after (*return_block) to skip past
|
||||
the header which this routine reads. */
|
||||
|
||||
enum read_header
|
||||
@@ -473,7 +473,7 @@ read_header (union block **return_block, struct tar_stat_info *info,
|
||||
|
||||
set_next_block_after (header);
|
||||
*header_copy = *header;
|
||||
bp = header_copy->buffer + BLOCKSIZE;
|
||||
bp = charptr (header_copy + 1);
|
||||
|
||||
for (size -= BLOCKSIZE; size > 0; size -= written)
|
||||
{
|
||||
@@ -487,10 +487,9 @@ read_header (union block **return_block, struct tar_stat_info *info,
|
||||
if (written > size)
|
||||
written = size;
|
||||
|
||||
memcpy (bp, data_block->buffer, written);
|
||||
memcpy (bp, charptr (data_block), written);
|
||||
bp += written;
|
||||
set_next_block_after ((union block *)
|
||||
(data_block->buffer + written - 1));
|
||||
set_next_block_after (charptr (data_block) + written - 1);
|
||||
}
|
||||
|
||||
*bp = '\0';
|
||||
@@ -532,7 +531,7 @@ read_header (union block **return_block, struct tar_stat_info *info,
|
||||
|
||||
if (next_long_name)
|
||||
{
|
||||
name = next_long_name->buffer + BLOCKSIZE;
|
||||
name = charptr (next_long_name + 1);
|
||||
recent_long_name = next_long_name;
|
||||
recent_long_name_blocks = next_long_name_blocks;
|
||||
next_long_name = NULL;
|
||||
@@ -564,7 +563,7 @@ read_header (union block **return_block, struct tar_stat_info *info,
|
||||
|
||||
if (next_long_link)
|
||||
{
|
||||
name = next_long_link->buffer + BLOCKSIZE;
|
||||
name = charptr (next_long_link + 1);
|
||||
recent_long_link = next_long_link;
|
||||
recent_long_link_blocks = next_long_link_blocks;
|
||||
next_long_link = NULL;
|
||||
|
||||
@@ -417,7 +417,7 @@ sparse_dump_region (struct tar_sparse_file *file, idx_t i)
|
||||
union block *blk = find_next_block ();
|
||||
idx_t avail = available_space_after (blk);
|
||||
idx_t bufsize = min (bytes_left, avail);
|
||||
idx_t bytes_read = full_read (file->fd, blk->buffer, bufsize);
|
||||
idx_t bytes_read = full_read (file->fd, charptr (blk), bufsize);
|
||||
if (bytes_read < BLOCKSIZE)
|
||||
memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read);
|
||||
bytes_left -= bytes_read;
|
||||
@@ -450,7 +450,7 @@ sparse_dump_region (struct tar_sparse_file *file, idx_t i)
|
||||
return false;
|
||||
}
|
||||
|
||||
set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE));
|
||||
set_next_block_after (charptr (blk) + bufsize - 1);
|
||||
}
|
||||
|
||||
return true;
|
||||
@@ -482,9 +482,9 @@ sparse_extract_region (struct tar_sparse_file *file, idx_t i)
|
||||
}
|
||||
idx_t avail = available_space_after (blk);
|
||||
idx_t wrbytes = min (write_size, avail);
|
||||
set_next_block_after (blk + ((wrbytes - 1) >> LG_BLOCKSIZE));
|
||||
set_next_block_after (charptr (blk) + wrbytes - 1);
|
||||
file->dumped_size += avail;
|
||||
idx_t count = blocking_write (file->fd, blk->buffer, wrbytes);
|
||||
idx_t count = blocking_write (file->fd, charptr (blk), wrbytes);
|
||||
write_size -= count;
|
||||
mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
|
||||
file->offset += count;
|
||||
|
||||
12
src/system.c
12
src/system.c
@@ -140,7 +140,7 @@ sys_truncate (int fd)
|
||||
idx_t
|
||||
sys_write_archive_buffer (void)
|
||||
{
|
||||
return full_write (archive, record_start->buffer, record_size);
|
||||
return full_write (archive, charptr (record_start), record_size);
|
||||
}
|
||||
|
||||
/* Set ARCHIVE for writing, then compressing an archive. */
|
||||
@@ -309,7 +309,7 @@ is_regular_file (const char *name)
|
||||
idx_t
|
||||
sys_write_archive_buffer (void)
|
||||
{
|
||||
return rmtwrite (archive, record_start->buffer, record_size);
|
||||
return rmtwrite (archive, charptr (record_start), record_size);
|
||||
}
|
||||
|
||||
/* Read and write file descriptors from a pipe(pipefd) call. */
|
||||
@@ -458,7 +458,7 @@ sys_child_open_for_compress (void)
|
||||
|
||||
/* Assemble a record. */
|
||||
|
||||
for (length = 0, cursor = record_start->buffer;
|
||||
for (length = 0, cursor = charptr (record_start);
|
||||
length < record_size;
|
||||
length += status, cursor += status)
|
||||
{
|
||||
@@ -481,7 +481,7 @@ sys_child_open_for_compress (void)
|
||||
|
||||
if (length > 0)
|
||||
{
|
||||
memset (record_start->buffer + length, 0, record_size - length);
|
||||
memset (charptr (record_start) + length, 0, record_size - length);
|
||||
status = sys_write_archive_buffer ();
|
||||
if (status != record_size)
|
||||
archive_write_error (status);
|
||||
@@ -622,12 +622,12 @@ sys_child_open_for_uncompress (void)
|
||||
clear_read_error_count ();
|
||||
|
||||
ptrdiff_t n;
|
||||
while ((n = rmtread (archive, record_start->buffer, record_size)) < 0)
|
||||
while ((n = rmtread (archive, charptr (record_start), record_size)) < 0)
|
||||
archive_read_error ();
|
||||
if (n == 0)
|
||||
break;
|
||||
|
||||
char *cursor = record_start->buffer;
|
||||
char *cursor = charptr (record_start);
|
||||
do
|
||||
{
|
||||
idx_t count = min (n, BLOCKSIZE);
|
||||
|
||||
@@ -57,15 +57,15 @@ append_file (char *file_name)
|
||||
{
|
||||
union block *start = find_next_block ();
|
||||
idx_t bufsize = available_space_after (start);
|
||||
idx_t status = full_read (handle, start->buffer, bufsize);
|
||||
idx_t status = full_read (handle, charptr (start), bufsize);
|
||||
if (status < bufsize && errno)
|
||||
read_fatal (file_name);
|
||||
if (status == 0)
|
||||
break;
|
||||
idx_t rem = status % BLOCKSIZE;
|
||||
if (rem)
|
||||
memset (start->buffer + (status - rem), 0, BLOCKSIZE - rem);
|
||||
set_next_block_after (start + ((status - 1) >> LG_BLOCKSIZE));
|
||||
memset (charptr (start) + (status - rem), 0, BLOCKSIZE - rem);
|
||||
set_next_block_after (charptr (start) + status - 1);
|
||||
}
|
||||
|
||||
if (close (handle) < 0)
|
||||
@@ -206,7 +206,7 @@ update_archive (void)
|
||||
|
||||
reset_eof ();
|
||||
time_to_start_writing = true;
|
||||
output_start = current_block->buffer;
|
||||
output_start = charptr (current_block);
|
||||
|
||||
{
|
||||
struct name const *p;
|
||||
|
||||
Reference in New Issue
Block a user