Port more code to UBSan, and fix alignment bug

Problem with extract_file reported by Kirill Furman in:
https://lists.gnu.org/r/bug-tar/2025-07/msg00003.html
Since the UBSan thing seems to be a recurring issue,
I fixed other instances of the problem that I found.
Also, I noticed that the same line of code had another failure to
conform to C23’s rules for pointers (an alignment issue not caught
by UBSan), so I fixed that too.  None of these issues matter on
practical production hosts.
* src/common.h (charptr): New function.
* src/buffer.c (available_space_after, short_read, flush_archive)
(backspace_output, try_new_volume, simple_flush_read)
(_gnu_flush_read, _gnu_flush_write):
* src/compare.c (read_and_process):
* src/create.c (write_eot, write_gnu_long_link)
(dump_regular_file, dump_dir0):
* src/extract.c (extract_file):
* src/incremen.c (get_gnu_dumpdir):
* src/list.c (read_header):
* src/sparse.c (sparse_dump_region, sparse_extract_region):
* src/system.c (sys_write_archive_buffer)
(sys_child_open_for_compress, sys_child_open_for_uncompress):
* src/update.c (append_file, update_archive):
Use it.
* src/buffer.c (set_next_block_after): Arg is now void *,
not union block *, since it need not be a valid union block * pointer
and this can matter on unusual or debugging implementations.
Turn a loop into an if so that the code is O(1) not O(N).
This commit is contained in:
Paul Eggert
2025-07-25 20:39:32 -07:00
parent 8921131877
commit 75735940f1
11 changed files with 80 additions and 62 deletions

5
NEWS
View File

@@ -1,4 +1,4 @@
GNU tar NEWS - User visible changes. 2025-05-06
GNU tar NEWS - User visible changes. 2025-07-25
Please send GNU tar bug reports to <bug-tar@gnu.org>
version TBD
@@ -45,6 +45,9 @@ option.
** Transformations that change case (e.g., --transform='s/.*/\L&/')
now work correctly with multi-byte characters.
** tar now works better in strict debugging environments that do not
allow pointer arithmetic to escape from a sub-element of an array.
* Performance improvements
** Sparse files are now read and written with larger blocksizes.

View File

@@ -626,12 +626,16 @@ find_next_block (void)
return current_block;
}
/* Indicate that we have used all blocks up thru BLOCK. */
/* Indicate that we have used all blocks up thru the block addressed by PTR,
which may point to any of the bytes in the addressed block.
This does nothing if PTR points before the current block. */
void
set_next_block_after (union block *block)
set_next_block_after (void *ptr)
{
while (block >= current_block)
current_block++;
char *p = ptr;
ptrdiff_t offset = p - charptr (current_block);
if (0 <= offset)
current_block += (offset >> LG_BLOCKSIZE) + 1;
/* Do *not* flush the archive here. If we do, the same argument to
set_next_block_after could mean the next block (if the input record
@@ -648,7 +652,7 @@ set_next_block_after (union block *block)
idx_t
available_space_after (union block *pointer)
{
return record_end->buffer - pointer->buffer;
return charptr (record_end) - charptr (pointer);
}
/* Close file having descriptor FD, and abort if close unsuccessful. */
@@ -960,7 +964,7 @@ static void
short_read (idx_t status)
{
idx_t left = record_size - status; /* bytes left to read */
char *more = (char *) record_start + status; /* address of next read */
char *more = charptr (record_start) + status; /* address of next read */
if (left && left % BLOCKSIZE == 0
&& (warning_option & WARN_RECORD_SIZE)
@@ -1029,7 +1033,7 @@ flush_archive (void)
}
}
buffer_level = current_block->buffer - record_start->buffer;
buffer_level = charptr (current_block) - charptr (record_start);
record_start_block += record_end - record_start;
current_block = record_start;
record_end = record_start + blocking_factor;
@@ -1063,7 +1067,7 @@ backspace_output (void)
/* Seek back to the beginning of this record and start writing there. */
position -= record_end->buffer - record_start->buffer;
position -= charptr (record_end) - charptr (record_start);
if (position < 0)
position = 0;
if (rmtlseek (archive, position, SEEK_SET) != position)
@@ -1076,8 +1080,7 @@ backspace_output (void)
/* Replace the first part of the record with NULs. */
if (record_start->buffer != output_start)
memset (record_start->buffer, 0,
output_start - record_start->buffer);
memset (record_start, 0, output_start - charptr (record_start));
}
}
}
@@ -1444,7 +1447,7 @@ try_new_volume (void)
if (!new_volume (acc))
return true;
while ((status = rmtread (archive, record_start->buffer, record_size))
while ((status = rmtread (archive, charptr (record_start), record_size))
< 0)
archive_read_error ();
@@ -1809,7 +1812,7 @@ simple_flush_read (void)
}
ptrdiff_t nread;
while ((nread = rmtread (archive, record_start->buffer, record_size)) < 0)
while ((nread = rmtread (archive, charptr (record_start), record_size)) < 0)
archive_read_error ();
short_read_slop = 0;
if (nread == record_size)
@@ -1856,7 +1859,7 @@ _gnu_flush_read (void)
}
ptrdiff_t nread;
while ((nread = rmtread (archive, record_start->buffer, record_size)) < 0
while ((nread = rmtread (archive, charptr (record_start), record_size)) < 0
&& ! (errno == ENOSPC && multi_volume_option))
archive_read_error ();
/* The condition below used to include
@@ -1934,7 +1937,7 @@ _gnu_flush_write (idx_t buffer_level)
prev_written += bytes_written;
bytes_written = 0;
copy_ptr = record_start->buffer + status;
copy_ptr = charptr (record_start) + status;
copy_size = buffer_level - status;
/* Switch to the next buffer */
@@ -1960,16 +1963,16 @@ _gnu_flush_write (idx_t buffer_level)
inhibit_map = false;
while (bufsize < copy_size)
{
memcpy (header->buffer, copy_ptr, bufsize);
memcpy (header, copy_ptr, bufsize);
copy_ptr += bufsize;
copy_size -= bufsize;
set_next_block_after (header + ((bufsize - 1) >> LG_BLOCKSIZE));
set_next_block_after (charptr (header) + bufsize - 1);
header = find_next_block ();
bufsize = available_space_after (header);
}
memcpy (header->buffer, copy_ptr, copy_size);
memset (header->buffer + copy_size, 0, bufsize - copy_size);
set_next_block_after (header + ((copy_size - 1) >> LG_BLOCKSIZE));
memcpy (header, copy_ptr, copy_size);
memset (charptr (header) + copy_size, 0, bufsize - copy_size);
set_next_block_after (charptr (header) + copy_size - 1);
find_next_block ();
}

View File

@@ -412,6 +412,22 @@ extern enum access_mode access_mode;
/* Module buffer.c. */
/* Return BLOCK, but as a char * pointer that can be used to address
any byte in the array of blocks containing *BLOCK, as opposed to
BLOCK->buffer which can address only the bytes in *BLOCK itself.
The distinction can matter in strict debugging environments.
Callers should use this function only when possibly needing access
to storage outside of *BLOCK, or when the resulting pointer needs
to be compared to other pointers that point outside of *BLOCK.
Code not needing such access should use BLOCK->buffer instead, as
some debugging environments can catch some subscript errors that way. */
COMMON_INLINE char *
charptr (union block *block)
{
return (char *) block;
}
/* File descriptor for archive file. */
extern int archive;
@@ -456,7 +472,7 @@ void init_volume_number (void);
void open_archive (enum access_mode mode);
void print_total_stats (void);
void reset_eof (void);
void set_next_block_after (union block *block);
void set_next_block_after (void *);
void clear_read_error_count (void);
void xclose (int fd);
_Noreturn void archive_write_error (ssize_t status);

View File

@@ -136,10 +136,9 @@ read_and_process (struct tar_stat_info *st, bool (*processor) (idx_t, char *))
idx_t data_size = available_space_after (data_block);
if (data_size > size)
data_size = size;
if (!processor (data_size, data_block->buffer))
if (!processor (data_size, charptr (data_block)))
processor = process_noop;
set_next_block_after ((union block *)
(data_block->buffer + data_size - 1));
set_next_block_after (charptr (data_block) + data_size - 1);
size -= data_size;
mv_size_left (size);
}

View File

@@ -473,7 +473,7 @@ write_eot (void)
memset (pointer->buffer, 0, BLOCKSIZE);
set_next_block_after (pointer);
pointer = find_next_block ();
memset (pointer->buffer, 0, available_space_after (pointer));
memset (charptr (pointer), 0, available_space_after (pointer));
set_next_block_after (pointer);
}
@@ -540,16 +540,16 @@ write_gnu_long_link (struct tar_stat_info *st, const char *p, char type)
while (bufsize < size)
{
memcpy (header->buffer, p, bufsize);
memcpy (charptr (header), p, bufsize);
p += bufsize;
size -= bufsize;
set_next_block_after (header + ((bufsize - 1) >> LG_BLOCKSIZE));
set_next_block_after (charptr (header) + bufsize - 1);
header = find_next_block ();
bufsize = available_space_after (header);
}
memcpy (header->buffer, p, size);
memset (header->buffer + size, 0, bufsize - size);
set_next_block_after (header + ((size - 1) >> LG_BLOCKSIZE));
memcpy (charptr (header), p, size);
memset (charptr (header) + size, 0, bufsize - size);
set_next_block_after (charptr (header) + size - 1);
}
static int
@@ -1047,16 +1047,16 @@ dump_regular_file (int fd, struct tar_stat_info *st)
}
idx_t count = (fd <= 0 ? bufsize
: blocking_read (fd, blk->buffer, bufsize));
: blocking_read (fd, charptr (blk), bufsize));
size_left -= count;
set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE));
set_next_block_after (charptr (blk) + bufsize - 1);
if (count != bufsize)
{
if (errno)
read_diag_details (st->orig_file_name,
st->stat.st_size - size_left, bufsize);
memset (blk->buffer + count, 0, bufsize - count);
memset (charptr (blk) + count, 0, bufsize - count);
warnopt (WARN_FILE_SHRANK, 0,
ngettext (("%s: File shrank by %jd byte;"
" padding with zeros"),
@@ -1134,10 +1134,10 @@ dump_dir0 (struct tar_stat_info *st, char const *directory)
if (count)
memset (blk->buffer + size_left, 0, BLOCKSIZE - count);
}
memcpy (blk->buffer, p_buffer, bufsize);
memcpy (charptr (blk), p_buffer, bufsize);
size_left -= bufsize;
p_buffer += bufsize;
set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE));
set_next_block_after (charptr (blk) + bufsize - 1);
}
}
return;

View File

@@ -1377,11 +1377,10 @@ extract_file (char *file_name, char typeflag)
if (written > size)
written = size;
errno = 0;
idx_t count = blocking_write (fd, data_block->buffer, written);
idx_t count = blocking_write (fd, charptr (data_block), written);
size -= written;
set_next_block_after ((union block *)
(data_block->buffer + written - 1));
set_next_block_after (charptr (data_block) + written - 1);
if (count != written)
{
if (!to_command_option)

View File

@@ -1521,10 +1521,9 @@ get_gnu_dumpdir (struct tar_stat_info *stat_info)
copied = available_space_after (data_block);
if (copied > size)
copied = size;
memcpy (to, data_block->buffer, copied);
memcpy (to, charptr (data_block), copied);
to += copied;
set_next_block_after ((union block *)
(data_block->buffer + copied - 1));
set_next_block_after (charptr (data_block) + copied - 1);
}
mv_end ();

View File

@@ -393,7 +393,7 @@ tar_checksum (union block *header, bool silent)
read_header_x_global when a POSIX global header is read,
decode it and return HEADER_SUCCESS_EXTENDED.
You must always set_next_block_after(*return_block) to skip past
You must always set_next_block_after (*return_block) to skip past
the header which this routine reads. */
enum read_header
@@ -473,7 +473,7 @@ read_header (union block **return_block, struct tar_stat_info *info,
set_next_block_after (header);
*header_copy = *header;
bp = header_copy->buffer + BLOCKSIZE;
bp = charptr (header_copy + 1);
for (size -= BLOCKSIZE; size > 0; size -= written)
{
@@ -487,10 +487,9 @@ read_header (union block **return_block, struct tar_stat_info *info,
if (written > size)
written = size;
memcpy (bp, data_block->buffer, written);
memcpy (bp, charptr (data_block), written);
bp += written;
set_next_block_after ((union block *)
(data_block->buffer + written - 1));
set_next_block_after (charptr (data_block) + written - 1);
}
*bp = '\0';
@@ -532,7 +531,7 @@ read_header (union block **return_block, struct tar_stat_info *info,
if (next_long_name)
{
name = next_long_name->buffer + BLOCKSIZE;
name = charptr (next_long_name + 1);
recent_long_name = next_long_name;
recent_long_name_blocks = next_long_name_blocks;
next_long_name = NULL;
@@ -564,7 +563,7 @@ read_header (union block **return_block, struct tar_stat_info *info,
if (next_long_link)
{
name = next_long_link->buffer + BLOCKSIZE;
name = charptr (next_long_link + 1);
recent_long_link = next_long_link;
recent_long_link_blocks = next_long_link_blocks;
next_long_link = NULL;

View File

@@ -417,7 +417,7 @@ sparse_dump_region (struct tar_sparse_file *file, idx_t i)
union block *blk = find_next_block ();
idx_t avail = available_space_after (blk);
idx_t bufsize = min (bytes_left, avail);
idx_t bytes_read = full_read (file->fd, blk->buffer, bufsize);
idx_t bytes_read = full_read (file->fd, charptr (blk), bufsize);
if (bytes_read < BLOCKSIZE)
memset (blk->buffer + bytes_read, 0, BLOCKSIZE - bytes_read);
bytes_left -= bytes_read;
@@ -450,7 +450,7 @@ sparse_dump_region (struct tar_sparse_file *file, idx_t i)
return false;
}
set_next_block_after (blk + ((bufsize - 1) >> LG_BLOCKSIZE));
set_next_block_after (charptr (blk) + bufsize - 1);
}
return true;
@@ -482,9 +482,9 @@ sparse_extract_region (struct tar_sparse_file *file, idx_t i)
}
idx_t avail = available_space_after (blk);
idx_t wrbytes = min (write_size, avail);
set_next_block_after (blk + ((wrbytes - 1) >> LG_BLOCKSIZE));
set_next_block_after (charptr (blk) + wrbytes - 1);
file->dumped_size += avail;
idx_t count = blocking_write (file->fd, blk->buffer, wrbytes);
idx_t count = blocking_write (file->fd, charptr (blk), wrbytes);
write_size -= count;
mv_size_left (file->stat_info->archive_file_size - file->dumped_size);
file->offset += count;

View File

@@ -140,7 +140,7 @@ sys_truncate (int fd)
idx_t
sys_write_archive_buffer (void)
{
return full_write (archive, record_start->buffer, record_size);
return full_write (archive, charptr (record_start), record_size);
}
/* Set ARCHIVE for writing, then compressing an archive. */
@@ -309,7 +309,7 @@ is_regular_file (const char *name)
idx_t
sys_write_archive_buffer (void)
{
return rmtwrite (archive, record_start->buffer, record_size);
return rmtwrite (archive, charptr (record_start), record_size);
}
/* Read and write file descriptors from a pipe(pipefd) call. */
@@ -458,7 +458,7 @@ sys_child_open_for_compress (void)
/* Assemble a record. */
for (length = 0, cursor = record_start->buffer;
for (length = 0, cursor = charptr (record_start);
length < record_size;
length += status, cursor += status)
{
@@ -481,7 +481,7 @@ sys_child_open_for_compress (void)
if (length > 0)
{
memset (record_start->buffer + length, 0, record_size - length);
memset (charptr (record_start) + length, 0, record_size - length);
status = sys_write_archive_buffer ();
if (status != record_size)
archive_write_error (status);
@@ -622,12 +622,12 @@ sys_child_open_for_uncompress (void)
clear_read_error_count ();
ptrdiff_t n;
while ((n = rmtread (archive, record_start->buffer, record_size)) < 0)
while ((n = rmtread (archive, charptr (record_start), record_size)) < 0)
archive_read_error ();
if (n == 0)
break;
char *cursor = record_start->buffer;
char *cursor = charptr (record_start);
do
{
idx_t count = min (n, BLOCKSIZE);

View File

@@ -57,15 +57,15 @@ append_file (char *file_name)
{
union block *start = find_next_block ();
idx_t bufsize = available_space_after (start);
idx_t status = full_read (handle, start->buffer, bufsize);
idx_t status = full_read (handle, charptr (start), bufsize);
if (status < bufsize && errno)
read_fatal (file_name);
if (status == 0)
break;
idx_t rem = status % BLOCKSIZE;
if (rem)
memset (start->buffer + (status - rem), 0, BLOCKSIZE - rem);
set_next_block_after (start + ((status - 1) >> LG_BLOCKSIZE));
memset (charptr (start) + (status - rem), 0, BLOCKSIZE - rem);
set_next_block_after (charptr (start) + status - 1);
}
if (close (handle) < 0)
@@ -206,7 +206,7 @@ update_archive (void)
reset_eof ();
time_to_start_writing = true;
output_start = current_block->buffer;
output_start = charptr (current_block);
{
struct name const *p;