Do not create empty placeholder files

* src/extract.c (HAVE_BIRTHTIME, BIRTHTIME_EQ, struct delayed_link)
(delayed_link_table, delayed_link_head, delayed_link_tail)
(struct string_list, dl_hash, dl_compare, find_direct_ancestor)
(find_delayed_link_source, create_placeholder_file, apply_delayed_link)
(apply_delayed_links): Remove.  All uses removed.
(struct delayed_set_stat): New member metadata_set,
replacing after_links.  All uses changed.
(apply_nonancestor_delayed_set_stat): Arg METADATA_SET replaces
the old AFTER_LINKS.  All callers changed.
(extract_archive): Do not worry about "..", since openat2
now does that for us.
* src/names.c (contains_dot_dot): Remove.  All uses removed.
This commit is contained in:
Paul Eggert
2025-11-13 22:59:24 -08:00
parent 75b03fdff4
commit 50b559c3d7
5 changed files with 24 additions and 448 deletions

5
NEWS
View File

@@ -62,6 +62,11 @@ option.
** Sparse files are now read and written with larger blocksizes.
** When extracting, tar no longer creates empty placeholder files
that are later replaced by symbolic links. The placeholders are no
longer needed now that tar no longer follows symbolic links to
targets outside the working directory.
version 1.35 - Sergey Poznyakoff, 2023-07-18

View File

@@ -9549,7 +9549,8 @@ really @file{etc/passwd}.
File names containing @file{..} can cause problems when extracting, so
@command{tar} normally warns you about such files when creating an
archive, and rejects attempts to extracts such files.
archive, and prevents attempts to extract such files if that would
affect files outside the working directory.
Other @command{tar} programs do not do this. As a result, if you
create an archive whose member names start with a slash, they will be
@@ -9565,10 +9566,6 @@ is not, generally speaking, the same as the one you'd get running
scripts for comparing both outputs. @xref{listing member and file names},
for the information on how to handle this case.}.
Symbolic links containing @file{..} or leading @samp{/} can also cause
problems when extracting, so @command{tar} normally extracts them last;
it may create empty files as placeholders during extraction.
If you use the @option{--absolute-names} (@option{-P}) option,
@command{tar} will do none of these transformations.

View File

@@ -826,8 +826,6 @@ bool all_names_found (struct tar_stat_info *st);
void add_avoided_name (char const *name);
bool is_avoided_name (char const *name);
bool contains_dot_dot (char const *name);
COMMON_INLINE bool
isfound (struct name const *c)
{

View File

@@ -47,21 +47,6 @@ static mode_t const all_mode_bits = ~ (mode_t) 0;
# define fchown(fd, uid, gid) (errno = ENOSYS, -1)
#endif
/* HAVE_BIRTHTIME is 1 when any of the birth-time feature macros
   tested below is defined, or when building for native Windows
   (not Cygwin); otherwise it is 0.

   BIRTHTIME_EQ (A, B) compares two birth times with timespec_cmp
   when HAVE_BIRTHTIME is 1.  When it is 0 the macro expands to
   plain `true' and does not evaluate its arguments at all.  */
#if (defined HAVE_STRUCT_STAT_ST_BIRTHTIMESPEC_TV_NSEC \
     || defined HAVE_STRUCT_STAT_ST_BIRTHTIM_TV_NSEC \
     || defined HAVE_STRUCT_STAT_ST_BIRTHTIMENSEC \
     || (defined _WIN32 && ! defined __CYGWIN__))
# define HAVE_BIRTHTIME 1
# define BIRTHTIME_EQ(a, b) (timespec_cmp (a, b) == 0)
#else
# define HAVE_BIRTHTIME 0
# define BIRTHTIME_EQ(a, b) true
#endif
/* Return true if an error number ERR means the system call is
supported in this case. */
static bool
@@ -73,13 +58,9 @@ implemented (int err)
}
/* List of directories whose statuses we need to extract after we've
finished extracting their subsidiary files. If you consider each
contiguous subsequence of elements of the form [D]?[^D]*, where [D]
represents an element where AFTER_LINKS is nonzero and [^D]
represents an element where AFTER_LINKS is zero, then the head
of the subsequence has the longest name, and each non-head element
in the prefix is an ancestor (in the directory hierarchy) of the
preceding element. */
finished extracting their subsidiary files. The head of the list
has the longest name, and each non-head element is an ancestor (in
the directory hierarchy) of the preceding element. */
struct delayed_set_stat
{
@@ -87,6 +68,7 @@ struct delayed_set_stat
struct delayed_set_stat *next;
/* Metadata for this directory. */
bool metadata_set;
dev_t st_dev;
ino_t st_ino;
mode_t mode; /* The desired mode is MODE & ~ current_umask. */
@@ -112,10 +94,6 @@ struct delayed_set_stat
directory. */
int atflag;
/* Do not set the status of this directory until after delayed
links are created. */
bool after_links;
/* Directory that the name is relative to. */
idx_t change_dir;
@@ -136,90 +114,6 @@ static struct delayed_set_stat *delayed_set_stat_head;
/* Table of delayed stat updates hashed by path; null if none. */
static Hash_table *delayed_set_stat_table;
/* A link whose creation we have delayed.  One record describes one
placeholder file together with every name (source) that must end up
referring to the final link.  The record is allocated with room for
the target string in its trailing flexible array member. */
struct delayed_link
{
/* The next in a list of delayed links that should be made after
this delayed link. */
struct delayed_link *next;
/* The device, inode number and birthtime of the placeholder.
birthtime.tv_nsec is negative if the birthtime is not available.
Don't use mtime as this would allow for false matches if some
other process removes the placeholder. Don't use ctime as
this would cause race conditions and other screwups, e.g.,
when restoring hard-linked symlinks. */
dev_t st_dev;
ino_t st_ino;
#if HAVE_BIRTHTIME
struct timespec birthtime;
#endif
/* True if the link is symbolic. */
bool is_symlink;
/* The desired metadata, valid only if the link is symbolic. */
mode_t mode;
uid_t uid;
gid_t gid;
struct timespec atime;
struct timespec mtime;
/* The directory that the sources and target are relative to. */
idx_t change_dir;
/* A list of sources for this link. The sources are all to be
hard-linked together. */
struct string_list *sources;
/* SELinux context */
char *cntx_name;
/* ACLs: access (acls_a_*) and default (acls_d_*) ACL text and
lengths -- NOTE(review): meaning of the a/d suffixes is inferred
from the names; confirm against struct tar_stat_info. */
char *acls_a_ptr;
idx_t acls_a_len;
char *acls_d_ptr;
idx_t acls_d_len;
/* Extended attributes to apply to the link. */
struct xattr_map xattr_map;
/* The desired target of the desired link. */
char target[FLEXIBLE_ARRAY_MEMBER];
};
/* Table of delayed links hashed by device and inode; null if none. */
static Hash_table *delayed_link_table;
/* A list of the delayed links in tar file order,
and the tail of that list.  The tail is the address of the pointer
where the next delayed link is to be stored; it starts out as the
address of the (initially null) head, so that appending needs no
special case for the empty list. */
static struct delayed_link *delayed_link_head;
static struct delayed_link **delayed_link_tail = &delayed_link_head;
/* A node of a singly linked list of strings.  Each node carries its
string inline in the trailing flexible array member, so a node is
allocated with room for the string and its terminating null byte. */
struct string_list
{
/* The next node, or null at the end of the list. */
struct string_list *next;
/* The null-terminated string held by this node. */
char string[FLEXIBLE_ARRAY_MEMBER];
};
/* Hash function for a table of delayed links.  ENTRY points to a
   struct delayed_link; the result is derived from its device and
   inode numbers and lies in the range 0 .. TABLE_SIZE - 1.  */
static size_t
dl_hash (void const *entry, size_t table_size)
{
  struct delayed_link const *link = entry;
  uintmax_t hash = link->st_dev;

  /* If the device number type is narrower than uintmax_t, move the
     device bits to the high end before mixing in the inode number
     (presumably so the two values overlap less).  */
  int spare_bits = TYPE_WIDTH (hash) - TYPE_WIDTH (link->st_dev);
  if (spare_bits > 0)
    hash <<= spare_bits;

  return (hash ^ link->st_ino) % table_size;
}
/* Comparison function for a table of delayed links.  A and B point
   to struct delayed_link objects; return true if PSAME_INODE says
   they refer to the same inode.  */
static bool
dl_compare (void const *a, void const *b)
{
  struct delayed_link const *lhs = a;
  struct delayed_link const *rhs = b;
  return PSAME_INODE (lhs, rhs);
}
static size_t
ds_hash (void const *entry, size_t table_size)
{
@@ -475,27 +369,8 @@ set_stat (char const *file_name,
xattrs_selinux_set (st, file_name, typeflag);
}
/* Search the delayed_set_stat list for the directory that directly
   contains FILE_NAME, considering only entries whose after_links
   flag is clear.  An entry matches when its name is a prefix of
   FILE_NAME, the prefix is followed by a slash, and the last
   component of FILE_NAME begins right after that slash.
   Return the matching entry, or a null pointer if there is none.  */
static struct delayed_set_stat *
find_direct_ancestor (char const *file_name)
{
  for (struct delayed_set_stat *dir = delayed_set_stat_head;
       dir;
       dir = dir->next)
    {
      if (dir->after_links)
        continue;
      if (strncmp (file_name, dir->file_name, dir->file_name_len) != 0)
        continue;
      if (! ISSLASH (file_name[dir->file_name_len]))
        continue;
      if (last_component (file_name) == file_name + dir->file_name_len + 1)
        return dir;
    }

  return NULL;
}
/* For each entry H in the leading prefix of entries in HEAD that do
not have after_links marked, mark H and fill in its dev and ino
members. Assume HEAD && ! HEAD->after_links. */
/* For each entry H in the entries in HEAD, mark H and fill in its dev
and ino members. Assume HEAD. */
static void
mark_after_links (struct delayed_set_stat *head)
{
@@ -504,7 +379,7 @@ mark_after_links (struct delayed_set_stat *head)
do
{
struct stat st;
h->after_links = 1;
h->metadata_set = true;
if (deref_stat (h->file_name, &st) < 0)
stat_error (h->file_name);
@@ -514,7 +389,7 @@ mark_after_links (struct delayed_set_stat *head)
h->st_ino = st.st_ino;
}
}
while ((h = h->next) && ! h->after_links);
while ((h = h->next) && ! h->metadata_set);
}
/* Remember to restore stat attributes (owner, group, mode and times)
@@ -579,7 +454,7 @@ delay_set_stat (char const *file_name, struct tar_stat_info const *st,
data->file_name = xstrdup (file_name);
if (! hash_insert (delayed_set_stat_table, data))
xalloc_die ();
data->after_links = false;
data->metadata_set = false;
if (st)
{
data->st_dev = st->stat.st_dev;
@@ -1000,12 +875,11 @@ set_xattr (MAYBE_UNUSED char const *file_name,
}
/* Fix the statuses of all directories whose statuses need fixing, and
which are not ancestors of FILE_NAME. If AFTER_LINKS is
nonzero, do this for all such directories; otherwise, stop at the
first directory that is marked to be fixed up only after delayed
links are applied. */
which are not ancestors of FILE_NAME. If METADATA_SET,
do this for all such directories; otherwise, stop at the
first directory with metadata already determined. */
static void
apply_nonancestor_delayed_set_stat (char const *file_name, bool after_links)
apply_nonancestor_delayed_set_stat (char const *file_name, bool metadata_set)
{
idx_t file_name_len = strlen (file_name);
bool check_for_renamed_directories = 0;
@@ -1018,9 +892,9 @@ apply_nonancestor_delayed_set_stat (char const *file_name, bool after_links)
mode_t current_mode = data->current_mode;
mode_t current_mode_mask = data->current_mode_mask;
check_for_renamed_directories |= data->after_links;
check_for_renamed_directories |= data->metadata_set;
if (after_links < data->after_links
if (metadata_set < data->metadata_set
|| (data->file_name_len < file_name_len
&& file_name[data->file_name_len]
&& (ISSLASH (file_name[data->file_name_len])
@@ -1413,143 +1287,6 @@ extract_file (char *file_name, char typeflag)
return status == 0;
}
/* Return true if NAME is a delayed link. This can happen only if the link
placeholder file has been created. Therefore, try to stat the NAME
first. If it doesn't exist, there is no matching entry in the table.
Otherwise, look for the entry in the table that has the matching dev
and ino numbers. Return false if not found.
Do not rely on comparing file names, which may differ for
various reasons (e.g. relative vs. absolute file names).
*/
static bool
find_delayed_link_source (char const *name)
{
struct stat st;
if (!delayed_link_table)
return false;
struct fdbase f = fdbase (name);
if (f.fd == BADFD || fstatat (f.fd, f.base, &st, AT_SYMLINK_NOFOLLOW) < 0)
{
if (errno != ENOENT)
stat_error (name);
return false;
}
struct delayed_link dl;
dl.st_dev = st.st_dev;
dl.st_ino = st.st_ino;
return hash_lookup (delayed_link_table, &dl) != NULL;
}
/* Create a placeholder file with name FILE_NAME, which will be
replaced after other extraction is done by a symbolic link if
IS_SYMLINK is true, and by a hard link otherwise. Set
*INTERDIR_MADE if an intermediate directory is made in the
process.

Return true if the placeholder was created and recorded, if the
placeholder already existed for an earlier copy of the same link,
or if recovery decided the member should be skipped.  Return false
if the file could not be opened, examined or closed; the failure
has then already been reported.
*/
static bool
create_placeholder_file (char *file_name, bool is_symlink, bool *interdir_made)
{
int fd;
struct stat st;
/* Keep trying to create the placeholder until it succeeds or
recovery gives up. */
for (;;)
{
struct fdbase f = fdbase (file_name);
if (f.fd != BADFD)
{
/* O_EXCL makes the open fail if the name already exists; the
placeholder is created with no permission bits at all. */
fd = openat (f.fd, f.base, O_WRONLY | O_CREAT | O_EXCL, 0);
if (0 <= fd)
break;
}
if (errno == EEXIST && find_delayed_link_source (file_name))
{
/* The placeholder file has already been created. This means
that the link being extracted is a duplicate of an already
processed one. Skip it.
*/
return true;
}
switch (maybe_recoverable (file_name, false, interdir_made))
{
case RECOVER_OK:
continue;
case RECOVER_SKIP:
return true;
case RECOVER_NO:
open_error (file_name);
return false;
}
}
/* The placeholder now exists; its identity (device and inode) is
needed to recognize it again later. */
if (fstat (fd, &st) < 0)
{
stat_error (file_name);
close (fd);
}
else if (close (fd) < 0)
close_error (file_name);
else
{
struct delayed_set_stat *h;
/* Allocate the record with room for the link target string. */
struct delayed_link *p =
xmalloc (FLEXNSIZEOF (struct delayed_link, target,
strlen (current_stat_info.link_name) + 1));
p->next = NULL;
p->st_dev = st.st_dev;
p->st_ino = st.st_ino;
#if HAVE_BIRTHTIME
p->birthtime = get_stat_birthtime (&st);
#endif
p->is_symlink = is_symlink;
/* The metadata members are filled in only for symbolic links. */
if (is_symlink)
{
p->mode = current_stat_info.stat.st_mode;
p->uid = current_stat_info.stat.st_uid;
p->gid = current_stat_info.stat.st_gid;
p->atime = current_stat_info.atime;
p->mtime = current_stat_info.mtime;
}
p->change_dir = chdir_current;
/* FILE_NAME becomes the first (and so far only) source. */
p->sources = xmalloc (FLEXNSIZEOF (struct string_list, string,
strlen (file_name) + 1));
p->sources->next = 0;
strcpy (p->sources->string, file_name);
p->cntx_name = NULL;
assign_string_or_null (&p->cntx_name, current_stat_info.cntx_name);
/* The ACL members are left empty here. */
p->acls_a_ptr = NULL;
p->acls_a_len = 0;
p->acls_d_ptr = NULL;
p->acls_d_len = 0;
xattr_map_init (&p->xattr_map);
xattr_map_copy (&p->xattr_map, &current_stat_info.xattr_map);
strcpy (p->target, current_stat_info.link_name);
/* Append to the list, which is kept in tar file order. */
*delayed_link_tail = p;
delayed_link_tail = &p->next;
/* Create the hash table on first use, then index the record. */
if (! ((delayed_link_table
|| (delayed_link_table = hash_initialize (0, 0, dl_hash,
dl_compare, free)))
&& hash_insert (delayed_link_table, p)))
xalloc_die ();
/* If the containing directory has a delayed status update, mark
it (see mark_after_links) so it is handled after the links. */
if ((h = find_direct_ancestor (file_name)) != NULL)
mark_after_links (h);
return true;
}
return false;
}
static bool
extract_link (char *file_name, MAYBE_UNUSED char typeflag)
{
@@ -1559,10 +1296,6 @@ extract_link (char *file_name, MAYBE_UNUSED char typeflag)
link_name = current_stat_info.link_name;
if ((! absolute_names_option && contains_dot_dot (link_name))
|| find_delayed_link_source (link_name))
return create_placeholder_file (file_name, false, &interdir_made);
do
{
struct stat st, st1;
@@ -1579,28 +1312,7 @@ extract_link (char *file_name, MAYBE_UNUSED char typeflag)
}
if (status == 0)
{
if (delayed_link_table
&& fstatat (f1.fd, f1.base, &st1, AT_SYMLINK_NOFOLLOW) == 0)
{
struct delayed_link dl1;
dl1.st_ino = st1.st_ino;
dl1.st_dev = st1.st_dev;
struct delayed_link *ds = hash_lookup (delayed_link_table, &dl1);
if (ds && ds->change_dir == chdir_current
&& BIRTHTIME_EQ (ds->birthtime, get_stat_birthtime (&st1)))
{
struct string_list *p
= xmalloc (FLEXNSIZEOF (struct string_list,
string, strlen (file_name) + 1));
strcpy (p->string, file_name);
p->next = ds->sources;
ds->sources = p;
}
}
return true;
}
return true;
int e = errno;
if ((e == EEXIST && streq (link_name, file_name))
@@ -1629,11 +1341,6 @@ extract_symlink (char *file_name, MAYBE_UNUSED char typeflag)
{
bool interdir_made = false;
if (! absolute_names_option
&& (IS_ABSOLUTE_FILE_NAME (current_stat_info.link_name)
|| contains_dot_dot (current_stat_info.link_name)))
return create_placeholder_file (file_name, true, &interdir_made);
for (struct fdbase f;
((f = fdbase (file_name)).fd == BADFD
|| symlinkat (current_stat_info.link_name, f.fd, f.base) < 0);
@@ -1857,20 +1564,12 @@ void
extract_archive (void)
{
char typeflag;
bool skip_dotdot_name;
fatal_exit_hook = extract_finish;
set_next_block_after (current_header);
skip_dotdot_name = (!absolute_names_option
&& contains_dot_dot (current_stat_info.orig_file_name));
if (skip_dotdot_name)
paxerror (0, _("%s: Member name contains '..'"),
quotearg_colon (current_stat_info.orig_file_name));
if (!current_stat_info.file_name[0]
|| skip_dotdot_name
|| (interactive_option
&& !confirm ("extract", current_stat_info.file_name)))
{
@@ -1922,115 +1621,11 @@ extract_archive (void)
undo_last_backup ();
}
/* Extract the link DS whose final extraction was delayed.

For every source name recorded in DS whose placeholder is still in
place, remove the placeholder and put the real link there instead.
Failures are reported through the *_error functions; nothing is
returned. */
static void
apply_delayed_link (struct delayed_link *ds)
{
struct string_list *sources = ds->sources;
/* Name of a source at which a symbolic link has already been made
successfully; later sources are hard-linked to it when possible. */
char const *valid_source = NULL;
/* The sources and the target are relative to this directory. */
chdir_do (ds->change_dir);
for (sources = ds->sources; sources; sources = sources->next)
{
char const *source = sources->string;
struct stat st;
/* Make sure the placeholder file is still there. If not,
don't create a link, as the placeholder was probably
removed by a later extraction. */
struct fdbase f = fdbase (source);
if (f.fd != BADFD && fstatat (f.fd, f.base, &st, AT_SYMLINK_NOFOLLOW) == 0
&& SAME_INODE (st, *ds)
&& BIRTHTIME_EQ (get_stat_birthtime (&st), ds->birthtime))
{
/* Unlink the placeholder, then create a hard link if possible,
a symbolic link otherwise. */
struct fdbase f1;
if (unlinkat (f.fd, f.base, 0) < 0)
unlink_error (source);
/* First choice: hard-link to a link already made for this record. */
else if (valid_source
&& ((f1 = f.fd == BADFD ? f : fdbase1 (valid_source)).fd
!= BADFD)
&& linkat (f1.fd, f1.base, f.fd, f.base, 0) == 0)
;
/* The record describes a hard link: link to its target. */
else if (!ds->is_symlink)
{
f1 = f.fd == BADFD ? f : fdbase1 (ds->target);
if (f1.fd == BADFD
|| linkat (f1.fd, f1.base, f.fd, f.base, 0) < 0)
link_error (ds->target, source);
}
/* The record describes a symbolic link: create it. */
else if (symlinkat (ds->target, f.fd, f.base) < 0)
symlink_error (ds->target, source);
else
{
/* The symbolic link was made; apply the metadata saved in DS.
Only the members assigned below are set in ST1. */
struct tar_stat_info st1;
st1.stat.st_mode = ds->mode;
st1.stat.st_uid = ds->uid;
st1.stat.st_gid = ds->gid;
st1.atime = ds->atime;
st1.mtime = ds->mtime;
st1.cntx_name = ds->cntx_name;
st1.acls_a_ptr = ds->acls_a_ptr;
st1.acls_a_len = ds->acls_a_len;
st1.acls_d_ptr = ds->acls_d_ptr;
st1.acls_d_len = ds->acls_d_len;
st1.xattr_map = ds->xattr_map;
set_stat (source, &st1, -1, 0, 0, SYMTYPE,
false, AT_SYMLINK_NOFOLLOW);
/* Remember this name so remaining sources can hard-link to it. */
valid_source = source;
}
}
}
/* There is little point to freeing, as we are about to exit,
and freeing is more likely to cause than cure trouble. */
if (false)
{
for (sources = ds->sources; sources; )
{
struct string_list *next = sources->next;
free (sources);
sources = next;
}
xattr_map_free (&ds->xattr_map);
free (ds->cntx_name);
}
}
/* Extract the links whose final extraction were delayed. */
static void
apply_delayed_links (void)
{
for (struct delayed_link *ds = delayed_link_head; ds; ds = ds->next)
apply_delayed_link (ds);
if (false && delayed_link_table)
{
/* There is little point to freeing, as we are about to exit,
and freeing is more likely to cause than cure trouble.
Also, the above code has not bothered to free the list
in delayed_link_head. */
hash_free (delayed_link_table);
delayed_link_table = NULL;
}
}
/* Finish the extraction of an archive. */
void
extract_finish (void)
{
/* First, fix the status of ordinary directories that need fixing. */
apply_nonancestor_delayed_set_stat ("", false);
/* Then, apply delayed links, so that they don't affect delayed
directory status-setting for ordinary directories. */
apply_delayed_links ();
/* Finally, fix the status of directories that are ancestors
of delayed links. */
/* Fix the status of ordinary directories that need fixing. */
apply_nonancestor_delayed_set_stat ("", true);
/* This table should be empty after apply_nonancestor_delayed_set_stat. */

View File

@@ -1989,22 +1989,3 @@ stripped_prefix_len (char const *file_name, idx_t num)
}
return -1;
}
/* Return true if some file name component of NAME is exactly "..",
   false otherwise.  Any file system prefix of NAME, as measured by
   FILE_SYSTEM_PREFIX_LEN, is skipped before the scan starts.  */
bool
contains_dot_dot (char const *name)
{
  char const *component = name + FILE_SYSTEM_PREFIX_LEN (name);

  for (;;)
    {
      /* COMPONENT is at the start of a component: is it ".."
         followed by a slash or by the end of the string?  */
      if (component[0] == '.' && component[1] == '.'
          && (!component[2] || ISSLASH (component[2])))
        return true;

      /* Advance to the slash that ends this component; reaching the
         end of the string first means no ".." was found.  */
      for (; ! ISSLASH (*component); component++)
        if (! *component)
          return false;

      /* Step over the slash to the start of the next component.  */
      component++;
    }
}