Avoid quadratic behavior with delayed links
Do this by searching a hash table instead of a linked list. Problem reported by Martin Dørum in https://mort.coffee/home/tar/ via Gavin Smith in: https://lists.gnu.org/r/bug-tar/2022-07/msg00003.html * src/extract.c: Include hash.h. Improve performance a bit on non-birthtime hosts (struct delayed_link.has_predecessor): New member. (delayed_link_head): Remove, replacing with ... (delayed_link_table): ... this new variable. All uses of linked list replaced with hash table. (dl_hash, dl_compare): New functions for hash table. (create_placeholder_file): Initialize has_predecessor. (apply_delayed_link): New function, with body taken from most of the old apply_delayed_link. (apply_delayed_links): Use it. Respect has_predecessor. Don’t bother freeing as we are about to exit.
This commit is contained in:
247
src/extract.c
247
src/extract.c
@@ -22,6 +22,7 @@
|
||||
#include <system.h>
|
||||
#include <quotearg.h>
|
||||
#include <errno.h>
|
||||
#include <hash.h>
|
||||
#include <priv-set.h>
|
||||
#include <root-uid.h>
|
||||
#include <utimens.h>
|
||||
@@ -128,12 +129,16 @@ struct delayed_set_stat
|
||||
|
||||
static struct delayed_set_stat *delayed_set_stat_head;
|
||||
|
||||
/* List of links whose creation we have delayed. */
|
||||
/* A link whose creation we have delayed. */
|
||||
struct delayed_link
|
||||
{
|
||||
/* The next delayed link in the list. */
|
||||
/* The next in a list of delayed links that should be made after
|
||||
this delayed link. */
|
||||
struct delayed_link *next;
|
||||
|
||||
/* Whether this delayed link has a predecessor in the NEXT list. */
|
||||
bool has_predecessor;
|
||||
|
||||
/* The device, inode number and birthtime of the placeholder.
|
||||
birthtime.tv_nsec is negative if the birthtime is not available.
|
||||
Don't use mtime as this would allow for false matches if some
|
||||
@@ -178,7 +183,7 @@ struct delayed_link
|
||||
char target[1];
|
||||
};
|
||||
|
||||
static struct delayed_link *delayed_link_head;
|
||||
static Hash_table *delayed_link_table;
|
||||
|
||||
struct string_list
|
||||
{
|
||||
@@ -186,6 +191,25 @@ struct string_list
|
||||
char string[1];
|
||||
};
|
||||
|
||||
static size_t
|
||||
dl_hash (void const *entry, size_t table_size)
|
||||
{
|
||||
struct delayed_link const *dl = entry;
|
||||
uintmax_t n = dl->dev;
|
||||
int nshift = TYPE_WIDTH (n) - TYPE_WIDTH (dl->dev);
|
||||
if (0 < nshift)
|
||||
n <<= nshift;
|
||||
n ^= dl->ino;
|
||||
return n % table_size;
|
||||
}
|
||||
|
||||
static bool
|
||||
dl_compare (void const *a, void const *b)
|
||||
{
|
||||
struct delayed_link const *da = a, *db = b;
|
||||
return (da->dev == db->dev) & (da->ino == db->ino);
|
||||
}
|
||||
|
||||
/* Set up to extract files. */
|
||||
void
|
||||
extr_init (void)
|
||||
@@ -1349,23 +1373,21 @@ extract_file (char *file_name, int typeflag)
|
||||
}
|
||||
|
||||
/* Find a delayed_link structure corresponding to the source NAME.
|
||||
Such a structure exists in the delayed link list only if the link
|
||||
Such a structure exists in the delayed link table only if the link
|
||||
placeholder file has been created. Therefore, try to stat the NAME
|
||||
first. If it doesn't exist, there is no matching entry in the list.
|
||||
Otherwise, look for the entry in list which has the matching dev
|
||||
and ino numbers.
|
||||
first. If it doesn't exist, there is no matching entry in the table.
|
||||
Otherwise, look for the entry in the table that has the matching dev
|
||||
and ino numbers. Return a null pointer if not found.
|
||||
|
||||
This approach avoids scanning the singly-linked list in obvious cases
|
||||
and does not rely on comparing file names, which may differ for
|
||||
Do not rely on comparing file names, which may differ for
|
||||
various reasons (e.g. relative vs. absolute file names).
|
||||
*/
|
||||
static struct delayed_link *
|
||||
find_delayed_link_source (char const *name)
|
||||
{
|
||||
struct delayed_link *dl;
|
||||
struct stat st;
|
||||
|
||||
if (!delayed_link_head)
|
||||
if (!delayed_link_table)
|
||||
return NULL;
|
||||
|
||||
if (fstatat (chdir_fd, name, &st, AT_SYMLINK_NOFOLLOW))
|
||||
@@ -1375,12 +1397,10 @@ find_delayed_link_source (char const *name)
|
||||
return NULL;
|
||||
}
|
||||
|
||||
for (dl = delayed_link_head; dl; dl = dl->next)
|
||||
{
|
||||
if (dl->dev == st.st_dev && dl->ino == st.st_ino)
|
||||
break;
|
||||
}
|
||||
return dl;
|
||||
struct delayed_link dl;
|
||||
dl.dev = st.st_dev;
|
||||
dl.ino = st.st_ino;
|
||||
return hash_lookup (delayed_link_table, &dl);
|
||||
}
|
||||
|
||||
/* Create a placeholder file with name FILE_NAME, which will be
|
||||
@@ -1388,9 +1408,7 @@ find_delayed_link_source (char const *name)
|
||||
IS_SYMLINK is true, and by a hard link otherwise. Set
|
||||
*INTERDIR_MADE if an intermediate directory is made in the
|
||||
process.
|
||||
Install the created struct delayed_link after PREV, unless the
|
||||
latter is NULL, in which case insert it at the head of the delayed
|
||||
link list.
|
||||
If PREV, install the created struct delayed_link after PREV.
|
||||
*/
|
||||
|
||||
static int
|
||||
@@ -1443,12 +1461,14 @@ create_placeholder_file (char *file_name, bool is_symlink, bool *interdir_made,
|
||||
{
|
||||
p->next = prev->next;
|
||||
prev->next = p;
|
||||
p->has_predecessor = true;
|
||||
}
|
||||
else
|
||||
{
|
||||
p->next = delayed_link_head;
|
||||
delayed_link_head = p;
|
||||
p->next = NULL;
|
||||
p->has_predecessor = false;
|
||||
}
|
||||
|
||||
p->dev = st.st_dev;
|
||||
p->ino = st.st_ino;
|
||||
#if HAVE_BIRTHTIME
|
||||
@@ -1478,6 +1498,12 @@ create_placeholder_file (char *file_name, bool is_symlink, bool *interdir_made,
|
||||
xattr_map_copy (&p->xattr_map, ¤t_stat_info.xattr_map);
|
||||
strcpy (p->target, current_stat_info.link_name);
|
||||
|
||||
if (! ((delayed_link_table
|
||||
|| (delayed_link_table = hash_initialize (0, 0, dl_hash,
|
||||
dl_compare, free)))
|
||||
&& hash_insert (delayed_link_table, p)))
|
||||
xalloc_die ();
|
||||
|
||||
if ((h = find_direct_ancestor (file_name)) != NULL)
|
||||
mark_after_links (h);
|
||||
|
||||
@@ -1512,13 +1538,14 @@ extract_link (char *file_name, int typeflag)
|
||||
|
||||
if (status == 0)
|
||||
{
|
||||
struct delayed_link *ds = delayed_link_head;
|
||||
if (ds
|
||||
if (delayed_link_table
|
||||
&& fstatat (chdir_fd, link_name, &st1, AT_SYMLINK_NOFOLLOW) == 0)
|
||||
for (; ds; ds = ds->next)
|
||||
if (ds->change_dir == chdir_current
|
||||
&& ds->dev == st1.st_dev
|
||||
&& ds->ino == st1.st_ino
|
||||
{
|
||||
struct delayed_link dl1;
|
||||
dl1.ino = st1.st_ino;
|
||||
dl1.dev = st1.st_dev;
|
||||
struct delayed_link *ds = hash_lookup (delayed_link_table, &dl1);
|
||||
if (ds && ds->change_dir == chdir_current
|
||||
&& BIRTHTIME_EQ (ds->birthtime, get_stat_birthtime (&st1)))
|
||||
{
|
||||
struct string_list *p = xmalloc (offsetof (struct string_list, string)
|
||||
@@ -1526,8 +1553,9 @@ extract_link (char *file_name, int typeflag)
|
||||
strcpy (p->string, file_name);
|
||||
p->next = ds->sources;
|
||||
ds->sources = p;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
else if ((e == EEXIST && strcmp (link_name, file_name) == 0)
|
||||
@@ -1853,86 +1881,103 @@ extract_archive (void)
|
||||
undo_last_backup ();
|
||||
}
|
||||
|
||||
/* Extract the link DS whose final extraction was delayed. */
|
||||
static void
|
||||
apply_delayed_link (struct delayed_link *ds)
|
||||
{
|
||||
struct string_list *sources = ds->sources;
|
||||
char const *valid_source = 0;
|
||||
|
||||
chdir_do (ds->change_dir);
|
||||
|
||||
for (sources = ds->sources; sources; sources = sources->next)
|
||||
{
|
||||
char const *source = sources->string;
|
||||
struct stat st;
|
||||
|
||||
/* Make sure the placeholder file is still there. If not,
|
||||
don't create a link, as the placeholder was probably
|
||||
removed by a later extraction. */
|
||||
if (fstatat (chdir_fd, source, &st, AT_SYMLINK_NOFOLLOW) == 0
|
||||
&& st.st_dev == ds->dev
|
||||
&& st.st_ino == ds->ino
|
||||
&& BIRTHTIME_EQ (get_stat_birthtime (&st), ds->birthtime))
|
||||
{
|
||||
/* Unlink the placeholder, then create a hard link if possible,
|
||||
a symbolic link otherwise. */
|
||||
if (unlinkat (chdir_fd, source, 0) != 0)
|
||||
unlink_error (source);
|
||||
else if (valid_source
|
||||
&& (linkat (chdir_fd, valid_source, chdir_fd, source, 0)
|
||||
== 0))
|
||||
;
|
||||
else if (!ds->is_symlink)
|
||||
{
|
||||
if (linkat (chdir_fd, ds->target, chdir_fd, source, 0) != 0)
|
||||
link_error (ds->target, source);
|
||||
}
|
||||
else if (symlinkat (ds->target, chdir_fd, source) != 0)
|
||||
symlink_error (ds->target, source);
|
||||
else
|
||||
{
|
||||
struct tar_stat_info st1;
|
||||
st1.stat.st_mode = ds->mode;
|
||||
st1.stat.st_uid = ds->uid;
|
||||
st1.stat.st_gid = ds->gid;
|
||||
st1.atime = ds->atime;
|
||||
st1.mtime = ds->mtime;
|
||||
st1.cntx_name = ds->cntx_name;
|
||||
st1.acls_a_ptr = ds->acls_a_ptr;
|
||||
st1.acls_a_len = ds->acls_a_len;
|
||||
st1.acls_d_ptr = ds->acls_d_ptr;
|
||||
st1.acls_d_len = ds->acls_d_len;
|
||||
st1.xattr_map = ds->xattr_map;
|
||||
set_stat (source, &st1, -1, 0, 0, SYMTYPE,
|
||||
false, AT_SYMLINK_NOFOLLOW);
|
||||
valid_source = source;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (sources = ds->sources; sources; )
|
||||
{
|
||||
struct string_list *next = sources->next;
|
||||
free (sources);
|
||||
sources = next;
|
||||
}
|
||||
|
||||
xattr_map_free (&ds->xattr_map);
|
||||
free (ds->cntx_name);
|
||||
}
|
||||
|
||||
/* Extract the links whose final extraction were delayed. */
|
||||
static void
|
||||
apply_delayed_links (void)
|
||||
{
|
||||
struct delayed_link *ds;
|
||||
|
||||
for (ds = delayed_link_head; ds; )
|
||||
{
|
||||
struct string_list *sources = ds->sources;
|
||||
char const *valid_source = 0;
|
||||
|
||||
chdir_do (ds->change_dir);
|
||||
|
||||
for (sources = ds->sources; sources; sources = sources->next)
|
||||
{
|
||||
char const *source = sources->string;
|
||||
struct stat st;
|
||||
|
||||
/* Make sure the placeholder file is still there. If not,
|
||||
don't create a link, as the placeholder was probably
|
||||
removed by a later extraction. */
|
||||
if (fstatat (chdir_fd, source, &st, AT_SYMLINK_NOFOLLOW) == 0
|
||||
&& st.st_dev == ds->dev
|
||||
&& st.st_ino == ds->ino
|
||||
&& BIRTHTIME_EQ (get_stat_birthtime (&st), ds->birthtime))
|
||||
{
|
||||
/* Unlink the placeholder, then create a hard link if possible,
|
||||
a symbolic link otherwise. */
|
||||
if (unlinkat (chdir_fd, source, 0) != 0)
|
||||
unlink_error (source);
|
||||
else if (valid_source
|
||||
&& (linkat (chdir_fd, valid_source, chdir_fd, source, 0)
|
||||
== 0))
|
||||
;
|
||||
else if (!ds->is_symlink)
|
||||
{
|
||||
if (linkat (chdir_fd, ds->target, chdir_fd, source, 0) != 0)
|
||||
link_error (ds->target, source);
|
||||
}
|
||||
else if (symlinkat (ds->target, chdir_fd, source) != 0)
|
||||
symlink_error (ds->target, source);
|
||||
else
|
||||
{
|
||||
struct tar_stat_info st1;
|
||||
st1.stat.st_mode = ds->mode;
|
||||
st1.stat.st_uid = ds->uid;
|
||||
st1.stat.st_gid = ds->gid;
|
||||
st1.atime = ds->atime;
|
||||
st1.mtime = ds->mtime;
|
||||
st1.cntx_name = ds->cntx_name;
|
||||
st1.acls_a_ptr = ds->acls_a_ptr;
|
||||
st1.acls_a_len = ds->acls_a_len;
|
||||
st1.acls_d_ptr = ds->acls_d_ptr;
|
||||
st1.acls_d_len = ds->acls_d_len;
|
||||
st1.xattr_map = ds->xattr_map;
|
||||
set_stat (source, &st1, -1, 0, 0, SYMTYPE,
|
||||
false, AT_SYMLINK_NOFOLLOW);
|
||||
valid_source = source;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (sources = ds->sources; sources; )
|
||||
{
|
||||
struct string_list *next = sources->next;
|
||||
free (sources);
|
||||
sources = next;
|
||||
}
|
||||
|
||||
xattr_map_free (&ds->xattr_map);
|
||||
free (ds->cntx_name);
|
||||
if (!delayed_link_table)
|
||||
return;
|
||||
|
||||
for (struct delayed_link *dl = hash_get_first (delayed_link_table);
|
||||
dl;
|
||||
dl = dl->next ? dl->next : hash_get_next (delayed_link_table, dl))
|
||||
if (!dl->has_predecessor)
|
||||
{
|
||||
struct delayed_link *next = ds->next;
|
||||
free (ds);
|
||||
ds = next;
|
||||
struct delayed_link *ds = dl;
|
||||
do
|
||||
{
|
||||
apply_delayed_link (ds);
|
||||
ds = ds->next;
|
||||
}
|
||||
while (ds);
|
||||
}
|
||||
}
|
||||
|
||||
delayed_link_head = 0;
|
||||
if (false)
|
||||
{
|
||||
/* There is little point to freeing, as we are about to exit,
|
||||
and freeing is more likely to cause than cure trouble. */
|
||||
hash_free (delayed_link_table);
|
||||
delayed_link_table = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/* Finish the extraction of an archive. */
|
||||
|
||||
Reference in New Issue
Block a user