Reduce memory consuption when handling the -T option.

The commit cdb27293 made the -T option more flexible, but
incurred a very considerable memory overhead by storing
all file names in the argument array.  In case of very
big file lists this caused tar to run out of memory.  This
was reported by Christian Wetzel <wetzel@phoenix-pacs.de>
on March 14, 2013
(http://lists.gnu.org/archive/html/bug-tar/2013-03/msg00018.html).

On the other hand, Michal Žeidl discovered that tar misfunctioned
when given empty file lists or lists with the trailing newline
misssing in the last entry.  This was reported by Pavel Raiskup
on July 23
(http://lists.gnu.org/archive/html/bug-tar/2013-07/msg00009.html and
msg00010.html).

This change fixes both issues.

* src/common.h (name_add_file,request_stdin): New prototype.
(more_options): New prototype.
* src/names.c (NELT_FILE): New entry type.
(name_elt) <file>: New union member.
(name_add_file): New function.
(read_name_from_file): New function, a rewrite of
the same function from tar.c
(read_next_name,copy_name): New static functions.
(name_next_elt): Handle NELT_FILE entries.
* src/tar.c (request_stdin): Make extern.
(read_name_from_file,add_file_id)
(update_argv): Removed.
(parse_opt): Change handling of the -T option.
(more_options): New function.

* tests/T-null.at: Rewrite test.
* tests/T-zfile.at: New file.
* tests/T-nonl.at: New file.
* tests/Makefile.am: Add new testcases.
* tests/testsuite.at: Likewise.

* THANKS: Update.
This commit is contained in:
Sergey Poznyakoff
2013-08-04 14:26:35 +03:00
parent 8a834dfa10
commit 26538c9bfc
10 changed files with 367 additions and 227 deletions

2
THANKS
View File

@@ -91,6 +91,7 @@ Christian Kirsch ck@held.mind.de
Christian Laubscher christian.laubscher@tiscalinet.ch
Christian T. Dum ctd@mpe-garching.mpg.de
Christian von Roques roques@pond.sub.org
Christian Wetzel wetzel@phoenix-pacs.de
Christoph Litauer litauer@mailhost.uni-koblenz.de
Christophe Colle colle@krtkg1.rug.ac.be
Christophe Kalt Christophe.Kalt@kbcfp.com
@@ -350,6 +351,7 @@ Michael P Urban urban@cobra.jpl.nasa.gov
Michael Schmidt michael@muc.de
Michael Schwingen m.schwingen@stochastik.rwth-aachen.de
Michael Smolsky fnsiguc@astro.weizmann.ac.il
Michal Žejdl zejdl@suas.cz
Mike Muuss mike@brl.mil
Mike Nolan nolan@lpl.arizona.edu
Mike Rogers mike@demon.net

View File

@@ -704,6 +704,7 @@ int uname_to_uid (char const *uname, uid_t *puid);
void name_init (void);
void name_add_name (const char *name, int matching_flags);
void name_add_dir (const char *name);
void name_add_file (const char *name, int term);
void name_term (void);
const char *name_next (int change_dirs);
void name_gather (void);
@@ -748,6 +749,9 @@ const char *archive_format_string (enum archive_format fmt);
const char *subcommand_string (enum subcommand c);
void set_exit_status (int val);
void request_stdin (const char *option);
void more_options (int argc, char **argv);
/* Module update.c. */
extern char *output_start;

View File

@@ -21,6 +21,7 @@
#include <fnmatch.h>
#include <hash.h>
#include <quotearg.h>
#include <wordsplit.h>
#include "common.h"
@@ -211,7 +212,8 @@ static struct name *nametail; /* end of name list */
#define NELT_NAME 0 /* File name */
#define NELT_CHDIR 1 /* Change directory request */
#define NELT_FMASK 2 /* Change fnmatch options request */
#define NELT_FILE 3 /* Read file names from that file */
struct name_elt /* A name_array element. */
{
char type; /* Element type, see NELT_* constants above */
@@ -219,6 +221,12 @@ struct name_elt /* A name_array element. */
{
const char *name; /* File or directory name */
int matching_flags;/* fnmatch options if type == NELT_FMASK */
struct
{
const char *name;/* File name */
int term; /* File name terminator in the list */
FILE *fp;
} file;
} v;
};
@@ -274,6 +282,16 @@ name_add_dir (const char *name)
ep->v.name = name;
}
void
name_add_file (const char *name, int term)
{
struct name_elt *ep;
check_name_alloc ();
ep = &name_array[entries++];
ep->type = NELT_FILE;
ep->v.file.name = name;
ep->v.file.term = term;
}
/* Names from external name file. */
@@ -295,7 +313,185 @@ name_term (void)
free (name_buffer);
free (name_array);
}
/* Prevent recursive inclusion of the same file */
struct file_id_list
{
struct file_id_list *next;
ino_t ino;
dev_t dev;
};
static struct file_id_list *file_id_list;
static void
add_file_id (const char *filename)
{
struct file_id_list *p;
struct stat st;
if (stat (filename, &st))
stat_fatal (filename);
for (p = file_id_list; p; p = p->next)
if (p->ino == st.st_ino && p->dev == st.st_dev)
{
FATAL_ERROR ((0, 0, _("%s: file list already read"),
quotearg_colon (filename)));
}
p = xmalloc (sizeof *p);
p->next = file_id_list;
p->ino = st.st_ino;
p->dev = st.st_dev;
file_id_list = p;
}
enum read_file_list_state /* Result of reading file name from the list file */
{
file_list_success, /* OK, name read successfully */
file_list_end, /* End of list file */
file_list_zero, /* Zero separator encountered where it should not */
file_list_skip /* Empty (zero-length) entry encountered, skip it */
};
/* Read from FP a sequence of characters up to TERM and put them
into STK.
*/
static enum read_file_list_state
read_name_from_file (struct name_elt *ent)
{
int c;
size_t counter = 0;
FILE *fp = ent->v.file.fp;
int term = ent->v.file.term;
size_t count;
for (c = getc (fp); c != EOF && c != term; c = getc (fp))
{
if (count == name_buffer_length)
name_buffer = x2realloc (name_buffer, &name_buffer_length);
name_buffer[counter++] = c;
if (c == 0)
{
/* We have read a zero separator. The file possibly is
zero-separated */
return file_list_zero;
}
}
if (counter == 0 && c != EOF)
return file_list_skip;
if (count == name_buffer_length)
name_buffer = x2realloc (name_buffer, &name_buffer_length);
name_buffer[counter] = 0;
return (counter == 0 && c == EOF) ? file_list_end : file_list_success;
}
static int
handle_option (const char *str)
{
struct wordsplit ws;
int i;
while (*str && isspace (*str))
;
if (*str != '-')
return 1;
ws.ws_offs = 1;
if (wordsplit (str, &ws, WRDSF_DEFFLAGS|WRDSF_DOOFFS))
FATAL_ERROR ((0, 0, _("cannot split string '%s': %s"),
str, wordsplit_strerror (&ws)));
ws.ws_wordv[0] = "tar";
more_options (ws.ws_wordc+ws.ws_offs, ws.ws_wordv);
for (i = 0; i < ws.ws_wordc+ws.ws_offs; i++)
ws.ws_wordv[i] = NULL;
wordsplit_free (&ws);
return 0;
}
static int
read_next_name (struct name_elt *ent, struct name_elt *ret)
{
enum read_file_list_state read_state;
if (!ent->v.file.fp)
{
if (!strcmp (ent->v.file.name, "-"))
{
request_stdin ("-T");
ent->v.file.fp = stdin;
}
else
{
add_file_id (ent->v.file.name);
if ((ent->v.file.fp = fopen (ent->v.file.name, "r")) == NULL)
open_fatal (ent->v.file.name);
}
}
while (1)
{
switch (read_name_from_file (ent))
{
case file_list_skip:
continue;
case file_list_zero:
WARNOPT (WARN_FILENAME_WITH_NULS,
(0, 0, N_("%s: file name read contains nul character"),
quotearg_colon (ent->v.file.name)));
ent->v.file.term = 0;
/* fall through */
case file_list_success:
if (handle_option (name_buffer) == 0)
continue;
ret->type = NELT_NAME;
ret->v.name = name_buffer;
return 0;
case file_list_end:
if (strcmp (ent->v.file.name, "-"))
fclose (ent->v.file.fp);
ent->v.file.fp = NULL;
return 1;
}
}
}
static void
copy_name (struct name_elt *ep)
{
const char *source;
size_t source_len;
char *cursor;
source = ep->v.name;
source_len = strlen (source);
if (name_buffer_length < source_len)
{
do
{
name_buffer_length *= 2;
if (! name_buffer_length)
xalloc_die ();
}
while (name_buffer_length < source_len);
free (name_buffer);
name_buffer = xmalloc(name_buffer_length + 2);
}
strcpy (name_buffer, source);
/* Zap trailing slashes. */
cursor = name_buffer + strlen (name_buffer) - 1;
while (cursor > name_buffer && ISSLASH (*cursor))
*cursor-- = '\0';
}
static int matching_flags; /* exclude_fnmatch options */
/* Get the next NELT_NAME element from name_array. Result is in
@@ -310,51 +506,40 @@ static struct name_elt *
name_next_elt (int change_dirs)
{
static struct name_elt entry;
const char *source;
char *cursor;
while (scanned != entries)
{
struct name_elt *ep;
size_t source_len;
ep = &name_array[scanned++];
ep = &name_array[scanned];
if (ep->type == NELT_FMASK)
{
matching_flags = ep->v.matching_flags;
++scanned;
continue;
}
source = ep->v.name;
source_len = strlen (source);
if (name_buffer_length < source_len)
switch (ep->type)
{
do
case NELT_FILE:
if (read_next_name (ep, &entry) == 0)
return &entry;
++scanned;
continue;
case NELT_CHDIR:
if (change_dirs)
{
name_buffer_length *= 2;
if (! name_buffer_length)
xalloc_die ();
++scanned;
copy_name (ep);
if (chdir (name_buffer) < 0)
chdir_fatal (name_buffer);
break;
}
while (name_buffer_length < source_len);
free (name_buffer);
name_buffer = xmalloc (name_buffer_length + 2);
}
strcpy (name_buffer, source);
/* Zap trailing slashes. */
cursor = name_buffer + strlen (name_buffer) - 1;
while (cursor > name_buffer && ISSLASH (*cursor))
*cursor-- = '\0';
if (change_dirs && ep->type == NELT_CHDIR)
{
if (chdir (name_buffer) < 0)
chdir_fatal (name_buffer);
}
else
{
/* fall trhough */
case NELT_NAME:
++scanned;
copy_name (ep);
if (unquote_option)
unquote_string (name_buffer);
entry.type = ep->type;

191
src/tar.c
View File

@@ -79,7 +79,7 @@ static size_t allocated_archive_names;
static const char *stdin_used_by;
/* Doesn't return if stdin already requested. */
static void
void
request_stdin (const char *option)
{
if (stdin_used_by)
@@ -1106,84 +1106,9 @@ report_textual_dates (struct tar_args *args)
}
}
/* Either NL or NUL, as decided by the --null option. */
static char filename_terminator;
enum read_file_list_state /* Result of reading file name from the list file */
{
file_list_success, /* OK, name read successfully */
file_list_end, /* End of list file */
file_list_zero, /* Zero separator encountered where it should not */
file_list_skip /* Empty (zero-length) entry encountered, skip it */
};
/* Read from FP a sequence of characters up to TERM and put them
into STK.
*/
static enum read_file_list_state
read_name_from_file (FILE *fp, struct obstack *stk, int term)
{
int c;
size_t counter = 0;
for (c = getc (fp); c != EOF && c != term; c = getc (fp))
{
if (c == 0)
{
/* We have read a zero separator. The file possibly is
zero-separated */
return file_list_zero;
}
obstack_1grow (stk, c);
counter++;
}
if (counter == 0 && c != EOF)
return file_list_skip;
obstack_1grow (stk, 0);
return (counter == 0 && c == EOF) ? file_list_end : file_list_success;
}
static bool files_from_option; /* When set, tar will not refuse to create
empty archives */
static struct obstack argv_stk; /* Storage for additional command line options
read using -T option */
/* Prevent recursive inclusion of the same file */
struct file_id_list
{
struct file_id_list *next;
ino_t ino;
dev_t dev;
};
static struct file_id_list *file_id_list;
static void
add_file_id (const char *filename)
{
struct file_id_list *p;
struct stat st;
if (stat (filename, &st))
stat_fatal (filename);
for (p = file_id_list; p; p = p->next)
if (p->ino == st.st_ino && p->dev == st.st_dev)
{
FATAL_ERROR ((0, 0, _("%s: file list already read"),
quotearg_colon (filename)));
}
p = xmalloc (sizeof *p);
p->next = file_id_list;
p->ino = st.st_ino;
p->dev = st.st_dev;
file_id_list = p;
}
/* Default density numbers for [0-9][lmh] device specifications */
@@ -1201,101 +1126,6 @@ add_file_id (const char *filename)
# endif
#endif
static void
update_argv (const char *filename, struct argp_state *state)
{
FILE *fp;
size_t count = 0, i;
char *start, *p;
char **new_argv;
size_t new_argc;
bool is_stdin = false;
enum read_file_list_state read_state;
int term = filename_terminator;
if (!strcmp (filename, "-"))
{
is_stdin = true;
request_stdin ("-T");
fp = stdin;
}
else
{
add_file_id (filename);
if ((fp = fopen (filename, "r")) == NULL)
open_fatal (filename);
}
while ((read_state = read_name_from_file (fp, &argv_stk, term))
!= file_list_end)
{
switch (read_state)
{
case file_list_success:
count++;
break;
case file_list_end: /* won't happen, just to pacify gcc */
break;
case file_list_zero:
{
size_t size;
WARNOPT (WARN_FILENAME_WITH_NULS,
(0, 0, N_("%s: file name read contains nul character"),
quotearg_colon (filename)));
/* Prepare new stack contents */
size = obstack_object_size (&argv_stk);
p = obstack_finish (&argv_stk);
for (; size > 0; size--, p++)
if (*p)
obstack_1grow (&argv_stk, *p);
else
obstack_1grow (&argv_stk, '\n');
obstack_1grow (&argv_stk, 0);
count = 1;
/* Read rest of files using new filename terminator */
term = 0;
break;
}
case file_list_skip:
break;
}
}
if (!is_stdin)
fclose (fp);
if (count == 0)
return;
start = obstack_finish (&argv_stk);
if (term == 0)
for (p = start; *p; p += strlen (p) + 1)
if (p[0] == '-')
count++;
new_argc = state->argc + count;
new_argv = xmalloc (sizeof (state->argv[0]) * (new_argc + 1));
memcpy (new_argv, state->argv, sizeof (state->argv[0]) * (state->argc + 1));
state->argv = new_argv;
memmove (&state->argv[state->next + count], &state->argv[state->next],
(state->argc - state->next + 1) * sizeof (state->argv[0]));
state->argc = new_argc;
for (i = state->next, p = start; *p; p += strlen (p) + 1, i++)
{
if (term == 0 && p[0] == '-')
state->argv[i++] = (char *) "--add-file";
state->argv[i] = p;
}
}
static char *
tar_help_filter (int key, const char *text, void *input)
@@ -1462,6 +1292,9 @@ parse_owner_group (char *arg, uintmax_t field_max, char const **name_option)
#define TAR_SIZE_SUFFIXES "bBcGgkKMmPTtw"
/* Either NL or NUL, as decided by the --null option. */
static char filename_terminator;
static error_t
parse_opt (int key, char *arg, struct argp_state *state)
{
@@ -1745,7 +1578,7 @@ parse_opt (int key, char *arg, struct argp_state *state)
break;
case 'T':
update_argv (arg, state);
name_add_file (arg, filename_terminator);
/* Indicate we've been given -T option. This is for backward
compatibility only, so that `tar cfT archive /dev/null will
succeed */
@@ -2371,11 +2204,12 @@ static int subcommand_class[] = {
/* Return t if the subcommand_option is in class(es) f */
#define IS_SUBCOMMAND_CLASS(f) (subcommand_class[subcommand_option] & (f))
static struct tar_args args;
static void
decode_options (int argc, char **argv)
{
int idx;
struct tar_args args;
argp_version_setup ("tar", tar_authors);
@@ -2476,7 +2310,6 @@ decode_options (int argc, char **argv)
if (argp_parse (&argp, argc, argv, ARGP_IN_ORDER, &idx, &args))
exit (TAREXIT_FAILURE);
/* Special handling for 'o' option:
GNU tar used to say "output old format".
@@ -2742,6 +2575,14 @@ decode_options (int argc, char **argv)
report_textual_dates (&args);
}
void
more_options (int argc, char **argv)
{
int idx;
if (argp_parse (&argp, argc, argv, ARGP_IN_ORDER,
&idx, &args))
exit (TAREXIT_FAILURE);
}
/* Tar proper. */
@@ -2771,8 +2612,6 @@ main (int argc, char **argv)
xmalloc (sizeof (const char *) * allocated_archive_names);
archive_names = 0;
obstack_init (&argv_stk);
/* System V fork+wait does not work if SIGCHLD is ignored. */
signal (SIGCHLD, SIG_DFL);

View File

@@ -44,6 +44,8 @@ $(srcdir)/package.m4: $(top_srcdir)/configure.ac
TESTSUITE_AT = \
T-empty.at\
T-null.at\
T-zfile.at\
T-nonl.at\
testsuite.at\
append.at\
append01.at\

View File

@@ -23,8 +23,8 @@
# Reported by: Karl Berry <karl@freefriends.org>
# References: <200610301353.k9UDr1O30680@f7.net>
AT_SETUP([files-from: empty entries])
AT_KEYWORDS([files-from empty])
AT_SETUP([empty entries])
AT_KEYWORDS([files-from empty-line])
AT_DATA([file-list],
[jeden
@@ -34,17 +34,16 @@ trzy
])
AT_TAR_CHECK([
AT_SORT_PREREQ
genfile --file jeden
genfile --file dwa
genfile --file trzy
tar cfvT archive ../file-list | sort
tar cfvT archive ../file-list
],
[0],
[dwa
jeden
[jeden
dwa
trzy
],
[],[],[],[ustar]) # Testing one format is enough

62
tests/T-nonl.at Normal file
View File

@@ -0,0 +1,62 @@
# Process this file with autom4te to create testsuite. -*- Autotest -*-
#
# Test suite for GNU tar.
# Copyright 2013 Free Software Foundation, Inc.
#
# This file is part of GNU tar.
#
# GNU tar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GNU tar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
# Tar malfunctioned when given a file list with the last line not ending
# in a newline.
#
# Reported by: Michal Žejdl <zejdl@suas.cz>
# References: <http://lists.gnu.org/archive/html/bug-tar/2013-07/msg00009.html>
AT_SETUP([entries with missing newlines])
AT_KEYWORDS([files-from nonewline nonl T-nonl])
AT_TAR_CHECK([
genfile --length=0 --file empty
AS_ECHO_N(c) > 1.nonl
echo d > 2.nonl
AS_ECHO_N(e) >> 2.nonl
touch a b c d e
AT_DATA([filelist],[a
b
])
tar cf archive -T empty -T 1.nonl -T 2.nonl -T filelist
tar tf archive
echo ==
tar cf archive -T 2.nonl -T empty -T filelist -T 1.nonl
tar tf archive
],
[0],
[c
d
e
a
b
==
d
e
a
b
c
],
[],[],[],[ustar])
AT_CLEANUP

View File

@@ -18,30 +18,28 @@
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
AT_SETUP([files-from: 0-separated file without -0])
AT_SETUP([0-separated file without -0])
AT_KEYWORDS([files-from null T-null])
AT_DATA([expout],[jeden\ndwa
trzy
])
AT_TAR_CHECK([
AT_SORT_PREREQ
echo dwa > temp
echo jeden > temp
echo dwa >> temp
echo trzy >> temp
cat temp | tr '\n' '\0' > temp1
echo jeden > file-list
cat temp1 >> file-list
cat temp | tr '\n' '\0' > file-list
genfile -f "jeden
dwa" || AT_SKIP_TEST
genfile -f jeden
genfile -f dwa
genfile -f trzy
tar cfTv archive file-list | sort
tar cfTv archive file-list
],
[0],
[expout],
[jeden
dwa
trzy
],
[tar: file-list: file name read contains nul character
],[],[],[ustar]) # Testing one format is enough

47
tests/T-zfile.at Normal file
View File

@@ -0,0 +1,47 @@
# Process this file with autom4te to create testsuite. -*- Autotest -*-
#
# Test suite for GNU tar.
# Copyright 2013 Free Software Foundation, Inc.
#
# This file is part of GNU tar.
#
# GNU tar is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 3 of the License, or
# (at your option) any later version.
#
# GNU tar is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
AT_SETUP([empty file])
AT_KEYWORDS([files-from empty-file])
AT_TAR_CHECK([
genfile --length=0 --file empty
genfile --file a
genfile --file b
AT_DATA([valid],[a
b
])
tar cf archive -T empty -T valid
tar tf archive
echo "=="
tar cf archive -T valid -T empty
tar tf archive
],
[0],
[a
b
==
a
b
],
[],[],[],[ustar]) # Testing one format is enough
AT_CLEANUP

View File

@@ -199,6 +199,8 @@ m4_include([opcomp06.at])
AT_BANNER([The -T option])
m4_include([T-empty.at])
m4_include([T-null.at])
m4_include([T-zfile.at])
m4_include([T-nonl.at])
AT_BANNER([Various options])
m4_include([indexfile.at])