Improve tar-snapshot-edit
Support architecture-specific field ranges for the "-c" function. Better handle negative or larger-than-32-bit field values even when running in 32-bit Perl (for the default "print a summary" function).
This commit is contained in:
committed by
Sergey Poznyakoff
parent
b41b004638
commit
5cb79ed519
@@ -28,7 +28,8 @@
|
||||
#
|
||||
# It can also run a check on all the field values found in the
|
||||
# snapshot file, printing out a detailed message when it finds values
|
||||
# that would cause an "Unexpected field value in snapshot file" error
|
||||
# that would cause an "Unexpected field value in snapshot file",
|
||||
# "Numerical result out of range", or "Invalid argument" error
|
||||
# if tar were run using that snapshot file as input. (See the
|
||||
# comments included in the definition of the check_field_values
|
||||
# routine for more detailed information regarding these checks.)
|
||||
@@ -47,9 +48,19 @@
|
||||
# or 2 files.
|
||||
# * tweak output formatting
|
||||
#
|
||||
#
|
||||
# Modified March 13, 2013 by Nathan Stratton Treadway <nathanst AT ontko.com>:
|
||||
# * configure field ranges used for -c option based on the system
|
||||
# architecture (in response to the December 2012 update to GNU tar
|
||||
# enabling support for systems with signed dev_t values).
|
||||
# * when printing the list of device ids found in the snapshot file
|
||||
# (when run in the default mode), print the raw device id values
|
||||
# instead of the hex-string version in those cases where they
|
||||
# can't be converted successfully.
|
||||
|
||||
use Getopt::Std;
|
||||
use Config;
|
||||
|
||||
my %snapshot_field_ranges; # used in check_field_values function
|
||||
|
||||
## reading
|
||||
|
||||
@@ -207,30 +218,151 @@ sub show_device_counts ($) {
|
||||
$devices{$dev}++;
|
||||
}
|
||||
|
||||
my $devstr;
|
||||
foreach $dev (sort {$a <=> $b} keys %devices) {
|
||||
printf " Device 0x%04x occurs $devices{$dev} times.\n", $dev;
|
||||
$devstr = sprintf ("0x%04x", $dev);
|
||||
if ( $dev > 0xffffffff or $dev < 0 or hex($devstr) != $dev ) {
|
||||
# sprintf "%x" will not return a useful value for device ids
|
||||
# that are negative or which overflow the integer size on this
|
||||
# instance of Perl, so we convert the hex string back to a
|
||||
# number, and if it doesn't (numerically) equal the original
|
||||
# device id value, we know the hex conversion hasn't worked.
|
||||
#
|
||||
# Unfortunately, since we're running in "-w" mode, Perl will
|
||||
# also print a warning message if the hex() routine is called
|
||||
# on anything larger than "0xffffffff", even in 64-bit Perl
|
||||
# where such values are actually supported... so we have to
|
||||
# avoid calling hex() at all if the device id is too large or
|
||||
# negative. (If it's negative, the conversion to an unsigned
|
||||
# integer for the "%x" specifier will mean the result will
|
||||
# always trigger hex()'s warning on a 64-bit machine.)
|
||||
#
|
||||
# These situations don't seem to occur very often, so for now
|
||||
# when they do occur, we simply print the original text value
|
||||
# that was read from the snapshot file; it will look a bit
|
||||
# funny next to the values that do print in hex, but that's
|
||||
# preferable to printing values that aren't actually correct.
|
||||
$devstr = $dev;
|
||||
}
|
||||
printf " Device %s occurs $devices{$dev} times.\n", $devstr;
|
||||
}
|
||||
}
|
||||
|
||||
## check field values
|
||||
|
||||
# returns a warning message if $field isn't a valid string representation
|
||||
# of an integer, or if the resulting integer is out of the specified range
|
||||
sub validate_integer_field ($$$$) {
|
||||
my $field = shift;
|
||||
# Initializes the global %snapshot_field_ranges hash, based on the "-a"
# command-line option if given, otherwise based on the "archname" of
# the current system (as reported by the Config module).
#
# Each value in the hash is a two-element array containing the minimum
# and maximum allowed values, respectively, for that field in the snapshot
# file.  GNU tar's allowed values for each architecture are determined
# in the incremen.c source file, where the TYPE_MIN and TYPE_MAX
# pre-processor expressions are used to determine the range that can be
# expressed by the C data type used for each field; the values in the
# tables defined below should match those calculations.
#
# Parameters:
#   $opt_a - architecture name given with the "-a" option, or a false
#            value to fall back to $Config{'archname'}.
#
# Side effects: replaces the contents of %snapshot_field_ranges, prints
# a line naming the set of ranges that was selected, and prints a note
# when the largest maximum in that set does not fit in one of this Perl's
# native integers.

sub choose_architecture ($) {
    my $opt_a = shift;

    my $arch = $opt_a ? $opt_a : $Config{'archname'};

    # These ranges apply to Linux 2.4/2.6 on iX86 systems, but are used
    # by default on unrecognized/unsupported systems, too.
    my %iX86_linux_field_ranges = (
        timestamp_sec  => [ -2147483648, 2147483647 ],  # min/max of time_t
        timestamp_nsec => [ 0, 999999999 ],             # 0 to BILLION-1
        nfs            => [ 0, 1 ],
        dev            => [ 0, 18446744073709551615 ],  # min/max of dev_t
        ino            => [ 0, 4294967295 ],            # min/max of ino_t
    );

    if ( $arch =~ m/^i[\dxX]86-linux/i ) {
        %snapshot_field_ranges = %iX86_linux_field_ranges;
        print "Checking snapshot field values using \"iX86-linux\" ranges.\n\n";
    } elsif ( $arch =~ m/^x86_64-linux/i ) {
        %snapshot_field_ranges = (
            timestamp_sec  => [ -9223372036854775808, 9223372036854775807 ],
            timestamp_nsec => [ 0, 999999999 ],
            nfs            => [ 0, 1 ],
            dev            => [ 0, 18446744073709551615 ],
            ino            => [ 0, 18446744073709551615 ],
        );
        print "Checking snapshot field values using \"x86_64-linux\" ranges.\n\n";
    } elsif ( $arch =~ m/^IA64.ARCHREV_0/i ) {
        # HP/UX running on Itanium/ia64 architecture
        %snapshot_field_ranges = (
            timestamp_sec  => [ -2147483648, 2147483647 ],
            timestamp_nsec => [ 0, 999999999 ],
            nfs            => [ 0, 1 ],
            dev            => [ -2147483648, 2147483647 ],
            ino            => [ 0, 4294967295 ],
        );
        print "Checking snapshot field values using \"IA64.ARCHREV_0\" (HP/UX) ranges.\n\n";
    } else {
        %snapshot_field_ranges = %iX86_linux_field_ranges;
        print "Unrecognized architecture \"$arch\"; defaulting to \"iX86-linux\".\n";
        print "(Use -a option to override.)\n" unless $opt_a;
        print "\n";
    }

    # A plain numeric literal is not a reference, so ref(1) is normally
    # the empty string; a non-empty result is taken to mean that "bignum"
    # has turned numeric literals into objects, in which case the native
    # integer-size check below does not apply.
    if ( ref(1) ne "" ) {
        print "(\"bignum\" mode is in effect; skipping 64-bit-integer check.)\n\n";
    } else {
        # find the largest max value in the current set of ranges
        my $maxmax = 0;
        for my $range ( values %snapshot_field_ranges ) {
            $maxmax = $range->[1] if ( $range->[1] > $maxmax );
        }

        # "~0" translates into a platform-native integer with all bits turned
        # on -- that is, the largest value that can be represented as
        # an integer.  We print a warning if our $maxmax value is greater
        # than that largest integer, since in that case Perl will switch
        # to using floats for those large max values.  The wording of
        # the message assumes that the only way this situation can exist
        # is that the platform uses 32-bit integers but some of the
        # snapshot-file fields have 64-bit values.
        #
        # Note that in the suggested command line the "-Mbignum" switch
        # has to come before the script name: anything after the script
        # name is handed to the script as an argument instead of being
        # interpreted by perl.
        if ( ~0 < $maxmax ) {
            print <<EOF;
Note: this version of Perl uses 32-bit integers, which means that it
will switch to using floating-point numbers when checking the ranges
for 64-bit snapshot-file fields.  This normally will work fine, but
might fail to detect cases where the value in the input field value is
only slightly out of range.  (For example, a "9223372036854775808"
might not be recognized as being larger than  9223372036854775807.)
If you suspect you are experiencing this problem, you can try running
the program using the "-Mbignum" option, as in
  \$ perl -Mbignum $0 -c [FILES]
(but doing so will make the program run *much* slower).

EOF
        }
    }
}
|
||||
|
||||
# returns a warning message if $field_value isn't a valid string
|
||||
# representation of an integer, or if the resulting integer is out of range
|
||||
# defined by the two-element array retrieved by looking up the $field_name key in
|
||||
# the global %snapshot_field_ranges hash.
|
||||
sub validate_integer_field ($$) {
|
||||
my $field_value = shift;
|
||||
my $field_name = shift;
|
||||
my $min = shift;
|
||||
my $max = shift;
|
||||
|
||||
my ($min, $max) = @{$snapshot_field_ranges{$field_name}};
|
||||
|
||||
my $msg = "";
|
||||
|
||||
if ( not $field =~ /^-?\d+$/ ) {
|
||||
$msg = " $field_name value contains invalid characters: \"$field\"\n";
|
||||
if ( not $field_value =~ /^-?\d+$/ ) {
|
||||
$msg = " $field_name value contains invalid characters: \"$field_value\"\n";
|
||||
} else {
|
||||
if ( $field < $min ) {
|
||||
$msg = " $field_name value too low: \"$field\" < $min \n";
|
||||
} elsif ( $field > $max ) {
|
||||
$msg = " $field_name value too high: \"$field\" > $max \n";
|
||||
if ( $field_value < $min ) {
|
||||
$msg = " $field_name value too low: \"$field_value\" < $min \n";
|
||||
} elsif ( $field_value > $max ) {
|
||||
$msg = " $field_name value too high: \"$field_value\" > $max \n";
|
||||
}
|
||||
}
|
||||
return $msg;
|
||||
@@ -239,28 +371,18 @@ sub validate_integer_field ($$$$) {
|
||||
|
||||
# This routine loops through each directory entry in the $info data
|
||||
# structure and prints a warning message if tar would abort with an
|
||||
# "Unexpected field value in snapshot file" error upon reading this
|
||||
# snapshot file.
|
||||
# "Unexpected field value in snapshot file", "Numerical result out of
|
||||
# range", or "Invalid argument" error upon reading this snapshot file.
|
||||
#
|
||||
# (Note that this specific error message was introduced along with the
|
||||
# change to snapshot file format "2", starting with tar v1.16 [or,
|
||||
# more precisely, v1.15.91].)
|
||||
# (Note that the "Unexpected field value in snapshot file" error message
|
||||
# was introduced along with the change to snapshot file format "2",
|
||||
# starting with tar v1.16 [or, more precisely, v1.15.91], while the
|
||||
# other two were introduced in v1.27.)
|
||||
#
|
||||
# The checks here are intended to match those found in the incremen.c
|
||||
# source file (as of tar v1.16.1).
|
||||
#
|
||||
# In that code, the checks are done against pre-processor expressions,
|
||||
# as defined in the C header files at compile time. In the routine
|
||||
# below, a Perl variable is created for each expression used as part of
|
||||
# one of these checks, assigned the value of the related pre-processor
|
||||
# expression as found on a Linux 2.6.8/i386 system.
|
||||
#
|
||||
# It seems likely that these settings will catch most invalid
|
||||
# field values found in actual snapshot files on all systems. However,
|
||||
# if "tar" is erroring out on a snapshot file that this check routine
|
||||
# does not complain about, that probably indicates that the values
|
||||
# below need to be adjusted to match those used by "tar" in that
|
||||
# particular environment.
|
||||
# source file. See the choose_architecture() function (above) for more
|
||||
# information on how to configure the range of values considered valid
|
||||
# by this script.
|
||||
#
|
||||
# (Note: the checks here are taken from the code that processes
|
||||
# version 2 snapshot files, but to keep things simple we apply those
|
||||
@@ -270,16 +392,6 @@ sub validate_integer_field ($$$$) {
|
||||
sub check_field_values ($) {
|
||||
my $info = shift;
|
||||
|
||||
# set up a variable with the value of each pre-processor
|
||||
# expression used for field-value checks in incremen.c
|
||||
# (these values here are from a Linux 2.6.8/i386 system)
|
||||
my $BILLION = 1000000000; # BILLION
|
||||
my $MIN_TIME_T = -2147483648; # TYPE_MINIMUM(time_t)
|
||||
my $MAX_TIME_T = 2147483647; # TYPE_MAXIUMUM(time_t)
|
||||
my $MAX_DEV_T = 4294967295; # TYPE_MAXIUMUM(dev_t)
|
||||
my $MAX_INO_T = 4294967295; # TYPE_MAXIUMUM(ino_t)
|
||||
|
||||
|
||||
my $msg;
|
||||
my $error_found = 0;
|
||||
|
||||
@@ -288,11 +400,9 @@ sub check_field_values ($) {
|
||||
$snapver = $info->[0];
|
||||
|
||||
$msg = "";
|
||||
$msg .= validate_integer_field($info->[1],
|
||||
'timestamp_sec', $MIN_TIME_T, $MAX_TIME_T);
|
||||
$msg .= validate_integer_field($info->[1], 'timestamp_sec');
|
||||
if ($snapver >= 1) {
|
||||
$msg .= validate_integer_field($info->[2],
|
||||
'timestamp_nsec', 0, $BILLION-1);
|
||||
$msg .= validate_integer_field($info->[2], 'timestamp_nsec');
|
||||
}
|
||||
if ( $msg ne "" ) {
|
||||
$error_found = 1;
|
||||
@@ -305,15 +415,13 @@ sub check_field_values ($) {
|
||||
|
||||
$msg = "";
|
||||
|
||||
$msg .= validate_integer_field($dir->{'nfs'}, 'nfs', 0, 1);
|
||||
$msg .= validate_integer_field($dir->{'nfs'}, 'nfs');
|
||||
if ($snapver >= 1) {
|
||||
$msg .= validate_integer_field($dir->{'timestamp_sec'},
|
||||
'timestamp_sec', $MIN_TIME_T, $MAX_TIME_T);
|
||||
$msg .= validate_integer_field($dir->{'timestamp_nsec'},
|
||||
'timestamp_nsec', 0, $BILLION-1);
|
||||
$msg .= validate_integer_field($dir->{'timestamp_sec'}, 'timestamp_sec');
|
||||
$msg .= validate_integer_field($dir->{'timestamp_nsec'}, 'timestamp_nsec');
|
||||
}
|
||||
$msg .= validate_integer_field($dir->{'dev'}, 'dev', 0, $MAX_DEV_T);
|
||||
$msg .= validate_integer_field($dir->{'ino'}, 'ino', 0, $MAX_INO_T);
|
||||
$msg .= validate_integer_field($dir->{'dev'}, 'dev');
|
||||
$msg .= validate_integer_field($dir->{'ino'}, 'ino');
|
||||
|
||||
if ( $msg ne "" ) {
|
||||
$error_found = 1;
|
||||
@@ -438,10 +546,10 @@ sub write_incr_db_2 ($$) {
|
||||
## main
|
||||
|
||||
sub main {
|
||||
our ($opt_b, $opt_r, $opt_h, $opt_c);
|
||||
getopts('br:hc');
|
||||
our ($opt_b, $opt_r, $opt_h, $opt_c, $opt_a);
|
||||
getopts('br:hca:');
|
||||
HELP_MESSAGE() if ($opt_h || $#ARGV == -1 || ($opt_b && !$opt_r) ||
|
||||
($opt_r && $opt_c) );
|
||||
($opt_a && !$opt_c) || ($opt_r && $opt_c) );
|
||||
|
||||
my @repl;
|
||||
if ($opt_r) {
|
||||
@@ -451,9 +559,11 @@ sub main {
|
||||
}
|
||||
}
|
||||
|
||||
choose_architecture($opt_a) if ($opt_c);
|
||||
|
||||
foreach my $snapfile (@ARGV) {
|
||||
my $info = read_incr_db($snapfile);
|
||||
if ($opt_r ) {
|
||||
if ($opt_r) {
|
||||
if ($opt_b) {
|
||||
rename($snapfile, $snapfile . "~") || die "Could not rename '$snapfile' to backup";
|
||||
}
|
||||
@@ -474,7 +584,7 @@ sub HELP_MESSAGE {
|
||||
Usage:
|
||||
tar-snapshot-edit SNAPFILE [SNAPFILE [...]]
|
||||
tar-snapshot-edit -r 'DEV1-DEV2[,DEV3-DEV4...]' [-b] SNAPFILE [SNAPFILE [...]]
|
||||
tar-snapshot-edit -c SNAPFILE [SNAPFILE [...]]
|
||||
tar-snapshot-edit -c [-aARCH] SNAPFILE [SNAPFILE [...]]
|
||||
|
||||
With no options specified: print a summary of the 'device' values
|
||||
found in each SNAPFILE.
|
||||
@@ -487,9 +597,21 @@ Usage:
|
||||
|
||||
With -c: Check the field values in each SNAPFILE and print warning
|
||||
messages if any invalid values are found. (An invalid value is one
|
||||
that would cause \"tar\" to generate an
|
||||
Unexpected field value in snapshot file
|
||||
error message as it processed the snapshot file.)
|
||||
that would cause \"tar\" to abort with an error message such as
|
||||
Unexpected field value in snapshot file
|
||||
Numerical result out of range
|
||||
or
|
||||
Invalid argument
|
||||
as it processed the snapshot file.)
|
||||
|
||||
Normally the program automatically chooses the valid ranges for
|
||||
the fields based on the current system's architecture, but the
|
||||
-a option can be used to override the selection, e.g. in order
|
||||
to validate a snapshot file generated on a some other system.
|
||||
(Currently only three architectures are supported, "iX86-linux",
|
||||
"x86_64-linux", and "IA64.ARCHREV_0" [HP/UX running on Itanium/ia64],
|
||||
and if the current system isn't recognized, then the iX86-linux
|
||||
values are used by default.)
|
||||
|
||||
EOF
|
||||
exit 1;
|
||||
|
||||
Reference in New Issue
Block a user