Files
scylladb/test/alternator/test_number.py
Szymon Malewski cb8e11653f test/alternator: Number normalization tests
DynamoDB normalizes Number values, so different string representations
of the same number (e.g., "1000" vs "1e3") should be treated as the
same value in all contexts.
In Alternator this is true in most cases, thanks to implicit normalization in
Decimal `to_string()` function.
However this is fragile - and in fact this function should be fixed
due to OOM vulnerability in CQL use (#8002).

This patch adds tests that should prevent regression in cases
that work currently.

Unfortunately not all contexts work currently - mainly, HASH keys
are not normalized and the backend handles them by byte representation.
The added tests reproduce this incorrect behaviour.

All added tests pass with DynamoDB, with one exception: weirdly
DynamoDB doesn't recognise unnormalized numbers in BatchGetItem
 as duplicate keys.

Ref SCYLLADB-1575

Closes scylladb/scylladb#29501
2026-05-18 09:42:33 +03:00

611 lines
29 KiB
Python

# Copyright 2020-present ScyllaDB
#
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.1
# Tests for the number type. Numbers in DynamoDB have an unusual definition -
# they are a floating-point type with 38 decimal digits of precision and
# decimal exponent in the range -130 to +125. The *decimal* definition allows
# this type to accurately represent integers (with magnitude up to the allowed
# exponent) or decimal fractions up to the supported precision.
# Because of this unusual definition, none of the C++ types can accurately
# hold DynamoDB numbers - and Alternator currently uses the arbitrary-
# precision "big_decimal" type to hold them.
#
# The tests here try to verify two things:
# 1. That Alternator's number type supports the full precision and magnitude
# that DynamoDB's number type supports. We don't want to see precision
# or magnitude lost when storing and retrieving numbers, or when doing
# calculations on them.
# 2. That Alternator's number type does not have *better* precision or
# magnitude than DynamoDB does. If it did, users may be tempted to rely
# on that implementation detail.
#
# We have additional tests in other files that numbers can be stored,
# retrieved, calculated (add and subtract), and sorted (when a sort key
# is a number). The tests in this file focus just on the precision and
# magnitude that the number type can store.
import decimal
from decimal import Decimal
import boto3.dynamodb.types
import pytest
from botocore.exceptions import ClientError
from test.alternator.util import random_string, client_no_transform
# Monkey-patch the boto3 library to stop doing its own error-checking on
# numbers. This works around a bug https://github.com/boto/boto3/issues/2500
# of incorrect checking of responses, and we also need to get boto3 to not do
# its own error checking of requests, to allow us to check the server's
# handling of such errors.
# The replacement is a plain decimal.Context with 100 digits of precision.
# NOTE(review): this assignment is a module-level side effect performed at
# import time; presumably it affects every boto3 user in the same process -
# confirm that this is acceptable for the other test files.
boto3.dynamodb.types.DYNAMODB_CONTEXT = decimal.Context(prec=100)
# Test that numbers of allowed magnitudes - between 1e-130 and 1e125 -
# can be stored and successfully retrieved unchanged.
def test_number_magnitude_allowed(test_table_s):
    """Store and read back numbers at the edges of the allowed magnitude.

    Every magnitude is written once as given and once negated, and the
    value read back must compare equal to the value that was written.
    """
    pk = random_string()
    magnitudes = ("1e10", "1e100", "1e125",
                  "9.99999999e125", "1e-100",
                  "1e-130")
    for text in magnitudes:
        positive = Decimal(text)
        # First the number as written, then the same number with its sign
        # flipped.
        for value in (positive, -positive):
            test_table_s.update_item(
                Key={'p': pk},
                UpdateExpression='SET a = :val',
                ExpressionAttributeValues={':val': value})
            item = test_table_s.get_item(Key={'p': pk}, ConsistentRead=True)['Item']
            assert item['a'] == value
# Test that numbers of too big (or small) a magnitude cannot be stored.
def test_number_magnitude_not_allowed(test_table_s):
    """Check that numbers outside the supported magnitude range are rejected.

    A number whose magnitude is too large must fail with a
    ValidationException mentioning "overflow"; a number whose magnitude is
    too small must fail with one mentioning "underflow".
    """
    p = random_string()
    # Too large: both a plain 1e126 and 11e125 (whose nominal exponent is
    # 125, but whose actual magnitude is 126).
    for num in [Decimal("1e126"), Decimal("11e125")]:
        with pytest.raises(ClientError, match='ValidationException.*overflow'):
            test_table_s.update_item(Key={'p': p},
                UpdateExpression='SET a = :val',
                ExpressionAttributeValues={':val': num})
    # Too small: both a plain 1e-131 and 0.9e-130 (nominal exponent -130,
    # actual magnitude -131).
    for num in [Decimal("1e-131"), Decimal("0.9e-130")]:
        with pytest.raises(ClientError, match='ValidationException.*underflow'):
            test_table_s.update_item(Key={'p': p},
                UpdateExpression='SET a = :val',
                ExpressionAttributeValues={':val': num})
# Zero can be written as 0e126. Should this be allowed - since the number
# is zero, which is allowed - or forbidden because nominally the exponent
# is 126? In my opinion the former interpretation is the correct one,
# since the nominal exponent in the scientific notation input isn't what
# matters, but rather the actual magnitude: E.g., consider that 0.1e126 is
# allowed despite having a nominal exponent 126 - because its actual
# magnitude is 125.
# At first glance, it appears that DynamoDB seems to follow the latter
# interpretation, and forbids 0e126 (and similar). Which sounds like a
# legitimate decision - except it is NOT followed consistently - while
# DynamoDB forbids 0e126, it allows 0.0e126! That is inconsistent, and I
# consider it a DynamoDB bug, so Alternator follows the first interpretation
# (both 0e126 and 0.0e126 are allowed), and I'm marking this test as a
# dynamodb_bug.
def test_number_magnitude_not_allowed_zero(test_table_s, dynamodb_bug):
    """Zero written with an out-of-range nominal exponent must be accepted.

    The test writes several spellings of zero (and one non-zero control
    value) into the same attribute and expects every write to succeed.
    """
    pk = random_string()

    def store(text):
        # Write Decimal(text) into attribute 'a' of the test item.
        test_table_s.update_item(
            Key={'p': pk},
            UpdateExpression='SET a = :val',
            ExpressionAttributeValues={':val': Decimal(text)})

    # 0e125 is allowed, obviously.
    store("0e125")
    # To cement our understanding of nominal exponent vs. actual magnitude,
    # confirm that 0.1e126 is allowed - its actual magnitude is 125, which
    # is allowed.
    store("0.1e126")
    # 0.0e126 is still just zero and has actual magnitude 0, despite the
    # nominal exponent 126, so it is also allowed.
    store("0.0e126")
    # If 0.0e126 is allowed, obviously 0e126 should also be allowed, and
    # Alternator allows it as this test confirms - but DynamoDB doesn't and
    # this test fails here on DynamoDB. This is considered a DynamoDB bug,
    # hence the "dynamodb_bug" tag on this test.
    store("0e126")
    # Verify that the 0e126 written above is really just a regular zero.
    stored = test_table_s.get_item(Key={'p': pk}, ConsistentRead=True)['Item']['a']
    assert 0 == stored
    # Similarly, 0e-131 should be allowed (it's, again, just zero), and
    # DynamoDB has a bug causing it to be forbidden.
    store("0e-131")
# DynamoDB limits the magnitude of the numbers (the exponent can be between
# -130 and 125). If we neglect to limit the magnitude, it can allow a user
# to request an addition operation between two numbers of wildly different
# magnitudes, that requires an unlimited amount of memory and CPU time - i.e.,
# a DoS attack. The attacker can cause a std::bad_alloc, large allocations,
# and very long scheduler stall, all with a very short request.
# When we had issue #6794 we had to skip this test, because it took a very
# long time and/or crashed Scylla.
def test_number_magnitude_not_allowed_dos(test_table_s):
    """Adding 1.0 and 1.0e100000000 must be rejected as an overflow.

    The operands are sent as raw protocol strings through
    client_no_transform(), because Python's Decimal type and the way Boto3
    uses it have limitations of their own that must be bypassed.
    """
    pk = random_string()
    small = {'N': "1.0"}
    huge = {'N': "1.0e100000000"}
    with client_no_transform(test_table_s.meta.client) as client, \
            pytest.raises(ClientError, match='ValidationException.*overflow'):
        client.update_item(
            TableName=test_table_s.name,
            Key={'p': {'S': pk}},
            UpdateExpression='SET x = :a + :b',
            ExpressionAttributeValues={':a': small, ':b': huge})
# Check that numbers up to the specified precision (38 decimal digits) can
# be stored and retrieved unchanged.
def test_number_precision_allowed(test_table_s):
    """Round-trip 38-significant-digit numbers and expect them unchanged."""
    pk = random_string()
    digits_38 = ("3.1415926535897932384626433832795028841",
                 "314159265358979323846.26433832795028841",
                 "31415926535897932384626433832795028841e30")
    for text in digits_38:
        value = Decimal(text)
        test_table_s.update_item(
            Key={'p': pk},
            UpdateExpression='SET a = :val',
            ExpressionAttributeValues={':val': value})
        item = test_table_s.get_item(Key={'p': pk}, ConsistentRead=True)['Item']
        assert item['a'] == value
# Check that numbers with more significant digits than supported (38 decimal
# digits) cannot be stored.
def test_number_precision_not_allowed(test_table_s):
    """Numbers with 39 significant digits must be rejected on write.

    The expected failure is a ValidationException whose message mentions
    "significant".
    """
    pk = random_string()
    digits_39 = ("3.14159265358979323846264338327950288419",
                 "314159265358979323846.264338327950288419",
                 "314159265358979323846264338327950288419e30")
    for text in digits_39:
        with pytest.raises(ClientError, match='ValidationException.*significant'):
            test_table_s.update_item(
                Key={'p': pk},
                UpdateExpression='SET a = :val',
                ExpressionAttributeValues={':val': Decimal(text)})
# The above tests checked the legal magnitudes and precisions of non-key
# columns, and the following tests do the same for a numeric key column.
# Because different code paths are involved for serializing and storing
# key and non-key columns, it's important to check this case as well.
def test_number_magnitude_key(test_table_sn):
    """Check the magnitude limits for a numeric sort key ('c').

    Legal magnitudes must be usable as a key for both writing and reading;
    illegal ones must be rejected with an overflow or underflow
    ValidationException. Every magnitude is tried in both signs.
    """
    pk = random_string()

    def both_signs(texts):
        # Yield each number as written, followed by its negation.
        for text in texts:
            positive = Decimal(text)
            yield positive
            yield -positive

    def write(sort_key, payload):
        test_table_sn.update_item(
            Key={'p': pk, 'c': sort_key},
            UpdateExpression='SET a = :val',
            ExpressionAttributeValues={':val': payload})

    # Legal magnitudes are allowed:
    legal = ("1e10", "1e100", "1e125",
             "9.99999999e125", "1e-100",
             "1e-130")
    for sort_key in both_signs(legal):
        payload = random_string()
        write(sort_key, payload)
        item = test_table_sn.get_item(Key={'p': pk, 'c': sort_key}, ConsistentRead=True)['Item']
        assert item['a'] == payload

    # Illegal magnitudes are not allowed:
    payload = random_string()
    for sort_key in both_signs(("1e126", "11e125")):
        with pytest.raises(ClientError, match='ValidationException.*overflow'):
            write(sort_key, payload)
    for sort_key in both_signs(("1e-131", "0.9e-130")):
        with pytest.raises(ClientError, match='ValidationException.*underflow'):
            write(sort_key, payload)
def test_number_precision_key(test_table_sn):
    """Check the precision limit for a numeric sort key ('c').

    Keys with 38 significant digits must work for writing and reading;
    keys with 39 significant digits must be rejected with a
    ValidationException mentioning "significant".
    """
    pk = random_string()

    def write(sort_key, payload):
        test_table_sn.update_item(
            Key={'p': pk, 'c': sort_key},
            UpdateExpression='SET a = :val',
            ExpressionAttributeValues={':val': payload})

    # Legal precision is allowed:
    legal = ("3.1415926535897932384626433832795028841",
             "314159265358979323846.26433832795028841",
             "31415926535897932384626433832795028841e30")
    for text in legal:
        sort_key = Decimal(text)
        payload = random_string()
        write(sort_key, payload)
        item = test_table_sn.get_item(Key={'p': pk, 'c': sort_key}, ConsistentRead=True)['Item']
        assert item['a'] == payload

    # Illegal precision is not allowed:
    payload = random_string()
    illegal = ("3.14159265358979323846264338327950288419",
               "314159265358979323846.264338327950288419",
               "314159265358979323846264338327950288419e30")
    for text in illegal:
        with pytest.raises(ClientError, match='ValidationException.*significant'):
            write(Decimal(text), payload)
# While most of the Alternator code just saves high-precision numbers
# unchanged, the "+" and "-" operations need to calculate with them, and
# we should check the calculation isn't done with some lower-precision
# representation, e.g., double
def test_update_expression_plus_precision(test_table_s):
    """Check that "+" and "-" in an UpdateExpression keep full precision.

    1 is added to, and then subtracted from, 10000000000000000000000 and
    the stored result must be exact to the last digit.
    """
    pk = random_string()
    big = "10000000000000000000000"
    steps = (
        ('SET b = :val1 + :val2', "10000000000000000000001"),
        ('SET b = :val2 - :val1', "9999999999999999999999"),
    )
    for expression, expected in steps:
        test_table_s.update_item(
            Key={'p': pk},
            UpdateExpression=expression,
            ExpressionAttributeValues={':val1': Decimal("1"), ':val2': Decimal(big)})
        item = test_table_s.get_item(Key={'p': pk}, ConsistentRead=True)['Item']
        assert item == {'p': pk, 'b': Decimal(expected)}
# Some additions or subtractions can overflow the allowed range, causing
# the update to fail: 9e125 + 9e125 = 1.8e126 which overflows.
def test_update_expression_plus_overflow(test_table_s):
    """A sum or difference that leaves the allowed range must be rejected.

    Both operands are individually legal; only the result overflows. A
    final in-range addition confirms the operands themselves were fine.
    """
    pk = random_string()

    def compute(expression, left, right):
        test_table_s.update_item(
            Key={'p': pk},
            UpdateExpression=expression,
            ExpressionAttributeValues={':val1': Decimal(left), ':val2': Decimal(right)})

    overflowing = (
        ('SET b = :val1 + :val2', "9e125", "9e125"),
        ('SET b = :val1 - :val2', "9e125", "-9e125"),
    )
    for expression, left, right in overflowing:
        with pytest.raises(ClientError, match='ValidationException.*overflow'):
            compute(expression, left, right)
    # Validate that the individual operands aren't too large - the only
    # problem was the sum.
    compute('SET b = :val1 + :val2', "9e125", "9e124")
    item = test_table_s.get_item(Key={'p': pk}, ConsistentRead=True)['Item']
    assert item['b'] == Decimal("9.9e125")
# Similarly, addition or subtraction can also result in unsupported precision
# and causing the update to fail: For example, 1e50 + 1 cannot be represented
# in 38 digits of precision.
def test_update_expression_plus_imprecise(test_table_s):
    """A result needing more than the supported precision must be rejected.

    1e50 - 1 and 1e50 + 1 are both attempted; each must fail with a
    ValidationException whose message contains "number".
    """
    pk = random_string()
    # Strangely, DynamoDB says that the error is: "Number overflow. Attempting
    # to store a number with magnitude larger than supported range". This is
    # clearly the wrong error message, so only the word "number" is matched.
    for expression in ('SET b = :val1 - :val2', 'SET b = :val1 + :val2'):
        with pytest.raises(ClientError, match='ValidationException.*number'):
            test_table_s.update_item(
                Key={'p': pk},
                UpdateExpression=expression,
                ExpressionAttributeValues={':val1': Decimal("1e50"), ':val2': Decimal("1")})
# Test that invalid strings cannot be stored as numbers and produce the
# expected error. This includes random non-numeric strings (e.g., "dog"),
# various syntax errors, and also the strings "NaN" and "Infinity", which
# although may be legal numbers in other systems (including Python), are
# not supported by DynamoDB. Spurious spaces are also not allowed.
def test_invalid_numbers(test_table_s):
    """Malformed number strings must be rejected; well-formed ones accepted.

    Rejected strings must produce a ValidationException whose message
    mentions "numeric".
    """
    pk = random_string()
    rejected = ['NaN', 'Infinity', '-Infinity', '-NaN', 'dog', '-dog', ' 1', '1 ']
    accepted = ['3', '-7.1234', '-17e5', '-17.4E37', '+3', '.123', '0001.23', '1e+5']
    # This test cannot be written with boto3's high-level API because it
    # reformats and validates the numeric parameter before sending it to
    # the server, so the client_no_transform trick is used instead. Note
    # that with client_no_transform, the number 3 is passed as {'N': '3'}.
    with client_no_transform(test_table_s.meta.client) as client:

        def store(text):
            client.update_item(
                TableName=test_table_s.name,
                Key={'p': {'S': pk}},
                UpdateExpression='SET a = :val',
                ExpressionAttributeValues={':val': {'N': text}})

        for text in rejected:
            with pytest.raises(ClientError, match='ValidationException.*numeric'):
                store(text)
        # As a sanity check, check that *allowed* numbers are fine:
        for text in accepted:
            store(text)
# In DynamoDB's JSON format, a number value is represented as map with key
# "N" and the value is a *string* containing the number. E.g., {"N": "123"}.
# Using a string instead of a number in the JSON is important to guarantee
# the full range of DynamoDB's floating point even if the JSON libraries
# do not understand them. But can a user use a number in the JSON anyway?
# E.g., would {"N": 123} work as a number value? It turns out that the
# answer is no - it doesn't work. Let's check that:
def test_number_in_json(test_table_s):
    """A JSON number (not a string) under "N" must be rejected.

    Two requests are tried - one with the bad value in the SET expression
    and one with it in the ConditionExpression - and both must fail with a
    SerializationException.
    """
    # client_no_transform() is needed so the JSON encoding is built here
    # instead of boto3 doing it automatically.
    with client_no_transform(test_table_s.meta.client) as client:
        pk = random_string()
        # Alternator reads numeric inputs in several code paths which may
        # handle errors differently, so several of them are verified.
        # It turns out that all code paths call the same validate_value()
        # function, so they result in the same error.
        bad_requests = (
            {
                'UpdateExpression': 'SET a = :val',
                # Note that a number 123 is passed here, not a string
                # '123', and that is wrong.
                'ExpressionAttributeValues': {':val': {'N': 123}},
            },
            {
                'UpdateExpression': 'SET a = :vgood',
                'ConditionExpression': 'a < :vbad',
                'ExpressionAttributeValues': {':vgood': {'N': '1'}, ':vbad': {'N': 123}},
            },
        )
        for request in bad_requests:
            with pytest.raises(ClientError, match='SerializationException'):
                client.update_item(
                    TableName=test_table_s.name,
                    Key={'p': {'S': pk}},
                    **request)
# Verify that Number RANGE (sort) keys use value-based comparison: different
# string representations of the same number ("1000" vs "1e3") are treated as
# the same sort key. A second PutItem with a different representation
# overwrites the first, and GetItem with any representation
# finds the item.
#
# Additionally, verify that the sort key is returned in canonical
# (normalized) form regardless of what representation was used to write it.
#
# This is consistent with DynamoDB behaviour. We use client_no_transform()
# to send exact string representations and avoid boto3's own Decimal
# normalization.
def test_number_range_key_representation(test_table_sn):
    """Different spellings of one number must address the same sort key.

    An item written with sort key "1000" must be readable and overwritable
    through "1e3" and other spellings, and the key must always be returned
    as "1000".
    """
    pk = random_string()
    with client_no_transform(test_table_sn.meta.client) as client:
        table_name = test_table_sn.name

        def put(sort_key, value):
            client.put_item(
                TableName=table_name,
                Item={'p': {'S': pk}, 'c': {'N': sort_key}, 'v': {'S': value}})

        def fetch(sort_key):
            response = client.get_item(
                TableName=table_name,
                Key={'p': {'S': pk}, 'c': {'N': sort_key}},
                ConsistentRead=True)
            return response['Item']

        # Write an item with sort key "1000".
        put('1000', 'first')
        # Sanity: reading with the same representation works.
        assert fetch('1000')['v']['S'] == 'first'
        # Reading with a different representation of the same value should
        # also find the item (RANGE key comparison is value-based).
        assert fetch('1e3')['v']['S'] == 'first'
        # Overwrite with a different representation.
        put('1e3', 'second')
        # There should be exactly one item in this partition (overwritten,
        # not a second item).
        result = client.query(
            TableName=table_name,
            KeyConditionExpression='p = :p',
            ExpressionAttributeValues={':p': {'S': pk}},
            ConsistentRead=True)
        assert result['Count'] == 1
        assert result['Items'][0]['v']['S'] == 'second'
        # All representations of the same value find the single item.
        # Includes exponent notation, trailing fractional zero, leading
        # zeros, and explicit plus sign.
        for spelling in ['1000', '1e3', '1E+3', '1000.0', '001000', '+1000']:
            item = fetch(spelling)
            assert item['v']['S'] == 'second'
            # The returned sort key should be in canonical form ("1000")
            # regardless of what representation was used for the lookup.
            assert item['c']['N'] == '1000'
# Verify that Number HASH (partition) keys are normalized: different string
# representations of the same number ("1000" vs "1e3") should refer to the
# same item, just like in DynamoDB. Also verify that the returned key is
# in canonical (normalized) form.
#
# DynamoDB normalizes Numbers on write, so "1e3" and "1000" are the same
# partition key. Alternator currently does NOT normalize — different
# representations produce different (scale, unscaled) byte pairs, different
# Murmur3 tokens, and end up in different partitions.
# Reproduces SCYLLADB-1575
@pytest.mark.xfail(reason="SCYLLADB-1575")
def test_number_hash_key_representation(test_table_n):
    """Different spellings of one number must address the same hash key.

    An item written with partition key "1000" must be readable and
    overwritable through "1e3" and other spellings, and the key must
    always be returned as "1000".
    """
    with client_no_transform(test_table_n.meta.client) as client:
        table_name = test_table_n.name

        def put(hash_key, value):
            client.put_item(
                TableName=table_name,
                Item={'p': {'N': hash_key}, 'v': {'S': value}})

        def lookup(hash_key):
            # Return the whole response, so callers can also check
            # whether an 'Item' was found at all.
            return client.get_item(
                TableName=table_name,
                Key={'p': {'N': hash_key}},
                ConsistentRead=True)

        # Write an item with HASH key "1000".
        put('1000', 'first')
        # Sanity: reading with the same representation works.
        same = lookup('1000')
        assert same['Item']['v']['S'] == 'first'
        # The returned key should be in canonical form.
        assert same['Item']['p']['N'] == '1000'
        # In DynamoDB, reading with a different representation of the
        # same number should find the same item. In Alternator, this
        # currently fails (SCYLLADB-1575: returns no item) because the
        # representations serialize to different bytes → different
        # tokens → different partitions.
        other = lookup('1e3')
        assert 'Item' in other
        assert other['Item']['v']['S'] == 'first'
        # Even when looked up via '1e3', the returned key should be
        # in canonical form ('1000'), not the lookup representation.
        assert other['Item']['p']['N'] == '1000'
        # Writing with a different representation should overwrite, not
        # create a second item.
        put('1e3', 'second')
        assert lookup('1000')['Item']['v']['S'] == 'second'
        assert lookup('1e3')['Item']['v']['S'] == 'second'
        # All these representations should find the same item and
        # return the key in canonical form.
        for spelling in ['1000', '1e3', '1E+3', '1000.0', '001000', '+1000']:
            found = lookup(spelling)
            assert found['Item']['v']['S'] == 'second'
            assert found['Item']['p']['N'] == '1000'
# DynamoDB normalizes Number values on write: it strips leading zeros,
# trailing fractional zeros, converts exponent notation to plain decimal
# form (within the representable range), and removes explicit plus signs.
# The following table lists (input_string, expected_canonical_output)
# pairs that document DynamoDB's exact normalization rules.
_NORMALIZATION_CASES = [
    # --- Plain integers: returned exactly as written.
    ('1', '1'),
    ('123', '123'),
    ('-5', '-5'),
    ('0', '0'),
    ('10', '10'),
    ('100', '100'),
    ('1000', '1000'),

    # --- Leading zeros are dropped.
    ('007', '7'),
    ('001.23', '1.23'),
    ('00', '0'),

    # --- An explicit plus sign is dropped.
    ('+3', '3'),
    ('+0', '0'),
    ('+1.5', '1.5'),

    # --- Trailing zeros in the fraction are dropped.
    ('1.0', '1'),
    ('1.00', '1'),
    ('1.10', '1.1'),
    ('100.000', '100'),
    ('0.0', '0'),
    ('0.10', '0.1'),

    # --- A positive exponent is expanded to plain decimal.
    ('1e3', '1000'),
    ('1E3', '1000'),
    ('1e+3', '1000'),
    ('1E+3', '1000'),
    ('5e1', '50'),
    ('-3e2', '-300'),
    ('1.5e2', '150'),
    ('1.23e4', '12300'),

    # --- A negative exponent is expanded to plain decimal.
    ('1e-3', '0.001'),
    ('5e-1', '0.5'),
    ('123e-2', '1.23'),
    ('1.23e-1', '0.123'),

    # --- Exponent combined with trailing zeros: both are normalized.
    ('1.0e3', '1000'),
    ('1.00e2', '100'),
    ('1.0e-1', '0.1'),

    # --- Negative zero loses its sign.
    ('-0', '0'),
    ('-0.0', '0'),

    # --- Zero with any exponent collapses to '0'.
    ('0e5', '0'),
    ('0.0e3', '0'),
    ('0e-5', '0'),

    # --- A fraction without a leading zero gains one.
    ('.5', '0.5'),
    ('.123', '0.123'),
    ('-.5', '-0.5'),

    # --- Large magnitudes within DynamoDB bounds. The normalized form is
    # always plain decimal, never scientific notation.
    ('1e20', '100000000000000000000'),
    ('9.9e10', '99000000000'),
    ('1e125', '1' + '0' * 125),

    # --- Small values are expanded to plain decimal too.
    ('1e-20', '0.00000000000000000001'),

    # --- Values already in canonical form are returned unchanged.
    ('3.14159', '3.14159'),
    ('-273.15', '-273.15'),
    ('0.001', '0.001'),
]
# Verify that DynamoDB returns Number attribute values in canonical
# (normalized) form regardless of what string representation was used
# to write them. This test probes the exact normalization rules listed
# in _NORMALIZATION_CASES.
#
# Uses client_no_transform() to send exact Number strings and inspect
# the exact strings returned, bypassing boto3's own Decimal normalization.
def test_number_output_normalization(test_table_s):
    """Check that stored numbers are returned in canonical form.

    Each input string from _NORMALIZATION_CASES is written to a fresh item
    and read back; the returned string must equal the expected canonical
    form. All mismatches are collected and reported together, so a single
    run shows every failing case instead of only the first one.
    """
    with client_no_transform(test_table_s.meta.client) as client:
        failures = []
        for input_num, expected in _NORMALIZATION_CASES:
            # A fresh random key per case keeps the cases independent.
            key = {'S': random_string()}
            client.put_item(TableName=test_table_s.name,
                Item={'p': key, 'a': {'N': input_num}})
            got = client.get_item(TableName=test_table_s.name,
                Key={'p': key},
                ConsistentRead=True)
            actual = got['Item']['a']['N']
            if actual != expected:
                failures.append(
                    f' {input_num!r}: expected {expected!r}, got {actual!r}')
        assert not failures, \
            'Number output normalization mismatches:\n' + '\n'.join(failures)
# DynamoDB normalizes Number values inside Number Sets (NS), so different
# string representations of the same number are treated as the same set
# element.
# Alternator stores NS elements as JSON strings and uses string
# comparison (rjson::single_value_comp on kStringType), so "1" and "1.0"
# are treated as different elements.
# This single test checks ADD (union), DELETE (difference), and
# ConditionExpression equality sequentially.
# Reproduces SCYLLADB-1575.
@pytest.mark.xfail(reason="SCYLLADB-1575")
def test_number_set_normalization(test_table_s):
    """Number-set elements must be compared by value, not by spelling.

    Three scenarios run in sequence on the same item: ADD (set union),
    DELETE (set difference) and equality in a ConditionExpression.
    """
    p = random_string()
    with client_no_transform(test_table_s.meta.client) as client:
        tn = test_table_s.name
        key = {'S': p}

        def reset_set(elements):
            # Replace the whole item, so 'ns' holds exactly these elements.
            client.put_item(TableName=tn,
                Item={'p': key, 'ns': {'NS': elements}})

        def read_item():
            return client.get_item(TableName=tn, Key={'p': key},
                ConsistentRead=True)['Item']

        # --- ADD: adding a different representation of an existing element
        # should not increase the set size.
        reset_set(['1', '2', '3'])
        client.update_item(TableName=tn,
            Key={'p': key},
            UpdateExpression='ADD ns :v',
            ExpressionAttributeValues={':v': {'NS': ['1.0', '2.00']}})
        after_add = read_item()['ns']['NS']
        # DynamoDB: still 3 elements (1.0 == 1, 2.00 == 2).
        # Alternator bug (SCYLLADB-1575): 5 elements ("1", "1.0", "2", "2.00", "3").
        assert len(after_add) == 3

        # --- DELETE: removing by a different representation should work.
        reset_set(['10.0', '20', '30'])
        client.update_item(TableName=tn,
            Key={'p': key},
            UpdateExpression='DELETE ns :v',
            ExpressionAttributeValues={':v': {'NS': ['10', '2e1']}})
        after_delete = read_item()['ns']['NS']
        # DynamoDB: only "30" remains (10 == 10.0, 2e1 == 20).
        # Alternator bug (SCYLLADB-1575): still all 3 ("10.0", "20", "30").
        assert len(after_delete) == 1

        # --- ConditionExpression EQ: sets with same values but different
        # representations should be equal.
        reset_set(['1', '2'])
        # Condition: ns = {1.0, 2.00} — should pass (same numbers).
        # Alternator bug (SCYLLADB-1575): fails because "1" != "1.0" in string comparison.
        condition_values = {
            ':x': {'S': 'passed'},
            ':ns': {'NS': ['1.0', '2.00']},
        }
        client.update_item(TableName=tn,
            Key={'p': key},
            UpdateExpression='SET #x = :x',
            ConditionExpression='ns = :ns',
            ExpressionAttributeNames={'#x': 'flag'},
            ExpressionAttributeValues=condition_values)
        flag = read_item().get('flag', {}).get('S')
        assert flag == 'passed', \
            'ConditionExpression: NS equality with different representations failed'