type_json: support integers in scientific format

Add support for specifying integers in scientific format (for example
1.234e8) in the INSERT JSON statement:

INSERT INTO table JSON '{"int_column": 1e7}';

Inserting a floating-point number ending with .0 is allowed, as
the fractional part is zero. Non-zero fractional part (for example
12.34) is disallowed. A new test is added to test all those behaviors.

Before the JSON parsing library was switched to RapidJSON from JsonCpp,
this statement used to work correctly, because JsonCpp transparently
casts double to integer value.

This behavior differs from Cassandra, which disallows those types of
numbers (1e7, 123.0 and 12.34).

Fix typo in if condition: "if (value.GetUint64())" to
"if (value.IsUint64())".

Fixes #10100
This commit is contained in:
Piotr Grabowski
2022-02-18 12:22:25 +01:00
parent 649ab70936
commit efe7456f0a
2 changed files with 138 additions and 1 deletions

View File

@@ -78,8 +78,35 @@ static int64_t to_int64_t(const rjson::value& value) {
return value.GetInt();
} else if (value.IsUint()) {
return value.GetUint();
} else if (value.GetUint64()) {
} else if (value.IsUint64()) {
return value.GetUint64(); //NOTICE: large uint64_t values will get overflown
} else if (value.IsDouble()) {
// We allow specifying integer constants
// using scientific notation (for example 1.3e8)
// and floating-point numbers ending with .0 (for example 12.0),
// but not floating-point numbers with fractional part (12.34).
//
// The reason is that JSON standard does not have separate
// types for integers and floating-point numbers, only
// a single "number" type. Some serializers may
// produce an integer in that floating-point format.
double double_value = value.GetDouble();
// Check if the value contains disallowed fractional part (.34 from 12.34).
// With RapidJSON and an integer value in range [-(2^53)+1, (2^53)-1],
// the fractional part will be zero as the entire value
// fits in 53-bit significand. RapidJSON's parsing code does not lose accuracy:
// when parsing a number like 12.34e8, it accumulates 1234 into an int64_t number,
// then converts it to double and multiplies by a power of 10, never having any
// digit in the fractional part.
double integral;
double fractional = std::modf(double_value, &integral);
if (fractional != 0.0 && fractional != -0.0) {
throw marshal_exception(format("Incorrect JSON floating-point value "
"for int64 type: {} (it should not contain fractional part {})", value, fractional));
}
return double_value;
}
throw marshal_exception(format("Incorrect JSON value for int64 type: {}", value));
}

View File

@@ -26,6 +26,7 @@
#include "types/tuple.hh"
#include "types/user.hh"
#include "types/list.hh"
#include "utils/rjson.hh"
using namespace std::literals::chrono_literals;
@@ -451,6 +452,115 @@ SEASTAR_TEST_CASE(test_insert_json_null_frozen_collections) {
});
}
// Checks which JSON number spellings INSERT JSON accepts for a bigint column:
// integral values written in floating-point or scientific notation must be
// stored, while values with a non-zero fractional part and the non-standard
// Inf/NaN tokens must make the statement throw.
SEASTAR_TEST_CASE(test_insert_json_integer_in_scientific_notation) {
return do_with_cql_env_thread([] (cql_test_env& e) {
// Verify that our JSON parsing supports
// inserting numbers like 1.23e+7 to an integer
// column (int, bigint, etc.). Numbers that contain
// a fractional part (12.34) are disallowed. Note
// that this behavior differs from Cassandra, which
// disallows all those types (1.23e+7, 12.34).
cquery_nofail(e,
"CREATE TABLE scientific_notation ("
" pk int primary key,"
" v bigint,"
");");
// pk 1: floating-point spelling whose fractional part is zero.
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 1, "v": 150.0
}'
)");
// pk 2: plain integer literal, used as a baseline.
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 2, "v": 234
}'
)");
// pk 3: scientific notation with an upper-case exponent marker and explicit sign.
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 3, "v": 1E+6
}'
)");
// JSON standard specifies that numbers
// in range [-2^53+1, 2^53-1] are interoperable
// meaning implementations will agree
// exactly on their numeric values. This range
// corresponds to the fact that the double floating-point
// type has a 53-bit significand and converting
// from an integer in that range to double is non-lossy.
//
// This checks that precision is not lost
// for the largest possible value in that range
// (2^53-1).
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 4, "v": 9.007199254740991E+15
}'
)");
// pk 5: zero written with a fraction and an exponent.
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 5, "v": 0.0e3
}'
)");
// pk 6: negative zero; the row check below expects it to be stored as 0.
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 6, "v": -0.0e1
}'
)");
// pk 7: negative value whose fraction digits disappear once the exponent is applied.
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 7, "v": -1.234E+3
}'
)");
// Every accepted insert above must be readable back as the exact integer it denotes.
require_rows(e, "SELECT pk, v FROM scientific_notation", {
{int32_type->decompose(1), long_type->decompose(int64_t(150))},
{int32_type->decompose(2), long_type->decompose(int64_t(234))},
{int32_type->decompose(3), long_type->decompose(int64_t(1000000))},
{int32_type->decompose(4), long_type->decompose(int64_t(9007199254740991))},
{int32_type->decompose(5), long_type->decompose(int64_t(0))},
{int32_type->decompose(6), long_type->decompose(int64_t(0))},
{int32_type->decompose(7), long_type->decompose(int64_t(-1234))},
});
// A literal with a non-zero fractional part must be rejected with marshal_exception.
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 8, "v": 12.34
}'
)").get(), marshal_exception);
// Scientific notation that evaluates to a fraction (1e-1 is 0.1) must be rejected as well.
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 9, "v": 1e-1
}'
)").get(), marshal_exception);
// JSON specification disallows Inf, -Inf, NaN:
// "Numeric values that cannot be represented in the
// grammar below (such as Infinity and NaN) are not permitted."
//
// RapidJSON has a parsing flag: kParseNanAndInfFlag
// which allows it. Verify it's not used:
// (these statements are expected to throw rjson::error rather than marshal_exception).
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 10, "v": +Inf
}'
)").get(), rjson::error);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 11, "v": -inf
}'
)").get(), rjson::error);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 12, "v": NaN
}'
)").get(), rjson::error);
});
}
SEASTAR_TEST_CASE(test_prepared_json) {
return do_with_cql_env_thread([] (cql_test_env& e) {
auto prepared = e.execute_cql(