diff --git a/cql3/type_json.cc b/cql3/type_json.cc index be8f983dda..af92ec46b6 100644 --- a/cql3/type_json.cc +++ b/cql3/type_json.cc @@ -78,8 +78,35 @@ static int64_t to_int64_t(const rjson::value& value) { return value.GetInt(); } else if (value.IsUint()) { return value.GetUint(); - } else if (value.GetUint64()) { + } else if (value.IsUint64()) { return value.GetUint64(); //NOTICE: large uint64_t values will get overflown + } else if (value.IsDouble()) { + // We allow specifying integer constants + // using scientific notation (for example 1.3e8) + // and floating-point numbers ending with .0 (for example 12.0), + // but not floating-point numbers with fractional part (12.34). + // + // The reason is that the JSON standard does not have separate + // types for integers and floating-point numbers, only + // a single "number" type. Some serializers may + // produce an integer in that floating-point format. + double double_value = value.GetDouble(); + + // Check if the value contains disallowed fractional part (.34 from 12.34). + // With RapidJSON and an integer value in range [-(2^53)+1, (2^53)-1], + // the fractional part will be zero as the entire value + // fits in 53-bit significand. RapidJSON's parsing code does not lose accuracy: + // when parsing a number like 12.34e8, it accumulates 1234 to an int64_t number, + // then converts it to double and multiplies by a power of 10, never having any + // digit in fractional part. 
+ double integral; + double fractional = std::modf(double_value, &integral); + if (fractional != 0.0 && fractional != -0.0) { + throw marshal_exception(format("Incorrect JSON floating-point value " + "for int64 type: {} (it should not contain fractional part {})", value, fractional)); + } + + return double_value; } throw marshal_exception(format("Incorrect JSON value for int64 type: {}", value)); } diff --git a/test/boost/json_cql_query_test.cc b/test/boost/json_cql_query_test.cc index 28534b2273..cc0c864d50 100644 --- a/test/boost/json_cql_query_test.cc +++ b/test/boost/json_cql_query_test.cc @@ -26,6 +26,7 @@ #include "types/tuple.hh" #include "types/user.hh" #include "types/list.hh" +#include "utils/rjson.hh" using namespace std::literals::chrono_literals; @@ -451,6 +452,115 @@ SEASTAR_TEST_CASE(test_insert_json_null_frozen_collections) { }); } +SEASTAR_TEST_CASE(test_insert_json_integer_in_scientific_notation) { + return do_with_cql_env_thread([] (cql_test_env& e) { + // Verify that our JSON parsing supports + // inserting numbers like 1.23e+7 to an integer + // column (int, bigint, etc.). Numbers that contain + // a fractional part (12.34) are disallowed. Note + // that this behavior differs from Cassandra, which + // disallows all those types (1.23e+7, 12.34). + + cquery_nofail(e, + "CREATE TABLE scientific_notation (" + " pk int primary key," + " v bigint," + ");"); + + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 1, "v": 150.0 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 2, "v": 234 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 3, "v": 1E+6 + }' + )"); + + // JSON standard specifies that numbers + // in range [-2^53+1, 2^53-1] are interoperable + // meaning implementations will agree + // exactly on their numeric values. 
This range + // corresponds to the fact that double floating-point + // type has a 53-bit significand and converting + // from an integer in that range to double is non-lossy. + // + // This checks that precision is not lost + // for the largest possible value in that range + // (2^53-1). + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 4, "v": 9.007199254740991E+15 + }' + )"); + + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 5, "v": 0.0e3 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 6, "v": -0.0e1 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 7, "v": -1.234E+3 + }' + )"); + + require_rows(e, "SELECT pk, v FROM scientific_notation", { + {int32_type->decompose(1), long_type->decompose(int64_t(150))}, + {int32_type->decompose(2), long_type->decompose(int64_t(234))}, + {int32_type->decompose(3), long_type->decompose(int64_t(1000000))}, + {int32_type->decompose(4), long_type->decompose(int64_t(9007199254740991))}, + {int32_type->decompose(5), long_type->decompose(int64_t(0))}, + {int32_type->decompose(6), long_type->decompose(int64_t(0))}, + {int32_type->decompose(7), long_type->decompose(int64_t(-1234))}, + }); + + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 8, "v": 12.34 + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 9, "v": 1e-1 + }' + )").get(), marshal_exception); + + // JSON specification disallows Inf, -Inf, NaN: + // "Numeric values that cannot be represented in the + // grammar below (such as Infinity and NaN) are not permitted." + // + // RapidJSON has a parsing flag: kParseNanAndInfFlag + // which allows it. 
Verify it's not used: + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 10, "v": +Inf + }' + )").get(), rjson::error); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 11, "v": -inf + }' + )").get(), rjson::error); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 12, "v": NaN + }' + )").get(), rjson::error); + }); +} + SEASTAR_TEST_CASE(test_prepared_json) { return do_with_cql_env_thread([] (cql_test_env& e) { auto prepared = e.execute_cql(