diff --git a/cql3/type_json.cc b/cql3/type_json.cc index 20a76cd3ef..af92ec46b6 100644 --- a/cql3/type_json.cc +++ b/cql3/type_json.cc @@ -78,12 +78,46 @@ static int64_t to_int64_t(const rjson::value& value) { return value.GetInt(); } else if (value.IsUint()) { return value.GetUint(); - } else if (value.GetUint64()) { + } else if (value.IsUint64()) { return value.GetUint64(); //NOTICE: large uint64_t values will get overflown + } else if (value.IsDouble()) { + // We allow specifing integer constants + // using scientific notation (for example 1.3e8) + // and floating-point numbers ending with .0 (for example 12.0), + // but not floating-point numbers with fractional part (12.34). + // + // The reason is that JSON standard does not have separate + // types for integers and floating-point numbers, only + // a single "number" type. Some serializers may + // produce an integer in that floating-point format. + double double_value = value.GetDouble(); + + // Check if the value contains disallowed fractional part (.34 from 12.34). + // With RapidJSON and an integer value in range [-(2^53)+1, (2^53)-1], + // the fractional part will be zero as the entire value + // fits in 53-bit significand. RapidJSON's parsing code does not lose accuracy: + // when parsing a number like 12.34e8, it accumulates 1234 to a int64_t number, + // then converts it to double and multiples by power of 10, never having any + // digit in fractional part. + double integral; + double fractional = std::modf(double_value, &integral); + if (fractional != 0.0 && fractional != -0.0) { + throw marshal_exception(format("Incorrect JSON floating-point value " + "for int64 type: {} (it should not contain fractional part {})", value, fractional)); + } + + return double_value; } throw marshal_exception(format("Incorrect JSON value for int64 type: {}", value)); } +static std::string_view validated_to_string_view(const rjson::value& v, const char* type_name) { + if (!v.IsString()) { + throw marshal_exception(format("{} must be represented as string in JSON, instead got '{}'", type_name, v)); + } + return rjson::to_string_view(v); +} + static bytes from_json_object_aux(const map_type_impl& t, const rjson::value& value, cql_serialization_format sf) { if (!value.IsObject()) { throw marshal_exception("map_type must be represented as JSON Object"); @@ -185,11 +219,8 @@ struct from_json_object_visitor { } bytes operator()(const string_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } bytes operator()(const bytes_type_impl& t) { - if (!value.IsString()) { - throw marshal_exception("bytes_type must be represented as string"); - } - std::string_view string_v = rjson::to_string_view(value); - if (string_v.size() < 2 && string_v[0] != '0' && string_v[1] != 'x') { + std::string_view string_v = validated_to_string_view(value, "bytes_type"); + if (string_v.size() < 2 || string_v[0] != '0' || string_v[1] != 'x') { throw marshal_exception("Blob JSON strings must start with 0x"); } string_v.remove_prefix(2); @@ -210,16 +241,11 @@ struct from_json_object_visitor { } return t.from_string(rjson::to_string_view(value)); } - bytes operator()(const timeuuid_type_impl& t) { - if (!value.IsString()) { - throw marshal_exception(format("{} must be represented as string in JSON", value)); - } - return t.from_string(rjson::to_string_view(value)); - } - bytes operator()(const simple_date_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } - bytes operator()(const time_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } - bytes operator()(const uuid_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } - bytes operator()(const inet_addr_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } + bytes operator()(const timeuuid_type_impl& t) { return t.from_string(validated_to_string_view(value, "timeuuid_type")); } + bytes operator()(const simple_date_type_impl& t) { return t.from_string(validated_to_string_view(value, "simple_date_type")); } + bytes operator()(const time_type_impl& t) { return t.from_string(validated_to_string_view(value, "time_type")); } + bytes operator()(const uuid_type_impl& t) { return t.from_string(validated_to_string_view(value, "uuid_type")); } + bytes operator()(const inet_addr_type_impl& t) { return t.from_string(validated_to_string_view(value, "inet_addr_type")); } template bytes operator()(const floating_type_impl& t) { if (value.IsString()) { return t.from_string(rjson::to_string_view(value)); @@ -257,12 +283,7 @@ struct from_json_object_visitor { } return counter_cell_view::total_value_type()->decompose(to_int64_t(value)); } - bytes operator()(const duration_type_impl& t) { - if (!value.IsString()) { - throw marshal_exception(format("{} must be represented as string in JSON", value)); - } - return t.from_string(rjson::to_string_view(value)); - } + bytes operator()(const duration_type_impl& t) { return t.from_string(validated_to_string_view(value, "duration_type")); } bytes operator()(const empty_type_impl& t) { return bytes(); } bytes operator()(const map_type_impl& t) { return from_json_object_aux(t, value, sf); } bytes operator()(const set_type_impl& t) { return from_json_object_aux(t, value, sf); } diff --git a/test/boost/json_cql_query_test.cc b/test/boost/json_cql_query_test.cc index 59b7f43646..cc0c864d50 100644 --- a/test/boost/json_cql_query_test.cc +++ b/test/boost/json_cql_query_test.cc @@ -26,6 +26,7 @@ #include "types/tuple.hh" #include "types/user.hh" #include "types/list.hh" +#include "utils/rjson.hh" using namespace std::literals::chrono_literals; @@ -295,6 +296,45 @@ SEASTAR_TEST_CASE(test_insert_json_types) { } }); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "c": "6" + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "c": "0392fa" + }' + )").get(), marshal_exception); + + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "\"G\"": 87654321 + }' + )").get(), marshal_exception); + + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "k": 123456 + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "k": "3157" + }' + )").get(), marshal_exception); + + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "r": 12.34 + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "s": 1234 + }' + )").get(), marshal_exception); + e.execute_cql("CREATE TABLE multi_column_pk_table (p1 int, p2 int, p3 int, c1 int, c2 int, v int, PRIMARY KEY((p1, p2, p3), c1, c2));").get(); e.require_table_exists("ks", "multi_column_pk_table").get(); @@ -412,6 +452,115 @@ SEASTAR_TEST_CASE(test_insert_json_null_frozen_collections) { }); } +SEASTAR_TEST_CASE(test_insert_json_integer_in_scientific_notation) { + return do_with_cql_env_thread([] (cql_test_env& e) { + // Verify that our JSON parsing supports + // inserting numbers like 1.23e+7 to an integer + // column (int, bigint, etc.). Numbers that contain + // a fractional part (12.34) are disallowed. Note + // that this behavior differs from Cassandra, which + // disallows all those types (1.23e+7, 12.34). + + cquery_nofail(e, + "CREATE TABLE scientific_notation (" + " pk int primary key," + " v bigint," + ");"); + + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 1, "v": 150.0 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 2, "v": 234 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 3, "v": 1E+6 + }' + )"); + + // JSON standard specifies that numbers + // in range [-2^53+1, 2^53-1] are interoperable + // meaning implementations will agree + // exactly on their numeric values. This range + // corresponds to a fact that double floating-point + // type has a 53-bit significand and converting + // from an integer in that range to double is non-lossy. + // + // This checks that precision is not lost + // for the largest possible value in that range + // (2^53-1). + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 4, "v": 9.007199254740991E+15 + }' + )"); + + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 5, "v": 0.0e3 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 6, "v": -0.0e1 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 7, "v": -1.234E+3 + }' + )"); + + require_rows(e, "SELECT pk, v FROM scientific_notation", { + {int32_type->decompose(1), long_type->decompose(int64_t(150))}, + {int32_type->decompose(2), long_type->decompose(int64_t(234))}, + {int32_type->decompose(3), long_type->decompose(int64_t(1000000))}, + {int32_type->decompose(4), long_type->decompose(int64_t(9007199254740991))}, + {int32_type->decompose(5), long_type->decompose(int64_t(0))}, + {int32_type->decompose(6), long_type->decompose(int64_t(0))}, + {int32_type->decompose(7), long_type->decompose(int64_t(-1234))}, + }); + + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 8, "v": 12.34 + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 9, "v": 1e-1 + }' + )").get(), marshal_exception); + + // JSON specification disallows Inf, -Inf, NaN: + // "Numeric values that cannot be represented in the + // grammar below (such as Infinity and NaN) are not permitted." + // + // RapidJSON has a parsing flag: kParseNanAndInfFlag + // which allows it. Verify it's not used: + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 10, "v": +Inf + }' + )").get(), rjson::error); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 11, "v": -inf + }' + )").get(), rjson::error); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 12, "v": NaN + }' + )").get(), rjson::error); + }); +} + SEASTAR_TEST_CASE(test_prepared_json) { return do_with_cql_env_thread([] (cql_test_env& e) { auto prepared = e.execute_cql(