From f8b67c9bd14785e581ede40b0b8d91ac71359276 Mon Sep 17 00:00:00 2001 From: Piotr Grabowski Date: Fri, 18 Feb 2022 12:23:32 +0100 Subject: [PATCH 1/3] type_json: fix wrong blob JSON validation Fixes wrong condition for validating whether a JSON string representing blob value is valid. Previously, strings such as "6" or "0392fa" would pass the validation, even though they are too short or don't start with "0x". Add those test cases to json_cql_query_test.cc. Fixes #10114 --- cql3/type_json.cc | 2 +- test/boost/json_cql_query_test.cc | 11 +++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/cql3/type_json.cc b/cql3/type_json.cc index 20a76cd3ef..1ab2438aef 100644 --- a/cql3/type_json.cc +++ b/cql3/type_json.cc @@ -189,7 +189,7 @@ struct from_json_object_visitor { throw marshal_exception("bytes_type must be represented as string"); } std::string_view string_v = rjson::to_string_view(value); - if (string_v.size() < 2 && string_v[0] != '0' && string_v[1] != 'x') { + if (string_v.size() < 2 || string_v[0] != '0' || string_v[1] != 'x') { throw marshal_exception("Blob JSON strings must start with 0x"); } string_v.remove_prefix(2); diff --git a/test/boost/json_cql_query_test.cc b/test/boost/json_cql_query_test.cc index 59b7f43646..59d11ddf78 100644 --- a/test/boost/json_cql_query_test.cc +++ b/test/boost/json_cql_query_test.cc @@ -295,6 +295,17 @@ SEASTAR_TEST_CASE(test_insert_json_types) { } }); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "c": "6" + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "c": "0392fa" + }' + )").get(), marshal_exception); + e.execute_cql("CREATE TABLE multi_column_pk_table (p1 int, p2 int, p3 int, c1 int, c2 int, v int, PRIMARY KEY((p1, p2, p3), c1, c2));").get(); e.require_table_exists("ks", "multi_column_pk_table").get(); From 649ab709369eb4bd9a5afcecf8b87136a1d2f347 Mon Sep 17 00:00:00 2001 From: Piotr Grabowski Date: Fri, 18 Feb 2022 15:06:53 +0100 Subject: [PATCH 2/3] type_json: add missing IsString() checks Add missing IsString() checks to parsing date, time, uuid and inet types by introducing validated_to_string_view function which checks whether the value is of string type and otherwise throws marshal_exception. Without this check, a malformed input to those types would result in nasty ServerError with RapidJSON assertion instead of marshal_exception with detail about the problem. Add new tests checking passing non-string values for those types. Fixes #10115 --- cql3/type_json.cc | 34 +++++++++++++------------------ test/boost/json_cql_query_test.cc | 28 +++++++++++++++++++++++++ 2 files changed, 42 insertions(+), 20 deletions(-) diff --git a/cql3/type_json.cc b/cql3/type_json.cc index 1ab2438aef..be8f983dda 100644 --- a/cql3/type_json.cc +++ b/cql3/type_json.cc @@ -84,6 +84,13 @@ static int64_t to_int64_t(const rjson::value& value) { throw marshal_exception(format("Incorrect JSON value for int64 type: {}", value)); } +static std::string_view validated_to_string_view(const rjson::value& v, const char* type_name) { + if (!v.IsString()) { + throw marshal_exception(format("{} must be represented as string in JSON, instead got '{}'", type_name, v)); + } + return rjson::to_string_view(v); +} + static bytes from_json_object_aux(const map_type_impl& t, const rjson::value& value, cql_serialization_format sf) { if (!value.IsObject()) { throw marshal_exception("map_type must be represented as JSON Object"); @@ -185,10 +192,7 @@ struct from_json_object_visitor { } bytes operator()(const string_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } bytes operator()(const bytes_type_impl& t) { - if (!value.IsString()) { - throw marshal_exception("bytes_type must be represented as string"); - } - std::string_view string_v = rjson::to_string_view(value); + std::string_view string_v = validated_to_string_view(value, "bytes_type"); if (string_v.size() < 2 || string_v[0] != '0' || string_v[1] != 'x') { throw marshal_exception("Blob JSON strings must start with 0x"); } @@ -210,16 +214,11 @@ struct from_json_object_visitor { } return t.from_string(rjson::to_string_view(value)); } - bytes operator()(const timeuuid_type_impl& t) { - if (!value.IsString()) { - throw marshal_exception(format("{} must be represented as string in JSON", value)); - } - return t.from_string(rjson::to_string_view(value)); - } - bytes operator()(const simple_date_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } - bytes operator()(const time_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } - bytes operator()(const uuid_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } - bytes operator()(const inet_addr_type_impl& t) { return t.from_string(rjson::to_string_view(value)); } + bytes operator()(const timeuuid_type_impl& t) { return t.from_string(validated_to_string_view(value, "timeuuid_type")); } + bytes operator()(const simple_date_type_impl& t) { return t.from_string(validated_to_string_view(value, "simple_date_type")); } + bytes operator()(const time_type_impl& t) { return t.from_string(validated_to_string_view(value, "time_type")); } + bytes operator()(const uuid_type_impl& t) { return t.from_string(validated_to_string_view(value, "uuid_type")); } + bytes operator()(const inet_addr_type_impl& t) { return t.from_string(validated_to_string_view(value, "inet_addr_type")); } template bytes operator()(const floating_type_impl& t) { if (value.IsString()) { return t.from_string(rjson::to_string_view(value)); @@ -257,12 +256,7 @@ struct from_json_object_visitor { } return counter_cell_view::total_value_type()->decompose(to_int64_t(value)); } - bytes operator()(const duration_type_impl& t) { - if (!value.IsString()) { - throw marshal_exception(format("{} must be represented as string in JSON", value)); - } - return t.from_string(rjson::to_string_view(value)); - } + bytes operator()(const duration_type_impl& t) { return t.from_string(validated_to_string_view(value, "duration_type")); } bytes operator()(const empty_type_impl& t) { return bytes(); } bytes operator()(const map_type_impl& t) { return from_json_object_aux(t, value, sf); } bytes operator()(const set_type_impl& t) { return from_json_object_aux(t, value, sf); } diff --git a/test/boost/json_cql_query_test.cc b/test/boost/json_cql_query_test.cc index 59d11ddf78..28534b2273 100644 --- a/test/boost/json_cql_query_test.cc +++ b/test/boost/json_cql_query_test.cc @@ -306,6 +306,34 @@ SEASTAR_TEST_CASE(test_insert_json_types) { }' )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "\"G\"": 87654321 + }' + )").get(), marshal_exception); + + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "k": 123456 + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "k": "3157" + }' + )").get(), marshal_exception); + + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "r": 12.34 + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO all_types JSON '{ + "a": "abc", "s": 1234 + }' + )").get(), marshal_exception); + e.execute_cql("CREATE TABLE multi_column_pk_table (p1 int, p2 int, p3 int, c1 int, c2 int, v int, PRIMARY KEY((p1, p2, p3), c1, c2));").get(); e.require_table_exists("ks", "multi_column_pk_table").get(); From efe7456f0a0ef0f0558cd0d3c09b16ac059ce8db Mon Sep 17 00:00:00 2001 From: Piotr Grabowski Date: Fri, 18 Feb 2022 12:22:25 +0100 Subject: [PATCH 3/3] type_json: support integers in scientific format Add support for specifing integers in scientific format (for example 1.234e8) in INSERT JSON statement: INSERT INTO table JSON '{"int_column": 1e7}'; Inserting a floating-point number ending with .0 is allowed, as the fractional part is zero. Non-zero fractional part (for example 12.34) is disallowed. A new test is added to test all those behaviors. Before the JSON parsing library was switched to RapidJSON from JsonCpp, this statement used to work correctly, because JsonCpp transparently casts double to integer value. This behavior differs from Cassandra, which disallows those types of numbers (1e7, 123.0 and 12.34). Fix typo in if condition: "if (value.GetUint64())" to "if (value.IsUint64())". Fixes #10100 --- cql3/type_json.cc | 29 +++++++- test/boost/json_cql_query_test.cc | 110 ++++++++++++++++++++++++++++++ 2 files changed, 138 insertions(+), 1 deletion(-) diff --git a/cql3/type_json.cc b/cql3/type_json.cc index be8f983dda..af92ec46b6 100644 --- a/cql3/type_json.cc +++ b/cql3/type_json.cc @@ -78,8 +78,35 @@ static int64_t to_int64_t(const rjson::value& value) { return value.GetInt(); } else if (value.IsUint()) { return value.GetUint(); - } else if (value.GetUint64()) { + } else if (value.IsUint64()) { return value.GetUint64(); //NOTICE: large uint64_t values will get overflown + } else if (value.IsDouble()) { + // We allow specifing integer constants + // using scientific notation (for example 1.3e8) + // and floating-point numbers ending with .0 (for example 12.0), + // but not floating-point numbers with fractional part (12.34). + // + // The reason is that JSON standard does not have separate + // types for integers and floating-point numbers, only + // a single "number" type. Some serializers may + // produce an integer in that floating-point format. + double double_value = value.GetDouble(); + + // Check if the value contains disallowed fractional part (.34 from 12.34). + // With RapidJSON and an integer value in range [-(2^53)+1, (2^53)-1], + // the fractional part will be zero as the entire value + // fits in 53-bit significand. RapidJSON's parsing code does not lose accuracy: + // when parsing a number like 12.34e8, it accumulates 1234 to a int64_t number, + // then converts it to double and multiples by power of 10, never having any + // digit in fractional part. + double integral; + double fractional = std::modf(double_value, &integral); + if (fractional != 0.0 && fractional != -0.0) { + throw marshal_exception(format("Incorrect JSON floating-point value " + "for int64 type: {} (it should not contain fractional part {})", value, fractional)); + } + + return double_value; } throw marshal_exception(format("Incorrect JSON value for int64 type: {}", value)); } diff --git a/test/boost/json_cql_query_test.cc b/test/boost/json_cql_query_test.cc index 28534b2273..cc0c864d50 100644 --- a/test/boost/json_cql_query_test.cc +++ b/test/boost/json_cql_query_test.cc @@ -26,6 +26,7 @@ #include "types/tuple.hh" #include "types/user.hh" #include "types/list.hh" +#include "utils/rjson.hh" using namespace std::literals::chrono_literals; @@ -451,6 +452,115 @@ SEASTAR_TEST_CASE(test_insert_json_null_frozen_collections) { }); } +SEASTAR_TEST_CASE(test_insert_json_integer_in_scientific_notation) { + return do_with_cql_env_thread([] (cql_test_env& e) { + // Verify that our JSON parsing supports + // inserting numbers like 1.23e+7 to an integer + // column (int, bigint, etc.). Numbers that contain + // a fractional part (12.34) are disallowed. Note + // that this behavior differs from Cassandra, which + // disallows all those types (1.23e+7, 12.34). + + cquery_nofail(e, + "CREATE TABLE scientific_notation (" + " pk int primary key," + " v bigint," + ");"); + + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 1, "v": 150.0 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 2, "v": 234 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 3, "v": 1E+6 + }' + )"); + + // JSON standard specifies that numbers + // in range [-2^53+1, 2^53-1] are interoperable + // meaning implementations will agree + // exactly on their numeric values. This range + // corresponds to a fact that double floating-point + // type has a 53-bit significand and converting + // from an integer in that range to double is non-lossy. + // + // This checks that precision is not lost + // for the largest possible value in that range + // (2^53-1). + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 4, "v": 9.007199254740991E+15 + }' + )"); + + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 5, "v": 0.0e3 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 6, "v": -0.0e1 + }' + )"); + cquery_nofail(e, R"( + INSERT INTO scientific_notation JSON '{ + "pk": 7, "v": -1.234E+3 + }' + )"); + + require_rows(e, "SELECT pk, v FROM scientific_notation", { + {int32_type->decompose(1), long_type->decompose(int64_t(150))}, + {int32_type->decompose(2), long_type->decompose(int64_t(234))}, + {int32_type->decompose(3), long_type->decompose(int64_t(1000000))}, + {int32_type->decompose(4), long_type->decompose(int64_t(9007199254740991))}, + {int32_type->decompose(5), long_type->decompose(int64_t(0))}, + {int32_type->decompose(6), long_type->decompose(int64_t(0))}, + {int32_type->decompose(7), long_type->decompose(int64_t(-1234))}, + }); + + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 8, "v": 12.34 + }' + )").get(), marshal_exception); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 9, "v": 1e-1 + }' + )").get(), marshal_exception); + + // JSON specification disallows Inf, -Inf, NaN: + // "Numeric values that cannot be represented in the + // grammar below (such as Infinity and NaN) are not permitted." + // + // RapidJSON has a parsing flag: kParseNanAndInfFlag + // which allows it. Verify it's not used: + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 10, "v": +Inf + }' + )").get(), rjson::error); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 11, "v": -inf + }' + )").get(), rjson::error); + BOOST_REQUIRE_THROW(e.execute_cql(R"( + INSERT INTO scientific_notation JSON '{ + "pk": 12, "v": NaN + }' + )").get(), rjson::error); + }); +} + SEASTAR_TEST_CASE(test_prepared_json) { return do_with_cql_env_thread([] (cql_test_env& e) { auto prepared = e.execute_cql(