Merge 'Allow integers in scientific format in INSERT JSON ' from Piotr Grabowski

Add support for specifing integers in scientific format (for example 1.234e8) in INSERT JSON statement:

```
INSERT INTO table JSON '{"int_column": 1e7}';
```

Before the JSON parsing library was switched to RapidJSON from JsonCpp, this statement used to work correctly, because JsonCpp transparently casts double to integer value.

Inserting a floating-point number ending with .0 is allowed, as the fractional part is zero. Non-zero fractional part (for example 12.34) is disallowed. A new test is added to test all those behaviors.

This behavior differs from Cassandra, which disallows those types of numbers (1e7, 123.0 and 12.34), however some users rely on that behavior and JSON specification itself does not distinct between floating-point numbers and integer numbers (only a single "number" type is defined).

This PR also fixes two minor issues I noticed while looking at the code: wrong blob validation and missing `IsString()` checks that could result in assertion error.

Fixes #10100
Fixes #10114
Fixes #10115

Closes #10101

* github.com:scylladb/scylla:
  type_json: support integers in scientific format
  type_json: add missing IsString() checks
  type_json: fix wrong blob JSON validation
This commit is contained in:
Nadav Har'El
2022-02-22 14:54:32 +02:00
committed by Pavel Emelyanov
2 changed files with 192 additions and 22 deletions

View File

@@ -78,12 +78,46 @@ static int64_t to_int64_t(const rjson::value& value) {
return value.GetInt();
} else if (value.IsUint()) {
return value.GetUint();
} else if (value.GetUint64()) {
} else if (value.IsUint64()) {
return value.GetUint64(); //NOTICE: large uint64_t values will get overflown
} else if (value.IsDouble()) {
// We allow specifing integer constants
// using scientific notation (for example 1.3e8)
// and floating-point numbers ending with .0 (for example 12.0),
// but not floating-point numbers with fractional part (12.34).
//
// The reason is that JSON standard does not have separate
// types for integers and floating-point numbers, only
// a single "number" type. Some serializers may
// produce an integer in that floating-point format.
double double_value = value.GetDouble();
// Check if the value contains disallowed fractional part (.34 from 12.34).
// With RapidJSON and an integer value in range [-(2^53)+1, (2^53)-1],
// the fractional part will be zero as the entire value
// fits in 53-bit significand. RapidJSON's parsing code does not lose accuracy:
// when parsing a number like 12.34e8, it accumulates 1234 to a int64_t number,
// then converts it to double and multiples by power of 10, never having any
// digit in fractional part.
double integral;
double fractional = std::modf(double_value, &integral);
if (fractional != 0.0 && fractional != -0.0) {
throw marshal_exception(format("Incorrect JSON floating-point value "
"for int64 type: {} (it should not contain fractional part {})", value, fractional));
}
return double_value;
}
throw marshal_exception(format("Incorrect JSON value for int64 type: {}", value));
}
static std::string_view validated_to_string_view(const rjson::value& v, const char* type_name) {
if (!v.IsString()) {
throw marshal_exception(format("{} must be represented as string in JSON, instead got '{}'", type_name, v));
}
return rjson::to_string_view(v);
}
static bytes from_json_object_aux(const map_type_impl& t, const rjson::value& value, cql_serialization_format sf) {
if (!value.IsObject()) {
throw marshal_exception("map_type must be represented as JSON Object");
@@ -185,11 +219,8 @@ struct from_json_object_visitor {
}
bytes operator()(const string_type_impl& t) { return t.from_string(rjson::to_string_view(value)); }
bytes operator()(const bytes_type_impl& t) {
if (!value.IsString()) {
throw marshal_exception("bytes_type must be represented as string");
}
std::string_view string_v = rjson::to_string_view(value);
if (string_v.size() < 2 && string_v[0] != '0' && string_v[1] != 'x') {
std::string_view string_v = validated_to_string_view(value, "bytes_type");
if (string_v.size() < 2 || string_v[0] != '0' || string_v[1] != 'x') {
throw marshal_exception("Blob JSON strings must start with 0x");
}
string_v.remove_prefix(2);
@@ -210,16 +241,11 @@ struct from_json_object_visitor {
}
return t.from_string(rjson::to_string_view(value));
}
bytes operator()(const timeuuid_type_impl& t) {
if (!value.IsString()) {
throw marshal_exception(format("{} must be represented as string in JSON", value));
}
return t.from_string(rjson::to_string_view(value));
}
bytes operator()(const simple_date_type_impl& t) { return t.from_string(rjson::to_string_view(value)); }
bytes operator()(const time_type_impl& t) { return t.from_string(rjson::to_string_view(value)); }
bytes operator()(const uuid_type_impl& t) { return t.from_string(rjson::to_string_view(value)); }
bytes operator()(const inet_addr_type_impl& t) { return t.from_string(rjson::to_string_view(value)); }
bytes operator()(const timeuuid_type_impl& t) { return t.from_string(validated_to_string_view(value, "timeuuid_type")); }
bytes operator()(const simple_date_type_impl& t) { return t.from_string(validated_to_string_view(value, "simple_date_type")); }
bytes operator()(const time_type_impl& t) { return t.from_string(validated_to_string_view(value, "time_type")); }
bytes operator()(const uuid_type_impl& t) { return t.from_string(validated_to_string_view(value, "uuid_type")); }
bytes operator()(const inet_addr_type_impl& t) { return t.from_string(validated_to_string_view(value, "inet_addr_type")); }
template <typename T> bytes operator()(const floating_type_impl<T>& t) {
if (value.IsString()) {
return t.from_string(rjson::to_string_view(value));
@@ -257,12 +283,7 @@ struct from_json_object_visitor {
}
return counter_cell_view::total_value_type()->decompose(to_int64_t(value));
}
bytes operator()(const duration_type_impl& t) {
if (!value.IsString()) {
throw marshal_exception(format("{} must be represented as string in JSON", value));
}
return t.from_string(rjson::to_string_view(value));
}
bytes operator()(const duration_type_impl& t) { return t.from_string(validated_to_string_view(value, "duration_type")); }
bytes operator()(const empty_type_impl& t) { return bytes(); }
bytes operator()(const map_type_impl& t) { return from_json_object_aux(t, value, sf); }
bytes operator()(const set_type_impl& t) { return from_json_object_aux(t, value, sf); }

View File

@@ -26,6 +26,7 @@
#include "types/tuple.hh"
#include "types/user.hh"
#include "types/list.hh"
#include "utils/rjson.hh"
using namespace std::literals::chrono_literals;
@@ -295,6 +296,45 @@ SEASTAR_TEST_CASE(test_insert_json_types) {
}
});
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO all_types JSON '{
"a": "abc", "c": "6"
}'
)").get(), marshal_exception);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO all_types JSON '{
"a": "abc", "c": "0392fa"
}'
)").get(), marshal_exception);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO all_types JSON '{
"a": "abc", "\"G\"": 87654321
}'
)").get(), marshal_exception);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO all_types JSON '{
"a": "abc", "k": 123456
}'
)").get(), marshal_exception);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO all_types JSON '{
"a": "abc", "k": "3157"
}'
)").get(), marshal_exception);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO all_types JSON '{
"a": "abc", "r": 12.34
}'
)").get(), marshal_exception);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO all_types JSON '{
"a": "abc", "s": 1234
}'
)").get(), marshal_exception);
e.execute_cql("CREATE TABLE multi_column_pk_table (p1 int, p2 int, p3 int, c1 int, c2 int, v int, PRIMARY KEY((p1, p2, p3), c1, c2));").get();
e.require_table_exists("ks", "multi_column_pk_table").get();
@@ -412,6 +452,115 @@ SEASTAR_TEST_CASE(test_insert_json_null_frozen_collections) {
});
}
SEASTAR_TEST_CASE(test_insert_json_integer_in_scientific_notation) {
return do_with_cql_env_thread([] (cql_test_env& e) {
// Verify that our JSON parsing supports
// inserting numbers like 1.23e+7 to an integer
// column (int, bigint, etc.). Numbers that contain
// a fractional part (12.34) are disallowed. Note
// that this behavior differs from Cassandra, which
// disallows all those types (1.23e+7, 12.34).
cquery_nofail(e,
"CREATE TABLE scientific_notation ("
" pk int primary key,"
" v bigint,"
");");
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 1, "v": 150.0
}'
)");
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 2, "v": 234
}'
)");
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 3, "v": 1E+6
}'
)");
// JSON standard specifies that numbers
// in range [-2^53+1, 2^53-1] are interoperable
// meaning implementations will agree
// exactly on their numeric values. This range
// corresponds to a fact that double floating-point
// type has a 53-bit significand and converting
// from an integer in that range to double is non-lossy.
//
// This checks that precision is not lost
// for the largest possible value in that range
// (2^53-1).
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 4, "v": 9.007199254740991E+15
}'
)");
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 5, "v": 0.0e3
}'
)");
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 6, "v": -0.0e1
}'
)");
cquery_nofail(e, R"(
INSERT INTO scientific_notation JSON '{
"pk": 7, "v": -1.234E+3
}'
)");
require_rows(e, "SELECT pk, v FROM scientific_notation", {
{int32_type->decompose(1), long_type->decompose(int64_t(150))},
{int32_type->decompose(2), long_type->decompose(int64_t(234))},
{int32_type->decompose(3), long_type->decompose(int64_t(1000000))},
{int32_type->decompose(4), long_type->decompose(int64_t(9007199254740991))},
{int32_type->decompose(5), long_type->decompose(int64_t(0))},
{int32_type->decompose(6), long_type->decompose(int64_t(0))},
{int32_type->decompose(7), long_type->decompose(int64_t(-1234))},
});
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 8, "v": 12.34
}'
)").get(), marshal_exception);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 9, "v": 1e-1
}'
)").get(), marshal_exception);
// JSON specification disallows Inf, -Inf, NaN:
// "Numeric values that cannot be represented in the
// grammar below (such as Infinity and NaN) are not permitted."
//
// RapidJSON has a parsing flag: kParseNanAndInfFlag
// which allows it. Verify it's not used:
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 10, "v": +Inf
}'
)").get(), rjson::error);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 11, "v": -inf
}'
)").get(), rjson::error);
BOOST_REQUIRE_THROW(e.execute_cql(R"(
INSERT INTO scientific_notation JSON '{
"pk": 12, "v": NaN
}'
)").get(), rjson::error);
});
}
SEASTAR_TEST_CASE(test_prepared_json) {
return do_with_cql_env_thread([] (cql_test_env& e) {
auto prepared = e.execute_cql(