From 85be9f55e8d7692ef4d17458d21049e565ba3680 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 15 Jul 2015 13:58:35 -0400 Subject: [PATCH 1/6] type_parser: find - and ignore strings in the form :type Collections can at times have the form :type. This is the case, for instance, for the strings that compose the comparator string. The actual hex number isn't terribly interesting: it is used as a key to hash the collection types, but since we hash them by their types anyway, we can safely ignore them. Signed-off-by: Glauber Costa --- db/marshal/type_parser.cc | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/db/marshal/type_parser.cc b/db/marshal/type_parser.cc index aeb9a0943a..7409443c42 100644 --- a/db/marshal/type_parser.cc +++ b/db/marshal/type_parser.cc @@ -27,6 +27,7 @@ #include "exceptions/exceptions.hh" #include +#include namespace db { @@ -85,6 +86,21 @@ data_type type_parser::parse() skip_blank(); sstring name = read_next_identifier(); + if (_str[_idx] == ':') { + _idx++; + try { + size_t pos; + std::stoul(name, &pos, 0x10); + if (pos != name.size()) { + throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); + } + } catch (const std::invalid_argument & e) { + throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); + } catch (const std::out_of_range& e) { + throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); + } + name = read_next_identifier(); + } skip_blank(); if (!is_eos() && _str[_idx] == '(') From 31e22034f2f5d3fe6d8d154877e81dfa39e120e5 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 15 Jul 2015 16:21:25 -0400 Subject: [PATCH 2/6] type_parser: fix a bug with recursive parsing We currently have a bug when parsing collection types that contains collections themselves. We call the recursion correctly, but get_abstract_type gets its value by copy, not reference. Therefore, all work it does in the _idx manipulation is done in the copy, and when the callee returns, the caller, with its _idx unchanged, will try recursing again. Fix it by passing by reference Signed-off-by: Glauber Costa --- db/marshal/type_parser.cc | 2 +- db/marshal/type_parser.hh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/db/marshal/type_parser.cc b/db/marshal/type_parser.cc index 7409443c42..0cff14a5a0 100644 --- a/db/marshal/type_parser.cc +++ b/db/marshal/type_parser.cc @@ -155,7 +155,7 @@ data_type type_parser::get_abstract_type(const sstring& compare_with) return abstract_type::parse_type(class_name); } -data_type type_parser::get_abstract_type(const sstring& compare_with, type_parser parser) +data_type type_parser::get_abstract_type(const sstring& compare_with, type_parser& parser) { sstring class_name; if (compare_with.find('.') != sstring::npos) { diff --git a/db/marshal/type_parser.hh b/db/marshal/type_parser.hh index 8e924f20ef..7f94ed6881 100644 --- a/db/marshal/type_parser.hh +++ b/db/marshal/type_parser.hh @@ -265,7 +265,7 @@ public: static data_type get_abstract_type(const sstring& compare_with); - static data_type get_abstract_type(const sstring& compare_with, type_parser parser); + static data_type get_abstract_type(const sstring& compare_with, type_parser& parser); #if 0 private static AbstractType getRawAbstractType(Class> typeClass) throws ConfigurationException From 9b96a2441a87242ed75830ec65c1ada952225f62 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 15 Jul 2015 15:21:17 -0400 Subject: [PATCH 3/6] type parser: detect frozen types Note that the multicell attribute can't be part of the parse instance, because otherwise we would either freeze every subsequent element, or complicate the flow considerably to handle it. It is instead, passed as a parameter to get_instance_types(), which will then have to be propagated to parse() and get_abstract_type() Signed-off-by: Glauber Costa --- db/marshal/type_parser.cc | 28 +++++++++++++++++++--------- db/marshal/type_parser.hh | 6 +++--- 2 files changed, 22 insertions(+), 12 deletions(-) diff --git a/db/marshal/type_parser.cc b/db/marshal/type_parser.cc index 0cff14a5a0..ff235f077a 100644 --- a/db/marshal/type_parser.cc +++ b/db/marshal/type_parser.cc @@ -81,7 +81,11 @@ data_type type_parser::parse(const sstring& str) { return type; } -data_type type_parser::parse() +data_type type_parser::parse() { + return do_parse(true); +} + +data_type type_parser::do_parse(bool multicell) { skip_blank(); @@ -104,12 +108,12 @@ data_type type_parser::parse() skip_blank(); if (!is_eos() && _str[_idx] == '(') - return get_abstract_type(name, *this); + return get_abstract_type(name, *this, multicell); else return get_abstract_type(name); } -std::vector type_parser::get_type_parameters() +std::vector type_parser::get_type_parameters(bool multicell) { std::vector list; @@ -131,7 +135,7 @@ std::vector type_parser::get_type_parameters() } try { - list.emplace_back(parse()); + list.emplace_back(do_parse(multicell)); } catch (exceptions::syntax_exception e) { // FIXME #if 0 @@ -155,7 +159,7 @@ data_type type_parser::get_abstract_type(const sstring& compare_with) return abstract_type::parse_type(class_name); } -data_type type_parser::get_abstract_type(const sstring& compare_with, type_parser& parser) +data_type type_parser::get_abstract_type(const sstring& compare_with, type_parser& parser, bool multicell) { sstring class_name; if (compare_with.find('.') != sstring::npos) { @@ -163,24 +167,30 @@ data_type type_parser::get_abstract_type(const sstring& compare_with, type_parse } else { class_name = "org.apache.cassandra.db.marshal." + compare_with; } - if (class_name == "org.apache.cassandra.db.marshal.ListType") { + if (class_name == "org.apache.cassandra.db.marshal.FrozenType") { + auto l = parser.get_type_parameters(false); + if (l.size() != 1) { + throw exceptions::configuration_exception("FrozenType takes exactly 1 type parameter"); + } + return l[0]; + } else if (class_name == "org.apache.cassandra.db.marshal.ListType") { auto l = parser.get_type_parameters(); if (l.size() != 1) { throw exceptions::configuration_exception("ListType takes exactly 1 type parameter"); } - return list_type_impl::get_instance(l[0], true); + return list_type_impl::get_instance(l[0], multicell); } else if (class_name == "org.apache.cassandra.db.marshal.SetType") { auto l = parser.get_type_parameters(); if (l.size() != 1) { throw exceptions::configuration_exception("SetType takes exactly 1 type parameter"); } - return set_type_impl::get_instance(l[0], true); + return set_type_impl::get_instance(l[0], multicell); } else if (class_name == "org.apache.cassandra.db.marshal.MapType") { auto l = parser.get_type_parameters(); if (l.size() != 2) { throw exceptions::configuration_exception("MapType takes exactly 2 type parameters"); } - return map_type_impl::get_instance(l[0], l[1], true); + return map_type_impl::get_instance(l[0], l[1], multicell); } else { throw std::runtime_error("unknown type: " + class_name); } diff --git a/db/marshal/type_parser.hh b/db/marshal/type_parser.hh index 7f94ed6881..8f0f0b1c97 100644 --- a/db/marshal/type_parser.hh +++ b/db/marshal/type_parser.hh @@ -109,8 +109,8 @@ public: throw new SyntaxException(String.format("Syntax error parsing '%s' at char %d: unexpected end of string", str, idx)); } #endif - - std::vector get_type_parameters(); + std::vector get_type_parameters(bool multicell=true); + data_type do_parse(bool multicell = true); #if 0 public Map> getAliasParameters() throws SyntaxException, ConfigurationException @@ -265,7 +265,7 @@ public: static data_type get_abstract_type(const sstring& compare_with); - static data_type get_abstract_type(const sstring& compare_with, type_parser& parser); + static data_type get_abstract_type(const sstring& compare_with, type_parser& parser, bool multicell = true); #if 0 private static AbstractType getRawAbstractType(Class> typeClass) throws ConfigurationException From 8b94cc86548170ca55e039560266cbefeede4cf3 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 15 Jul 2015 15:21:42 -0400 Subject: [PATCH 4/6] type_parser: don't duplicate parsing code. According to the comments, we are doing this for simplicity, to avoid creating a new type_parse object. However, while this approach works well for the simple case where we expect a single token, it won't work as the parser becomes more able to recognize other cases. Signed-off-by: Glauber Costa --- db/marshal/type_parser.cc | 37 +------------------------------------ 1 file changed, 1 insertion(+), 36 deletions(-) diff --git a/db/marshal/type_parser.cc b/db/marshal/type_parser.cc index ff235f077a..61e42f5100 100644 --- a/db/marshal/type_parser.cc +++ b/db/marshal/type_parser.cc @@ -43,42 +43,7 @@ type_parser::type_parser(const sstring& str) { } data_type type_parser::parse(const sstring& str) { -#if 0 - if (str == null) - return BytesType.instance; - - AbstractType type = cache.get(str); - - if (type != null) - return type; -#endif - - // This could be simplier (i.e. new TypeParser(str).parse()) but we avoid creating a TypeParser object if not really necessary. - size_t i = 0; - i = skip_blank(str, i); - size_t j = i; - while (!is_eos(str, i) && is_identifier_char(str[i])) { - ++i; - } - if (i == j) { - return bytes_type; - } - sstring name = str.substr(j, i-j); - i = skip_blank(str, i); - - data_type type; - - if (!is_eos(str, i) && str[i] == '(') { - type = get_abstract_type(name, type_parser{str, i}); - } else { - type = get_abstract_type(name); - } - -#if 0 - // We don't really care about concurrency here. Worst case scenario, we do some parsing unnecessarily - cache.put(str, type); -#endif - return type; + return type_parser(str).parse(); } data_type type_parser::parse() { From e5dbbc61a26d04a6af9d10676aa33519f10c39ce Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Wed, 15 Jul 2015 15:36:02 -0400 Subject: [PATCH 5/6] type_parser: parse tuples Signed-off-by: Glauber Costa --- db/marshal/type_parser.cc | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/db/marshal/type_parser.cc b/db/marshal/type_parser.cc index 61e42f5100..5263b8527b 100644 --- a/db/marshal/type_parser.cc +++ b/db/marshal/type_parser.cc @@ -156,6 +156,12 @@ data_type type_parser::get_abstract_type(const sstring& compare_with, type_parse throw exceptions::configuration_exception("MapType takes exactly 2 type parameters"); } return map_type_impl::get_instance(l[0], l[1], multicell); + } else if (class_name == "org.apache.cassandra.db.marshal.TupleType") { + auto l = parser.get_type_parameters(); + if (l.size() == 0) { + throw exceptions::configuration_exception("TupleType takes exactly at least 1 type parameter"); + } + return tuple_type_impl::get_instance(l); } else { throw std::runtime_error("unknown type: " + class_name); } From bfe7656cfcc6b0021449606787bb7404ab24da3c Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Tue, 14 Jul 2015 21:40:18 -0400 Subject: [PATCH 6/6] types_test: add test for collection parser Signed-off-by: Glauber Costa --- tests/urchin/types_test.cc | 76 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 76 insertions(+) diff --git a/tests/urchin/types_test.cc b/tests/urchin/types_test.cc index 2ee72ce593..488bc44e8e 100644 --- a/tests/urchin/types_test.cc +++ b/tests/urchin/types_test.cc @@ -11,6 +11,8 @@ #include #include "types.hh" #include "compound.hh" +#include "db/marshal/type_parser.hh" +#include "cql3/cql3_type.hh" using namespace std::literals::chrono_literals; @@ -320,3 +322,77 @@ BOOST_AUTO_TEST_CASE(test_uuid_type_validation) { uuid_type->validate(random.to_bytes()); test_validation_fails(uuid_type, from_hex("00")); } + +BOOST_AUTO_TEST_CASE(test_parse_bad_hex) { + auto parser = db::marshal::type_parser("636f6c75kd6h:org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type)"); + BOOST_REQUIRE_THROW(parser.parse(), exceptions::syntax_exception); +} + +BOOST_AUTO_TEST_CASE(test_parse_long_hex) { + auto parser = db::marshal::type_parser("6636f6c756d6e636f6c756d6e36f6c756d6e:org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type)"); + BOOST_REQUIRE_THROW(parser.parse(), exceptions::syntax_exception); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_list) { + auto parser = db::marshal::type_parser("636f6c756d6e:org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type)"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "list"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_set) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.Int32Type)"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "set"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_map) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.MapType(org.apache.cassandra.db.marshal.Int32Type,org.apache.cassandra.db.marshal.Int32Type)"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "map"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_tuple) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.TupleType(org.apache.cassandra.db.marshal.Int32Type,org.apache.cassandra.db.marshal.Int32Type)"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "tuple"); +} + +BOOST_AUTO_TEST_CASE(test_parse_invalid_tuple) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.TupleType()"); + BOOST_REQUIRE_THROW(parser.parse(), exceptions::configuration_exception); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_frozen_set) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.FrozenType(org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.Int32Type))"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "frozen>"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_set_frozen_set) { + sstring frozen = "org.apache.cassandra.db.marshal.FrozenType(org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.Int32Type))"; + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.SetType(" + frozen + ")"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "set>>"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_set_frozen_set_set) { + sstring set_set = "org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.Int32Type))"; + sstring frozen = "org.apache.cassandra.db.marshal.FrozenType(" + set_set + ")"; + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.SetType(" + frozen + ")"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "set>>>"); +} + + +BOOST_AUTO_TEST_CASE(test_parse_invalid_type) { + auto parser = db::marshal::type_parser("636f6c756d6e:org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type, org.apache.cassandra.db.marshal.UTF8Type)"); + BOOST_REQUIRE_THROW(parser.parse(), exceptions::configuration_exception); +} + +BOOST_AUTO_TEST_CASE(test_parse_recursive_type) { + sstring key("org.apache.cassandra.db.marshal.Int32Type"); + sstring value("org.apache.cassandra.db.marshal.TupleType(org.apache.cassandra.db.marshal.Int32Type,org.apache.cassandra.db.marshal.Int32Type)"); + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.MapType(" + key + "," + value + ")"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "map>"); +}