diff --git a/db/marshal/type_parser.cc b/db/marshal/type_parser.cc index aeb9a0943a..5263b8527b 100644 --- a/db/marshal/type_parser.cc +++ b/db/marshal/type_parser.cc @@ -27,6 +27,7 @@ #include "exceptions/exceptions.hh" #include +#include namespace db { @@ -42,58 +43,42 @@ type_parser::type_parser(const sstring& str) { } data_type type_parser::parse(const sstring& str) { -#if 0 - if (str == null) - return BytesType.instance; - - AbstractType type = cache.get(str); - - if (type != null) - return type; -#endif - - // This could be simplier (i.e. new TypeParser(str).parse()) but we avoid creating a TypeParser object if not really necessary. - size_t i = 0; - i = skip_blank(str, i); - size_t j = i; - while (!is_eos(str, i) && is_identifier_char(str[i])) { - ++i; - } - if (i == j) { - return bytes_type; - } - sstring name = str.substr(j, i-j); - i = skip_blank(str, i); - - data_type type; - - if (!is_eos(str, i) && str[i] == '(') { - type = get_abstract_type(name, type_parser{str, i}); - } else { - type = get_abstract_type(name); - } - -#if 0 - // We don't really care about concurrency here. Worst case scenario, we do some parsing unnecessarily - cache.put(str, type); -#endif - return type; + return type_parser(str).parse(); } -data_type type_parser::parse() +data_type type_parser::parse() { + return do_parse(true); +} + +data_type type_parser::do_parse(bool multicell) { skip_blank(); sstring name = read_next_identifier(); + if (_str[_idx] == ':') { + _idx++; + try { + size_t pos; + std::stoul(name, &pos, 0x10); + if (pos != name.size()) { + throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); + } + } catch (const std::invalid_argument & e) { + throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); + } catch (const std::out_of_range& e) { + throw exceptions::syntax_exception(sprint("expected 8-byte hex number, found %s", name)); + } + name = read_next_identifier(); + } skip_blank(); if (!is_eos() && _str[_idx] == '(') - return get_abstract_type(name, *this); + return get_abstract_type(name, *this, multicell); else return get_abstract_type(name); } -std::vector type_parser::get_type_parameters() +std::vector type_parser::get_type_parameters(bool multicell) { std::vector list; @@ -115,7 +100,7 @@ std::vector type_parser::get_type_parameters() } try { - list.emplace_back(parse()); + list.emplace_back(do_parse(multicell)); } catch (exceptions::syntax_exception e) { // FIXME #if 0 @@ -139,7 +124,7 @@ data_type type_parser::get_abstract_type(const sstring& compare_with) return abstract_type::parse_type(class_name); } -data_type type_parser::get_abstract_type(const sstring& compare_with, type_parser parser) +data_type type_parser::get_abstract_type(const sstring& compare_with, type_parser& parser, bool multicell) { sstring class_name; if (compare_with.find('.') != sstring::npos) { @@ -147,24 +132,36 @@ data_type type_parser::get_abstract_type(const sstring& compare_with, type_parse } else { class_name = "org.apache.cassandra.db.marshal." + compare_with; } - if (class_name == "org.apache.cassandra.db.marshal.ListType") { + if (class_name == "org.apache.cassandra.db.marshal.FrozenType") { + auto l = parser.get_type_parameters(false); + if (l.size() != 1) { + throw exceptions::configuration_exception("FrozenType takes exactly 1 type parameter"); + } + return l[0]; + } else if (class_name == "org.apache.cassandra.db.marshal.ListType") { auto l = parser.get_type_parameters(); if (l.size() != 1) { throw exceptions::configuration_exception("ListType takes exactly 1 type parameter"); } - return list_type_impl::get_instance(l[0], true); + return list_type_impl::get_instance(l[0], multicell); } else if (class_name == "org.apache.cassandra.db.marshal.SetType") { auto l = parser.get_type_parameters(); if (l.size() != 1) { throw exceptions::configuration_exception("SetType takes exactly 1 type parameter"); } - return set_type_impl::get_instance(l[0], true); + return set_type_impl::get_instance(l[0], multicell); } else if (class_name == "org.apache.cassandra.db.marshal.MapType") { auto l = parser.get_type_parameters(); if (l.size() != 2) { throw exceptions::configuration_exception("MapType takes exactly 2 type parameters"); } - return map_type_impl::get_instance(l[0], l[1], true); + return map_type_impl::get_instance(l[0], l[1], multicell); + } else if (class_name == "org.apache.cassandra.db.marshal.TupleType") { + auto l = parser.get_type_parameters(); + if (l.size() == 0) { + throw exceptions::configuration_exception("TupleType takes exactly at least 1 type parameter"); + } + return tuple_type_impl::get_instance(l); } else { throw std::runtime_error("unknown type: " + class_name); } diff --git a/db/marshal/type_parser.hh b/db/marshal/type_parser.hh index 8e924f20ef..8f0f0b1c97 100644 --- a/db/marshal/type_parser.hh +++ b/db/marshal/type_parser.hh @@ -109,8 +109,8 @@ public: throw new SyntaxException(String.format("Syntax error parsing '%s' at char %d: unexpected end of string", str, idx)); } #endif - - std::vector get_type_parameters(); + std::vector get_type_parameters(bool multicell=true); + data_type do_parse(bool multicell = true); #if 0 public Map> getAliasParameters() throws SyntaxException, ConfigurationException @@ -265,7 +265,7 @@ public: static data_type get_abstract_type(const sstring& compare_with); - static data_type get_abstract_type(const sstring& compare_with, type_parser parser); + static data_type get_abstract_type(const sstring& compare_with, type_parser& parser, bool multicell = true); #if 0 private static AbstractType getRawAbstractType(Class> typeClass) throws ConfigurationException diff --git a/tests/urchin/types_test.cc b/tests/urchin/types_test.cc index 2ee72ce593..488bc44e8e 100644 --- a/tests/urchin/types_test.cc +++ b/tests/urchin/types_test.cc @@ -11,6 +11,8 @@ #include #include "types.hh" #include "compound.hh" +#include "db/marshal/type_parser.hh" +#include "cql3/cql3_type.hh" using namespace std::literals::chrono_literals; @@ -320,3 +322,77 @@ BOOST_AUTO_TEST_CASE(test_uuid_type_validation) { uuid_type->validate(random.to_bytes()); test_validation_fails(uuid_type, from_hex("00")); } + +BOOST_AUTO_TEST_CASE(test_parse_bad_hex) { + auto parser = db::marshal::type_parser("636f6c75kd6h:org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type)"); + BOOST_REQUIRE_THROW(parser.parse(), exceptions::syntax_exception); +} + +BOOST_AUTO_TEST_CASE(test_parse_long_hex) { + auto parser = db::marshal::type_parser("6636f6c756d6e636f6c756d6e36f6c756d6e:org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type)"); + BOOST_REQUIRE_THROW(parser.parse(), exceptions::syntax_exception); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_list) { + auto parser = db::marshal::type_parser("636f6c756d6e:org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type)"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "list"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_set) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.Int32Type)"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "set"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_map) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.MapType(org.apache.cassandra.db.marshal.Int32Type,org.apache.cassandra.db.marshal.Int32Type)"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "map"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_tuple) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.TupleType(org.apache.cassandra.db.marshal.Int32Type,org.apache.cassandra.db.marshal.Int32Type)"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "tuple"); +} + +BOOST_AUTO_TEST_CASE(test_parse_invalid_tuple) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.TupleType()"); + BOOST_REQUIRE_THROW(parser.parse(), exceptions::configuration_exception); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_frozen_set) { + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.FrozenType(org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.Int32Type))"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "frozen>"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_set_frozen_set) { + sstring frozen = "org.apache.cassandra.db.marshal.FrozenType(org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.Int32Type))"; + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.SetType(" + frozen + ")"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "set>>"); +} + +BOOST_AUTO_TEST_CASE(test_parse_valid_set_frozen_set_set) { + sstring set_set = "org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.SetType(org.apache.cassandra.db.marshal.Int32Type))"; + sstring frozen = "org.apache.cassandra.db.marshal.FrozenType(" + set_set + ")"; + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.SetType(" + frozen + ")"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "set>>>"); +} + + +BOOST_AUTO_TEST_CASE(test_parse_invalid_type) { + auto parser = db::marshal::type_parser("636f6c756d6e:org.apache.cassandra.db.marshal.ListType(org.apache.cassandra.db.marshal.Int32Type, org.apache.cassandra.db.marshal.UTF8Type)"); + BOOST_REQUIRE_THROW(parser.parse(), exceptions::configuration_exception); +} + +BOOST_AUTO_TEST_CASE(test_parse_recursive_type) { + sstring key("org.apache.cassandra.db.marshal.Int32Type"); + sstring value("org.apache.cassandra.db.marshal.TupleType(org.apache.cassandra.db.marshal.Int32Type,org.apache.cassandra.db.marshal.Int32Type)"); + auto parser = db::marshal::type_parser("org.apache.cassandra.db.marshal.MapType(" + key + "," + value + ")"); + auto type = parser.parse(); + BOOST_REQUIRE(type->as_cql3_type()->to_string() == "map>"); +}