In the current code, support for case-sensitive (quoted) user-defined type
names is broken. For example, a test doing:
CREATE TYPE "PHone" (country_code int, number text)
CREATE TABLE cf (pk blob, pn "PHone", PRIMARY KEY (pk))
fails: the first statement creates the type with the case-sensitive name PHone,
but the second statement wrongly ends up looking for the lowercased name phone,
and throws the exception "Unknown type ks.phone".
The problem is in cql3_type_name_impl. This class is used to convert a
type object into its proper CQL syntax - for example frozen<list<int>>.
The problem is that for a user-defined type, we forgot to quote its name
when it is not all lowercase, and the result is incorrect CQL. For example,
a list of PHone will be written as list<PHone> - but this is wrong because
the CQL parser, when it sees this expression, lowercases the unquoted type
name PHone and it becomes just phone. It should be list<"PHone">, not
list<PHone>.
The solution is for cql3_type_name_impl to use for a user-defined type
its get_name_as_cql_string() method instead of get_name_as_string().
get_name_as_cql_string() is a new method which prints the name of the
user type as it should be in a CQL expression, i.e., quoted if necessary.
The bug in the above test was apparently caused when our code serialized
the type name to disk as the string PHone (without any quoting), and then
later deserialized it using the CQL type parser, which converted it into
a lowercase phone. With this patch, the type's name is serialized as
"PHone", with the quotes, and deserialized properly as the type PHone.
While the extra quotes may seem excessive, they are necessary for the
correct CQL type expression - remember that the type expression may be
significantly more complex, e.g., frozen<list<"PHone">> and all of this,
including the quotes, is necessary for our parser to be able to translate
this string back into a type object.
This patch may cause breakage to existing databases which used case-
sensitive user-defined types, but I argue that these use cases were
already broken (as demonstrated by this test) so we won't break anything
that actually worked before.
Fixes #5544
Signed-off-by: Nadav Har'El <nyh@scylladb.com>
Message-Id: <20200101160805.15847-1-nyh@scylladb.com>
80 lines
3.7 KiB
C++
80 lines
3.7 KiB
C++
/*
 * Copyright (C) 2014 ScyllaDB
 */

/*
 * This file is part of Scylla.
 *
 * Scylla is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Scylla is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Scylla. If not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#pragma once
|
|
|
|
#include "types.hh"
|
|
#include "types/tuple.hh"
|
|
|
|
class user_type_impl : public tuple_type_impl {
|
|
using intern = type_interning_helper<user_type_impl, sstring, bytes, std::vector<bytes>, std::vector<data_type>, bool>;
|
|
public:
|
|
const sstring _keyspace;
|
|
const bytes _name;
|
|
private:
|
|
const std::vector<bytes> _field_names;
|
|
const std::vector<sstring> _string_field_names;
|
|
const bool _is_multi_cell;
|
|
public:
|
|
using native_type = std::vector<data_value>;
|
|
user_type_impl(sstring keyspace, bytes name, std::vector<bytes> field_names, std::vector<data_type> field_types, bool is_multi_cell)
|
|
: tuple_type_impl(kind::user, make_name(keyspace, name, field_names, field_types, is_multi_cell), field_types, false /* don't freeze inner */)
|
|
, _keyspace(std::move(keyspace))
|
|
, _name(std::move(name))
|
|
, _field_names(std::move(field_names))
|
|
, _string_field_names(boost::copy_range<std::vector<sstring>>(_field_names | boost::adaptors::transformed(
|
|
[] (const bytes& field_name) { return utf8_type->to_string(field_name); })))
|
|
, _is_multi_cell(is_multi_cell) {
|
|
}
|
|
static shared_ptr<const user_type_impl> get_instance(sstring keyspace, bytes name,
|
|
std::vector<bytes> field_names, std::vector<data_type> field_types, bool multi_cell) {
|
|
return intern::get_instance(std::move(keyspace), std::move(name), std::move(field_names), std::move(field_types), multi_cell);
|
|
}
|
|
data_type field_type(size_t i) const { return type(i); }
|
|
const std::vector<data_type>& field_types() const { return _types; }
|
|
bytes_view field_name(size_t i) const { return _field_names[i]; }
|
|
sstring field_name_as_string(size_t i) const { return _string_field_names[i]; }
|
|
const std::vector<bytes>& field_names() const { return _field_names; }
|
|
const std::vector<sstring>& string_field_names() const { return _string_field_names; }
|
|
std::optional<size_t> idx_of_field(const bytes& name) const;
|
|
bool is_multi_cell() const { return _is_multi_cell; }
|
|
virtual data_type freeze() const override;
|
|
sstring get_name_as_string() const;
|
|
sstring get_name_as_cql_string() const;
|
|
|
|
private:
|
|
static sstring make_name(sstring keyspace,
|
|
bytes name,
|
|
std::vector<bytes> field_names,
|
|
std::vector<data_type> field_types,
|
|
bool is_multi_cell);
|
|
};
|
|
|
|
// Wraps a vector of field values (user_type_impl::native_type) into a data_value of the given type.
// NOTE(review): the parameter is named tuple_type but is presumably expected to be a user type - confirm against the definition.
data_value make_user_value(data_type tuple_type, user_type_impl::native_type value);
|
|
|
|
// Largest value representable in an int16_t (32767).
// NOTE(review): presumably the maximum number of fields a user-defined type may have - confirm at the use sites.
// Declared `inline` so that every translation unit including this header shares a single definition.
inline constexpr size_t max_udt_fields = std::numeric_limits<int16_t>::max();
|
|
|
|
// The following two functions are used to translate field indices (used to identify fields inside non-frozen UDTs)
|
|
// from/to a serialized bytes representation to be stored in mutations and sstables.
|
|
// Refer to collection_mutation.hh for a detailed description on how the serialized indices are used inside mutations.
|
|
bytes serialize_field_index(size_t);
|
|
size_t deserialize_field_index(const bytes_view&);
|