/* * Copyright (C) 2015 ScyllaDB */ /* * This file is part of Scylla. * * Scylla is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by * the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * Scylla is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with Scylla. If not, see . */ #pragma once #include #include #include "compound.hh" #include "schema.hh" // // This header provides adaptors between the representation used by our compound_type<> // and representation used by Origin. // // For single-component keys the legacy representation is equivalent // to the only component's serialized form. For composite keys it the following // (See org.apache.cassandra.db.marshal.CompositeType): // // ::= ( )+ // ::= // ::= // ::= // // is component's value in serialized form. is always 0 for partition key. // // Given a representation serialized using @CompoundType, provides a view on the // representation of the same components as they would be serialized by Origin. // // The view is exposed in a form of a byte range. For example of use see to_legacy() function. template class legacy_compound_view { static_assert(!CompoundType::is_prefixable, "Legacy view not defined for prefixes"); CompoundType& _type; bytes_view _packed; public: legacy_compound_view(CompoundType& c, bytes_view packed) : _type(c) , _packed(packed) { } class iterator : public std::iterator { bool _singular; // Offset within virtual output space of a component. // // Offset: -2 -1 0 ... LEN-1 LEN // Field: [ length MSB ] [ length LSB ] [ VALUE ] [ EOC ] // int32_t _offset; typename CompoundType::iterator _i; public: struct end_tag {}; iterator(const legacy_compound_view& v) : _singular(v._type.is_singular()) , _offset(_singular ? 0 : -2) , _i(v._type.begin(v._packed)) { } iterator(const legacy_compound_view& v, end_tag) : _offset(-2) , _i(v._type.end(v._packed)) { } value_type operator*() const { int32_t component_size = _i->size(); if (_offset == -2) { return (component_size >> 8) & 0xff; } else if (_offset == -1) { return component_size & 0xff; } else if (_offset < component_size) { return (*_i)[_offset]; } else { // _offset == component_size return 0; // EOC field } } iterator& operator++() { auto component_size = (int32_t) _i->size(); if (_offset < component_size // When _singular, we skip the EOC byte. && (!_singular || _offset != (component_size - 1))) { ++_offset; } else { ++_i; _offset = -2; } return *this; } bool operator==(const iterator& other) const { return _offset == other._offset && other._i == _i; } bool operator!=(const iterator& other) const { return !(*this == other); } }; // A trichotomic comparator defined on @CompoundType representations which // orders them according to lexicographical ordering of their corresponding // legacy representations. // // tri_comparator(t)(k1, k2) // // ...is equivalent to: // // compare_unsigned(to_legacy(t, k1), to_legacy(t, k2)) // // ...but more efficient. // struct tri_comparator { const CompoundType& _type; tri_comparator(const CompoundType& type) : _type(type) { } // @k1 and @k2 must be serialized using @type, which was passed to the constructor. int operator()(bytes_view k1, bytes_view k2) const { if (_type.is_singular()) { return compare_unsigned(*_type.begin(k1), *_type.begin(k2)); } return lexicographical_tri_compare( _type.begin(k1), _type.end(k1), _type.begin(k2), _type.end(k2), [] (const bytes_view& c1, const bytes_view& c2) -> int { if (c1.size() != c2.size()) { return c1.size() < c2.size() ? -1 : 1; } return memcmp(c1.begin(), c2.begin(), c1.size()); }); } }; // Equivalent to std::distance(begin(), end()), but computes faster size_t size() const { if (_type.is_singular()) { return _type.begin(_packed)->size(); } size_t s = 0; for (auto&& component : _type.components(_packed)) { s += 2 /* length field */ + component.size() + 1 /* EOC */; } return s; } iterator begin() const { return iterator(*this); } iterator end() const { return iterator(*this, typename iterator::end_tag()); } }; // Converts compound_type<> representation to legacy representation // @packed is assumed to be serialized using supplied @type. template static inline bytes to_legacy(CompoundType& type, bytes_view packed) { legacy_compound_view lv(type, packed); bytes legacy_form(bytes::initialized_later(), lv.size()); std::copy(lv.begin(), lv.end(), legacy_form.begin()); return legacy_form; } class composite_view; // Represents a value serialized according to Origin's CompositeType. // If is_compound is true, then the value is one or more components encoded as: // // ::= ( )+ // ::= // ::= // ::= // // If false, then it encodes a single value, without a prefix length or a suffix EOC. class composite final { bytes _bytes; bool _is_compound; public: composite(bytes&& b, bool is_compound) : _bytes(std::move(b)) , _is_compound(is_compound) { } explicit composite(bytes&& b) : _bytes(std::move(b)) , _is_compound(true) { } composite() : _bytes() , _is_compound(true) { } using size_type = uint16_t; using eoc_type = int8_t; /* * The 'end-of-component' byte should always be 0 for actual column name. * However, it can set to 1 for query bounds. This allows to query for the * equivalent of 'give me the full range'. That is, if a slice query is: * start = <3><"foo".getBytes()><0> * end = <3><"foo".getBytes()><1> * then we'll return *all* the columns whose first component is "foo". * If for a component, the 'end-of-component' is != 0, there should not be any * following component. The end-of-component can also be -1 to allow * non-inclusive query. For instance: * end = <3><"foo".getBytes()><-1> * allows to query everything that is smaller than <3><"foo".getBytes()>, but * not <3><"foo".getBytes()> itself. */ enum class eoc : eoc_type { start = -1, none = 0, end = 1 }; using component = std::pair; using component_view = std::pair; private: template>::value>> static size_t size(Value& val) { return val.size(); } static size_t size(const data_value& val) { return val.serialized_size(); } template>::value>> static void write_value(Value&& val, bytes::iterator& out) { out = std::copy(val.begin(), val.end(), out); } static void write_value(const data_value& val, bytes::iterator& out) { val.serialize(out); } template static void serialize_value(RangeOfSerializedComponents&& values, bytes::iterator& out, bool is_compound) { if (!is_compound) { auto it = values.begin(); write_value(std::forward(*it), out); return; } for (auto&& val : values) { write(out, static_cast(size(val))); write_value(std::forward(val), out); // Range tombstones are not keys. For collections, only frozen // values can be keys. Therefore, for as long as it is safe to // assume that this code will be used to create keys, it is safe // to assume the trailing byte is always zero. write(out, eoc_type(eoc::none)); } } template static size_t serialized_size(RangeOfSerializedComponents&& values, bool is_compound) { size_t len = 0; auto it = values.begin(); if (it != values.end()) { // CQL3 uses a specific prefix (0xFFFF) to encode "static columns" // (CASSANDRA-6561). This does mean the maximum size of the first component of a // composite is 65534, not 65535 (or we wouldn't be able to detect if the first 2 // bytes is the static prefix or not). auto value_size = size(*it); if (value_size > static_cast(std::numeric_limits::max() - uint8_t(is_compound))) { throw std::runtime_error(sprint("First component size too large: %d > %d", value_size, std::numeric_limits::max() - is_compound)); } if (!is_compound) { return value_size; } len += sizeof(size_type) + value_size + sizeof(eoc_type); ++it; } for ( ; it != values.end(); ++it) { auto value_size = size(*it); if (value_size > std::numeric_limits::max()) { throw std::runtime_error(sprint("Component size too large: %d > %d", value_size, std::numeric_limits::max())); } len += sizeof(size_type) + value_size + sizeof(eoc_type); } return len; } public: template auto describe_type(Describer f) const { return f(const_cast(_bytes)); } // marker is ignored if !is_compound template static composite serialize_value(RangeOfSerializedComponents&& values, bool is_compound = true, eoc marker = eoc::none) { auto size = serialized_size(values, is_compound); bytes b(bytes::initialized_later(), size); auto i = b.begin(); serialize_value(std::forward(values), i, is_compound); if (is_compound && !b.empty()) { b.back() = eoc_type(marker); } return composite(std::move(b), is_compound); } template static composite serialize_static(const schema& s, RangeOfSerializedComponents&& values) { // FIXME: Optimize auto b = bytes(size_t(2), bytes::value_type(0xff)); std::vector sv(s.clustering_key_size()); b += composite::serialize_value(boost::range::join(sv, std::forward(values)), true).release_bytes(); return composite(std::move(b)); } static eoc to_eoc(int8_t eoc_byte) { return eoc_byte == 0 ? eoc::none : (eoc_byte < 0 ? eoc::start : eoc::end); } class iterator : public std::iterator { bytes_view _v; component_view _current; private: void read_current() { size_type len; { if (_v.empty()) { _v = bytes_view(nullptr, 0); return; } len = read_simple(_v); if (_v.size() < len) { throw marshal_exception(); } } auto value = bytes_view(_v.begin(), len); _v.remove_prefix(len); _current = component_view(std::move(value), to_eoc(read_simple(_v))); } public: struct end_iterator_tag {}; iterator(const bytes_view& v, bool is_compound, bool is_static) : _v(v) { if (is_static) { _v.remove_prefix(2); } if (is_compound) { read_current(); } else { _current = component_view(_v, eoc::none); _v.remove_prefix(_v.size()); } } iterator(end_iterator_tag) : _v(nullptr, 0) {} iterator& operator++() { read_current(); return *this; } iterator operator++(int) { iterator i(*this); ++(*this); return i; } const value_type& operator*() const { return _current; } const value_type* operator->() const { return &_current; } bool operator!=(const iterator& i) const { return _v.begin() != i._v.begin(); } bool operator==(const iterator& i) const { return _v.begin() == i._v.begin(); } }; iterator begin() const { return iterator(_bytes, _is_compound, is_static()); } iterator end() const { return iterator(iterator::end_iterator_tag()); } boost::iterator_range components() const & { return { begin(), end() }; } auto values() const & { return components() | boost::adaptors::transformed([](auto&& c) { return c.first; }); } std::vector components() const && { std::vector result; std::transform(begin(), end(), std::back_inserter(result), [](auto&& p) { return component(bytes(p.first.begin(), p.first.end()), p.second); }); return result; } std::vector values() const && { std::vector result; boost::copy(components() | boost::adaptors::transformed([](auto&& c) { return to_bytes(c.first); }), std::back_inserter(result)); return result; } const bytes& get_bytes() const { return _bytes; } bytes release_bytes() && { return std::move(_bytes); } size_t size() const { return _bytes.size(); } bool empty() const { return _bytes.empty(); } static bool is_static(bytes_view bytes, bool is_compound) { return is_compound && bytes.size() > 2 && (bytes[0] & bytes[1] & 0xff) == 0xff; } bool is_static() const { return is_static(_bytes, _is_compound); } bool is_compound() const { return _is_compound; } // The following factory functions assume this composite is a compound value. template static composite from_clustering_element(const schema& s, const ClusteringElement& ce) { return serialize_value(ce.components(s)); } static composite from_exploded(const std::vector& v, eoc marker = eoc::none) { if (v.size() == 0) { return composite(bytes(size_t(1), bytes::value_type(marker))); } return serialize_value(v, true, marker); } static composite static_prefix(const schema& s) { return serialize_static(s, std::vector()); } explicit operator bytes_view() const { return _bytes; } template friend inline std::ostream& operator<<(std::ostream& os, const std::pair& c) { return os << "{value=" << c.first << "; eoc=" << sprint("0x%02x", eoc_type(c.second) & 0xff) << "}"; } friend std::ostream& operator<<(std::ostream& os, const composite& v); struct tri_compare { const std::vector& _types; tri_compare(const std::vector& types) : _types(types) {} int operator()(const composite&, const composite&) const; int operator()(composite_view, composite_view) const; }; }; class composite_view final { bytes_view _bytes; bool _is_compound; public: composite_view(bytes_view b, bool is_compound = true) : _bytes(b) , _is_compound(is_compound) { } composite_view(const composite& c) : composite_view(static_cast(c), c.is_compound()) { } composite_view() : _bytes(nullptr, 0) , _is_compound(true) { } std::vector explode() const { if (!_is_compound) { return { to_bytes(_bytes) }; } std::vector ret; for (auto it = begin(), e = end(); it != e; ) { ret.push_back(to_bytes(it->first)); auto marker = it->second; ++it; if (it != e && marker != composite::eoc::none) { throw runtime_exception(sprint("non-zero component divider found (%d) mid", sprint("0x%02x", composite::eoc_type(marker) & 0xff))); } } return ret; } composite::iterator begin() const { return composite::iterator(_bytes, _is_compound, is_static()); } composite::iterator end() const { return composite::iterator(composite::iterator::end_iterator_tag()); } boost::iterator_range components() const { return { begin(), end() }; } composite::eoc last_eoc() const { if (!_is_compound || _bytes.empty()) { return composite::eoc::none; } bytes_view v(_bytes); v.remove_prefix(v.size() - 1); return composite::to_eoc(read_simple(v)); } auto values() const { return components() | boost::adaptors::transformed([](auto&& c) { return c.first; }); } size_t size() const { return _bytes.size(); } bool empty() const { return _bytes.empty(); } bool is_static() const { return composite::is_static(_bytes, _is_compound); } explicit operator bytes_view() const { return _bytes; } bool operator==(const composite_view& k) const { return k._bytes == _bytes && k._is_compound == _is_compound; } bool operator!=(const composite_view& k) const { return !(k == *this); } friend inline std::ostream& operator<<(std::ostream& os, composite_view v) { return os << "{" << ::join(", ", v.components()) << ", compound=" << v._is_compound << ", static=" << v.is_static() << "}"; } }; inline std::ostream& operator<<(std::ostream& os, const composite& v) { return os << composite_view(v); } inline int composite::tri_compare::operator()(const composite& v1, const composite& v2) const { return (*this)(composite_view(v1), composite_view(v2)); } inline int composite::tri_compare::operator()(composite_view v1, composite_view v2) const { // See org.apache.cassandra.db.composites.AbstractCType#compare if (v1.empty()) { return v2.empty() ? 0 : -1; } if (v2.empty()) { return 1; } if (v1.is_static() != v2.is_static()) { return v1.is_static() ? -1 : 1; } auto a_values = v1.components(); auto b_values = v2.components(); auto cmp = [&](const data_type& t, component_view c1, component_view c2) { // First by value, then by EOC auto r = t->compare(c1.first, c2.first); if (r) { return r; } return static_cast(c1.second) - static_cast(c2.second); }; return lexicographical_tri_compare(_types.begin(), _types.end(), a_values.begin(), a_values.end(), b_values.begin(), b_values.end(), cmp); }