/*
 * Copyright (C) 2014 Cloudius Systems, Ltd.
 */

#ifndef DATABASE_HH_
#define DATABASE_HH_

#include "dht/i_partitioner.hh"
#include "core/sstring.hh"
#include "core/shared_ptr.hh"
#include "net/byteorder.hh"
#include "utils/UUID.hh"
#include "db_clock.hh"
#include "gc_clock.hh"
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include "types.hh"
#include "tuple.hh"
#include "core/future.hh"
#include "cql3/column_specification.hh"
#include
#include
#include "schema.hh"
#include "timestamp.hh"
#include "tombstone.hh"
#include "atomic_cell.hh"
#include "query.hh"
#include "keys.hh"
#include
#include
#include "sstables/sstables.hh"

// The cells of a single row. mutation::update_column() below indexes this
// map by column_definition::id and stores the cell value as the mapped value.
using row = std::map;

// A row's cells together with a tombstone that applies to the row.
struct deletable_row final {
    tombstone t;
    row cells;

    // Forwards t_ to tombstone::apply() on this row's tombstone.
    void apply(tombstone t_) {
        t.apply(t_);
    }
};

// Node of an intrusive set (it derives from boost::intrusive::set_base_hook<>)
// that pairs a clustering key prefix with a tombstone.
class row_tombstones_entry : public boost::intrusive::set_base_hook<> {
    clustering_key_prefix _prefix;
    tombstone _t;
public:
    // Takes ownership of the prefix by moving from it.
    row_tombstones_entry(clustering_key_prefix&& prefix, tombstone t)
        : _prefix(std::move(prefix))
        , _t(std::move(t))
    { }
    // Accessors for the stored prefix and tombstone (mutable and const).
    clustering_key_prefix& prefix() { return _prefix; }
    const clustering_key_prefix& prefix() const { return _prefix; }
    tombstone& t() { return _t; }
    const tombstone& t() const { return _t; }
    // Forwards t to tombstone::apply() on the stored tombstone.
    void apply(tombstone t) { _t.apply(t); }

    // Ordering of entries by their prefix, delegating to
    // clustering_key_prefix::less_compare constructed from the schema.
    // The mixed overloads compare an entry against a bare prefix, so lookups
    // do not need to build an entry first.
    struct compare {
        clustering_key_prefix::less_compare _c;
        compare(const schema& s) : _c(s) {}
        bool operator()(const row_tombstones_entry& e1, const row_tombstones_entry& e2) const { return _c(e1._prefix, e2._prefix); }
        bool operator()(const clustering_key_prefix& prefix, const row_tombstones_entry& e) const { return _c(prefix, e._prefix); }
        bool operator()(const row_tombstones_entry& e, const clustering_key_prefix& prefix) const { return _c(e._prefix, prefix); }
    };

    // Adapter around a caller-supplied comparator: compares an arbitrary
    // value against an entry by passing the entry's _prefix to the wrapped
    // comparator. The wrapped comparator is moved into the adapter.
    template struct delegating_compare {
        Comparator _c;
        delegating_compare(Comparator&& c) : _c(std::move(c)) {}
        template bool operator()(const Comparable& prefix, const row_tombstones_entry& e) const { return _c(prefix, e._prefix); }
        template bool operator()(const row_tombstones_entry& e, const Comparable& prefix) const { return _c(e._prefix, prefix); }
    };

    // Wraps c in a delegating_compare. Note that c is moved from.
    template static auto key_comparator(Comparator&& c) {
        return delegating_compare(std::move(c));
    }
};

// Node of an intrusive set (it derives from boost::intrusive::set_base_hook<>)
// that pairs a full clustering key with its deletable_row.
class rows_entry : public boost::intrusive::set_base_hook<> {
    clustering_key _key;
    deletable_row _row;
public:
    // Construct an entry with a default-constructed row, either moving or
    // copying the key.
    rows_entry(clustering_key&& key) : _key(std::move(key)) { }
    rows_entry(const clustering_key& key) : _key(key) { }
    // Copies the key and the row. The hook base class is not named in the
    // initializer list, so it is default-constructed rather than copied.
    rows_entry(const rows_entry& e) : _key(e._key) , _row(e._row) { }
    // Accessors for the stored key and row (mutable and const).
    clustering_key& key() { return _key; }
    const clustering_key& key() const { return _key; }
    deletable_row& row() { return _row; }
    const deletable_row& row() const { return _row; }
    // Forwards t to deletable_row::apply() on the stored row.
    void apply(tombstone t) { _row.apply(t); }

    // Ordering of entries by their key, delegating to
    // clustering_key::less_compare constructed from the schema.
    // The mixed overloads compare an entry against a bare clustering_key.
    struct compare {
        clustering_key::less_compare _c;
        compare(const schema& s) : _c(s) {}
        bool operator()(const rows_entry& e1, const rows_entry& e2) const { return _c(e1._key, e2._key); }
        bool operator()(const clustering_key& key, const rows_entry& e) const { return _c(key, e._key); }
        bool operator()(const rows_entry& e, const clustering_key& key) const { return _c(e._key, key); }
    };

    // Adapter around a caller-supplied comparator: compares an arbitrary
    // value against an entry by passing the entry's _key to the wrapped
    // comparator. The wrapped comparator is moved into the adapter.
    template struct delegating_compare {
        Comparator _c;
        delegating_compare(Comparator&& c) : _c(std::move(c)) {}
        template bool operator()(const Comparable& v, const rows_entry& e) const { return _c(v, e._key); }
        template bool operator()(const rows_entry& e, const Comparable& v) const { return _c(e._key, v); }
    };

    // Wraps c in a delegating_compare. Note that c is moved from.
    template static auto key_comparator(Comparator&& c) {
        return delegating_compare(std::move(c));
    }
};

// The contents of one partition: a partition-level tombstone, a static row,
// the clustered rows and the row tombstones. Most member functions are only
// declared here; their definitions are outside this header.
class mutation_partition final {
    // Intrusive set holding the clustered rows; the constructor orders it
    // with rows_entry::compare.
    using rows_type = boost::intrusive::set>;
private:
    tombstone _tombstone;
    row _static_row;
    rows_type _rows;
    // Contains only strict prefixes so that we don't have to lookup full keys
    // in both _row_tombstones and _rows.
    boost::intrusive::set> _row_tombstones;
public:
    // Builds empty sets whose comparators are constructed from the schema.
    mutation_partition(schema_ptr s)
        : _rows(rows_entry::compare(*s))
        , _row_tombstones(row_tombstones_entry::compare(*s))
    { }
    // Only a move constructor is declared, so this type is not copyable.
    mutation_partition(mutation_partition&&) = default;
    // Defined out of line.
    // NOTE(review): presumably disposes of the entries linked into the
    // intrusive sets -- confirm in the implementation file.
    ~mutation_partition();
    // Forwards t to tombstone::apply() on the partition-level tombstone.
    void apply(tombstone t) { _tombstone.apply(t); }
    // Deletion entry points taking either an exploded prefix or a full
    // clustering key (defined out of line).
    void apply_delete(schema_ptr schema, const exploded_clustering_prefix& prefix, tombstone t);
    void apply_delete(schema_ptr schema, clustering_key&& key, tombstone t);
    // prefix must not be full
    void apply_row_tombstone(schema_ptr schema, clustering_key_prefix prefix, tombstone t);
    // Applies another partition's contents to this one (defined out of line).
    void apply(schema_ptr schema, const mutation_partition& p);
    // Mutable access to the static row.
    row& static_row() { return _static_row; }
    // Returns a reference to the row for key (defined out of line).
    // NOTE(review): the reference return type suggests the row is created
    // when missing -- confirm in the implementation.
    row& clustered_row(const clustering_key& key);
    // Pointer-returning lookups (defined out of line). mutation::get_cell()
    // treats a null result from find_row() as "row not present".
    row* find_row(const clustering_key& key);
    rows_entry* find_entry(schema_ptr schema, const clustering_key_prefix& key);
    // Tombstone queries for a given row (defined out of line).
    tombstone range_tombstone_for_row(const schema& schema, const clustering_key& key);
    tombstone tombstone_for_row(const schema& schema, const clustering_key& key);
    tombstone tombstone_for_row(const schema& schema, const rows_entry& e);
    friend std::ostream& operator<<(std::ostream& os, const mutation_partition& mp);
    // Returns an iterator range selected by the query range r (defined out of line).
    boost::iterator_range range(const schema& schema, const query::range& r);
};

// A set of changes to one partition: the schema, the partition key and the
// partition contents. All three are public data members.
class mutation final {
public:
    schema_ptr schema;
    partition_key key;
    mutation_partition p;
public:
    // Members are initialized in declaration order (schema, key, p), so
    // p(schema) reads the already-initialized member, not the moved-from
    // schema_ parameter.
    mutation(partition_key key_, schema_ptr schema_)
        : schema(std::move(schema_))
        , key(std::move(key_))
        , p(schema)
    { }
    mutation(mutation&&) = default;
    // NOTE(review): mutation_partition, as declared above, has only a move
    // constructor, so this defaulted copy constructor would be defined as
    // deleted -- confirm whether copying a mutation is intended to work.
    mutation(const mutation&) = default;
    // Stores value into the static row under def's column id.
    void set_static_cell(const column_definition& def, atomic_cell_or_collection value) {
        update_column(p.static_row(), def, std::move(value));
    }
    // Stores value into the clustered row addressed by an exploded prefix;
    // the prefix is first converted to a clustering_key using the schema.
    void set_clustered_cell(const exploded_clustering_prefix& prefix, const column_definition& def, atomic_cell_or_collection value) {
        auto& row = p.clustered_row(clustering_key::from_clustering_prefix(*schema, prefix));
        update_column(row, def, std::move(value));
    }
    // Stores value into the clustered row addressed by a clustering key.
    void set_clustered_cell(const clustering_key& key, const column_definition& def, atomic_cell_or_collection value) {
        auto& row = p.clustered_row(key);
        update_column(row, def, std::move(value));
    }
    // Looks the column up by name, serializes value with the column type's
    // decompose() and stores it as a live cell with the given timestamp and
    // optional ttl.
    // Throws std::runtime_error when the schema has no column called name.
    void set_cell(const exploded_clustering_prefix& prefix, const bytes& name, const boost::any& value, api::timestamp_type timestamp, ttl_opt ttl = {}) {
        auto column_def = schema->get_column_definition(name);
        if (!column_def) {
            throw std::runtime_error(sprint("no column definition found for '%s'", name));
        }
        return set_cell(prefix, *column_def, atomic_cell::make_live(timestamp, ttl, column_def->type->decompose(value)));
    }
    // Routes the value to the static row or to a clustered row depending on
    // the column kind. Throws std::runtime_error for a column that is
    // neither static nor regular. prefix is unused for static columns.
    void set_cell(const exploded_clustering_prefix& prefix, const column_definition& def, atomic_cell_or_collection value) {
        if (def.is_static()) {
            set_static_cell(def, std::move(value));
        } else if (def.is_regular()) {
            set_clustered_cell(prefix, def, std::move(value));
        } else {
            throw std::runtime_error("attemting to store into a key cell");
        }
    }
    // Returns the cell stored for column def, or an empty optional when the
    // row or the cell is absent. For static columns rkey is ignored and the
    // static row is searched instead.
    auto get_cell(const clustering_key& rkey, const column_definition& def) -> std::experimental::optional {
        // Looks def.id up in a row; empty optional when not found.
        auto find_cell = [&def] (row& r) {
            auto i = r.find(def.id);
            if (i == r.end()) {
                return std::experimental::optional{};
            }
            return std::experimental::optional{i->second};
        };
        if (def.is_static()) {
            return find_cell(p.static_row());
        } else {
            auto r = p.find_row(rkey);
            if (!r) {
                return {};
            }
            return find_cell(*r);
        }
    }
private:
    // Inserts value under def.id when the row has no such column yet;
    // otherwise hands the existing and the new value to merge_column()
    // (declared elsewhere). lower_bound() is used so the position found can
    // double as the insertion hint.
    static void update_column(row& row, const column_definition& def, atomic_cell_or_collection&& value) {
        // our mutations are not yet immutable
        auto id = def.id;
        auto i = row.lower_bound(id);
        if (i == row.end() || i->first != id) {
            row.emplace_hint(i, id, std::move(value));
        } else {
            merge_column(def, i->second, value);
        }
    }
    friend std::ostream& operator<<(std::ostream& os, const mutation& m);
};

// Per-table state: the schema, the partitions and the sstables.
// Member functions are defined out of line.
struct column_family {
    column_family(schema_ptr schema);
    column_family(column_family&&) = default;
    // The find_or_create_* functions return references, the find_* functions
    // return pointers.
    // NOTE(review): presumably the pointer variants return nullptr when the
    // item is missing -- confirm in the implementation.
    mutation_partition& find_or_create_partition(const partition_key& key);
    row& find_or_create_row(const partition_key& partition_key, const clustering_key& clustering_key);
    mutation_partition* find_partition(const partition_key& key);
    row* find_row(const partition_key& partition_key, const clustering_key& clustering_key);
    schema_ptr _schema;
    // partition key -> partition
    std::map partitions;
    // Applies the mutation m to this column family (defined out of line).
    void apply(const mutation& m);
    // Returns at most "cmd.limit" rows
    future> query(const query::read_command& cmd);
    // Asynchronous; takes a data directory path (defined out of line).
    future<> populate(sstring datadir);
private:
    // generation -> sstable. Ordered by key so we can easily get the most recent.
    std::map> _sstables;
    // Asynchronous; takes an sstable directory and a file name (defined out of line).
    future<> probe_file(sstring sstdir, sstring fname);
    // Returns at most "limit" rows
    query::result::partition get_partition_slice(mutation_partition& partition, const query::partition_slice& slice, uint32_t limit);
};

// A keyspace: a collection of column families, looked up by name through
// find_schema() / find_column_family(). Member functions are defined out
// of line.
class keyspace {
public:
    std::unordered_map column_families;
    future<> populate(sstring datadir);
    schema_ptr find_schema(const sstring& cf_name);
    column_family* find_column_family(const sstring& cf_name);
};

// Policy for distributed:
//   broadcast metadata writes
//   local metadata reads
//   use shard_of() for data

// Top-level container of keyspaces. Apart from stop() and assign(), member
// functions are defined out of line.
class database {
public:
    database();
    std::unordered_map keyspaces;
    future<> init_from_data_directory(sstring datadir);
    future<> populate(sstring datadir);
    keyspace* find_keyspace(const sstring& name);
    keyspace& find_or_create_keyspace(const sstring& name);
    schema_ptr find_schema(const sstring& ks_name, const sstring& cf_name);
    // Nothing to tear down: returns an already-resolved future.
    future<> stop() { return make_ready_future<>(); }
    // Replaces this database's state by move-assigning from db. No
    // assignment operator is declared in this class, so the implicitly
    // generated one is used.
    void assign(database&& db) {
        *this = std::move(db);
    }
    // Maps a token to a shard number (defined out of line).
    unsigned shard_of(const dht::token& t);
    // Asynchronous query entry point (defined out of line).
    future> query(const query::read_command& cmd);
};

// FIXME: stub
class secondary_index_manager {};

#endif /* DATABASE_HH_ */