Compare commits
163 Commits
branch-0.1
...
branch-0.1
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
123eb04136 | ||
|
|
37dfe9cfc6 | ||
|
|
43d3c5f354 | ||
|
|
59cd785778 | ||
|
|
bb24d4324f | ||
|
|
eb7ae39f40 | ||
|
|
4281b48472 | ||
|
|
fef7375beb | ||
|
|
8c98e239d0 | ||
|
|
098136f4ab | ||
|
|
2086c651ba | ||
|
|
b43b5af894 | ||
|
|
95f515a6bd | ||
|
|
79e70568d7 | ||
|
|
934d3f06d1 | ||
|
|
c0ac7b3a73 | ||
|
|
657841922a | ||
|
|
fdc28a73f8 | ||
|
|
538de7222a | ||
|
|
bc23ebcbc3 | ||
|
|
91c2af2803 | ||
|
|
2891291ad1 | ||
|
|
3f0ca277e5 | ||
|
|
564cb2bcd1 | ||
|
|
d435ca7da6 | ||
|
|
a95a7294ef | ||
|
|
5e8249f062 | ||
|
|
cd215fc552 | ||
|
|
54b4f26cb0 | ||
|
|
dcb9b441ab | ||
|
|
d64db98943 | ||
|
|
d4d3a5b620 | ||
|
|
96d215168e | ||
|
|
f0cfa61968 | ||
|
|
9e0c498425 | ||
|
|
1812fe9e70 | ||
|
|
ae53604ed7 | ||
|
|
53e3e79349 | ||
|
|
871bfb1c94 | ||
|
|
b61ea247d2 | ||
|
|
0c66c25250 | ||
|
|
3004866f59 | ||
|
|
5c3951b28a | ||
|
|
7a6ad7aec2 | ||
|
|
e22972009b | ||
|
|
ad30cf0faf | ||
|
|
eb05dc680d | ||
|
|
0fe14e2b4b | ||
|
|
5a7f15ba49 | ||
|
|
7e79d35f85 | ||
|
|
61abc85eb3 | ||
|
|
88dcf2e935 | ||
|
|
f667e05e08 | ||
|
|
dc6f2157e7 | ||
|
|
3674ee2fc1 | ||
|
|
56df32ba56 | ||
|
|
59694a8e43 | ||
|
|
af91a8f31b | ||
|
|
2f071d9648 | ||
|
|
224db2ba37 | ||
|
|
51fcc48700 | ||
|
|
1b9e350614 | ||
|
|
f62a6f234b | ||
|
|
8c02ad0e9e | ||
|
|
b85f3ad130 | ||
|
|
5b9f3bff7d | ||
|
|
5484f25091 | ||
|
|
cf95c3f681 | ||
|
|
d8afc6014e | ||
|
|
3c9ded27cc | ||
|
|
616903de12 | ||
|
|
0e8f80b5ee | ||
|
|
2022117234 | ||
|
|
db70643fe3 | ||
|
|
aaca88a1e7 | ||
|
|
2c59e2f81f | ||
|
|
8d88ece896 | ||
|
|
a64fa3db03 | ||
|
|
879a4ad4d3 | ||
|
|
0af7fb5509 | ||
|
|
f80e3d7859 | ||
|
|
21882f5122 | ||
|
|
6b258f1247 | ||
|
|
aa2b11f21b | ||
|
|
80d1d4d161 | ||
|
|
8b19373536 | ||
|
|
df46542832 | ||
|
|
6f69d4b700 | ||
|
|
de75f3fa69 | ||
|
|
ab328ead3d | ||
|
|
32ac2ccc4a | ||
|
|
7c3e6c306b | ||
|
|
a3e3add28a | ||
|
|
a26ffefd53 | ||
|
|
2599b78583 | ||
|
|
da0e80a286 | ||
|
|
36b2de10ed | ||
|
|
ed9cd23a2d | ||
|
|
25bb889c2a | ||
|
|
ca8c4f3e77 | ||
|
|
ad358300a9 | ||
|
|
569d288891 | ||
|
|
96f40d535e | ||
|
|
a7c11d1e30 | ||
|
|
3a402db1be | ||
|
|
d03b452322 | ||
|
|
d9d8f87c1b | ||
|
|
cf7541020f | ||
|
|
c3f03d5c96 | ||
|
|
89f7f746cb | ||
|
|
65875124b7 | ||
|
|
273b8daeeb | ||
|
|
e6cf3e915f | ||
|
|
9396956955 | ||
|
|
aaecf5424c | ||
|
|
6b113a9a7a | ||
|
|
4a8c79ca0e | ||
|
|
3a9200db03 | ||
|
|
7ddf8963f5 | ||
|
|
33ef58c5c9 | ||
|
|
1e55aa38c1 | ||
|
|
644c226d58 | ||
|
|
bfe26ea208 | ||
|
|
730abbc421 | ||
|
|
2513d6ddbe | ||
|
|
6e72e78e0d | ||
|
|
505b3e4936 | ||
|
|
33e5097090 | ||
|
|
cb0b56f75f | ||
|
|
b1a0c4b451 | ||
|
|
d65adef10c | ||
|
|
262f44948d | ||
|
|
76b43fbf74 | ||
|
|
2fe2320490 | ||
|
|
23895ac7f5 | ||
|
|
ae9e0c3d41 | ||
|
|
0799251a9f | ||
|
|
23723991ed | ||
|
|
a5842642fa | ||
|
|
0f3ccc1143 | ||
|
|
0053394ec0 | ||
|
|
0ce2b7bc8d | ||
|
|
f06b72eb18 | ||
|
|
966e8c7144 | ||
|
|
83c8b3e433 | ||
|
|
53e842aaf7 | ||
|
|
0b91b643ba | ||
|
|
7257f72fbf | ||
|
|
58d3a3e138 | ||
|
|
0870caaea1 | ||
|
|
242e5ea291 | ||
|
|
3816e35d11 | ||
|
|
6390bc3121 | ||
|
|
3b52033371 | ||
|
|
374414ffd0 | ||
|
|
17f6dc3671 | ||
|
|
16cd5892f7 | ||
|
|
269ea7f81b | ||
|
|
bb1470f0d4 | ||
|
|
ba859acb3b | ||
|
|
f0a6c33b6d | ||
|
|
27737d702b | ||
|
|
66e428799f |
2
.gitmodules
vendored
2
.gitmodules
vendored
@@ -1,6 +1,6 @@
|
||||
[submodule "seastar"]
|
||||
path = seastar
|
||||
url = ../seastar
|
||||
url = ../scylla-seastar
|
||||
ignore = dirty
|
||||
[submodule "swagger-ui"]
|
||||
path = swagger-ui
|
||||
|
||||
76
ORIGIN
76
ORIGIN
@@ -1 +1,77 @@
|
||||
http://git-wip-us.apache.org/repos/asf/cassandra.git trunk (bf599fb5b062cbcc652da78b7d699e7a01b949ad)
|
||||
|
||||
import = bf599fb5b062cbcc652da78b7d699e7a01b949ad
|
||||
Y = Already in scylla
|
||||
|
||||
$ git log --oneline import..cassandra-2.1.11 -- gms/
|
||||
Y 484e645 Mark node as dead even if already left
|
||||
d0c166f Add trampled commit back
|
||||
ba5837e Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
718e47f Forgot a damn c/r
|
||||
a7282e4 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
Y ae4cd69 Print versions for gossip states in gossipinfo.
|
||||
Y 7fba3d2 Don't mark nodes down before the max local pause interval once paused.
|
||||
c2142e6 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
ba9a69e checkForEndpointCollision fails for legitimate collisions, finalized list of statuses and nits, CASSANDRA-9765
|
||||
54470a2 checkForEndpointCollision fails for legitimate collisions, improved version after CR, CASSANDRA-9765
|
||||
2c9b490 checkForEndpointCollision fails for legitimate collisions, CASSANDRA-9765
|
||||
4c15970 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
ad8047a ArrivalWindow should use primitives
|
||||
Y 4012134 Failure detector detects and ignores local pauses
|
||||
9bcdd0f Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
cefaa4e Close incoming connections when MessagingService is stopped
|
||||
ea1beda Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
08dbbd6 Ignore gossip SYNs after shutdown
|
||||
3c17ac6 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
a64bc43 lists work better when you initialize them
|
||||
543a899 change list to arraylist
|
||||
730d4d4 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
e3e2de0 change list to arraylist
|
||||
f7884c5 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
Y 84b2846 remove redundant state
|
||||
4f2c372 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
Y b2c62bb Add shutdown gossip state to prevent timeouts during rolling restarts
|
||||
Y def4835 Add missing follow on fix for 7816 only applied to cassandra-2.1 branch in 763130bdbde2f4cec2e8973bcd5203caf51cc89f
|
||||
Y 763130b Followup commit for 7816
|
||||
1376b8e Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
Y 2199a87 Fix duplicate up/down messages sent to native clients
|
||||
136042e Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
Y eb9c5bb Improve FD logging when the arrival time is ignored.
|
||||
|
||||
$ git log --oneline import..cassandra-2.1.11 -- service/StorageService.java
|
||||
92c5787 Keep StorageServiceMBean interface stable
|
||||
6039d0e Fix DC and Rack in nodetool info
|
||||
a2f0da0 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
c4de752 Follow-up to CASSANDRA-10238
|
||||
e889ee4 2i key cache load fails
|
||||
4b1d59e Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
257cdaa Fix consolidating racks violating the RF contract
|
||||
Y 27754c0 refuse to decomission if not in state NORMAL patch by Jan Karlsson and Stefania for CASSANDRA-8741
|
||||
Y 5bc56c3 refuse to decomission if not in state NORMAL patch by Jan Karlsson and Stefania for CASSANDRA-8741
|
||||
Y 8f9ca07 Cannot replace token does not exist - DN node removed as Fat Client
|
||||
c2142e6 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
54470a2 checkForEndpointCollision fails for legitimate collisions, improved version after CR, CASSANDRA-9765
|
||||
1eccced Handle corrupt files on startup
|
||||
2c9b490 checkForEndpointCollision fails for legitimate collisions, CASSANDRA-9765
|
||||
c4b5260 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
Y 52dbc3f Can't transition from write survey to normal mode
|
||||
9966419 Make rebuild only run one at a time
|
||||
d693ca1 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
be9eff5 Add option to not validate atoms during scrub
|
||||
2a4daaf followup fix for 8564
|
||||
93478ab Wait for anticompaction to finish
|
||||
9e9846e Fix for harmless exceptions being logged as ERROR
|
||||
6d06f32 Fix anticompaction blocking ANTI_ENTROPY stage
|
||||
4f2c372 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
Y b2c62bb Add shutdown gossip state to prevent timeouts during rolling restarts
|
||||
Y cba1b68 Fix failed bootstrap/replace attempts being persisted in system.peers
|
||||
f59df28 Allow takeColumnFamilySnapshot to take a list of tables patch by Sachin Jarin; reviewed by Nick Bailey for CASSANDRA-8348
|
||||
Y ac46747 Fix failed bootstrap/replace attempts being persisted in system.peers
|
||||
5abab57 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
0ff9c3c Allow reusing snapshot tags across different column families.
|
||||
f9c57a5 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
Y b296c55 Fix MOVED_NODE client event
|
||||
bbb3fc7 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
37eb2a0 Fix NPE in nodetool getendpoints with bad ks/cf
|
||||
f8b43d4 Merge branch 'cassandra-2.0' into cassandra-2.1
|
||||
e20810c Remove C* specific class from JMX API
|
||||
|
||||
12
README.md
12
README.md
@@ -82,3 +82,15 @@ Run the image with:
|
||||
```
|
||||
docker run -p $(hostname -i):9042:9042 -i -t <image name>
|
||||
```
|
||||
|
||||
|
||||
## Contributing to Scylla
|
||||
|
||||
Do not send pull requests.
|
||||
|
||||
Send patches to the mailing list address scylladb-dev@googlegroups.com.
|
||||
Be sure to subscribe.
|
||||
|
||||
In order for your patches to be merged, you must sign the Contributor's
|
||||
License Agreement, protecting your rights and ours. See
|
||||
http://www.scylladb.com/opensource/cla/.
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
VERSION=development
|
||||
VERSION=0.13.2
|
||||
|
||||
if test -f version
|
||||
then
|
||||
|
||||
@@ -579,30 +579,6 @@
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/column_family/sstables/snapshots_size/{name}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"the size of SSTables in 'snapshots' subdirectory which aren't live anymore",
|
||||
"type":"double",
|
||||
"nickname":"true_snapshots_size",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"name",
|
||||
"description":"The column family name in keysspace:name format",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"path"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/column_family/metrics/memtable_columns_count/{name}",
|
||||
"operations":[
|
||||
@@ -2041,7 +2017,7 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/column_family/metrics/true_snapshots_size/{name}",
|
||||
"path":"/column_family/metrics/snapshots_size/{name}",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
|
||||
@@ -15,7 +15,7 @@
|
||||
"summary":"get List of running compactions",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"jsonmap"
|
||||
"type":"summary"
|
||||
},
|
||||
"nickname":"get_compactions",
|
||||
"produces":[
|
||||
@@ -46,16 +46,16 @@
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/compaction_manager/compaction_summary",
|
||||
"path":"/compaction_manager/compaction_info",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"get compaction summary",
|
||||
"summary":"get a list of all active compaction info",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"string"
|
||||
"type":"compaction_info"
|
||||
},
|
||||
"nickname":"get_compaction_summary",
|
||||
"nickname":"get_compaction_info",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
@@ -174,30 +174,73 @@
|
||||
}
|
||||
],
|
||||
"models":{
|
||||
"mapper":{
|
||||
"id":"mapper",
|
||||
"description":"A key value mapping",
|
||||
"row_merged":{
|
||||
"id":"row_merged",
|
||||
"description":"A row merged information",
|
||||
"properties":{
|
||||
"key":{
|
||||
"type":"string",
|
||||
"description":"The key"
|
||||
"type":"int",
|
||||
"description":"The number of sstable"
|
||||
},
|
||||
"value":{
|
||||
"type":"string",
|
||||
"description":"The value"
|
||||
"type":"long",
|
||||
"description":"The number or row compacted"
|
||||
}
|
||||
}
|
||||
},
|
||||
"jsonmap":{
|
||||
"id":"jsonmap",
|
||||
"description":"A json representation of a map as a list of key value",
|
||||
"compaction_info" :{
|
||||
"id": "compaction_info",
|
||||
"description":"A key value mapping",
|
||||
"properties":{
|
||||
"operation_type":{
|
||||
"type":"string",
|
||||
"description":"The operation type"
|
||||
},
|
||||
"completed":{
|
||||
"type":"long",
|
||||
"description":"The current completed"
|
||||
},
|
||||
"total":{
|
||||
"type":"long",
|
||||
"description":"The total to compact"
|
||||
},
|
||||
"unit":{
|
||||
"type":"string",
|
||||
"description":"The compacted unit"
|
||||
}
|
||||
}
|
||||
},
|
||||
"summary":{
|
||||
"id":"summary",
|
||||
"description":"A compaction summary object",
|
||||
"properties":{
|
||||
"value":{
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"mapper"
|
||||
},
|
||||
"description":"A list of key, value mapping"
|
||||
"id":{
|
||||
"type":"string",
|
||||
"description":"The UUID"
|
||||
},
|
||||
"ks":{
|
||||
"type":"string",
|
||||
"description":"The keyspace name"
|
||||
},
|
||||
"cf":{
|
||||
"type":"string",
|
||||
"description":"The column family name"
|
||||
},
|
||||
"completed":{
|
||||
"type":"long",
|
||||
"description":"The number of units completed"
|
||||
},
|
||||
"total":{
|
||||
"type":"long",
|
||||
"description":"The total number of units"
|
||||
},
|
||||
"task_type":{
|
||||
"type":"string",
|
||||
"description":"The task compaction type"
|
||||
},
|
||||
"unit":{
|
||||
"type":"string",
|
||||
"description":"The units being used"
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -232,7 +275,7 @@
|
||||
"rows_merged":{
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"mapper"
|
||||
"type":"row_merged"
|
||||
},
|
||||
"description":"The merged rows"
|
||||
}
|
||||
|
||||
@@ -48,7 +48,10 @@
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get all endpoint states",
|
||||
"type":"string",
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"endpoint_state"
|
||||
},
|
||||
"nickname":"get_all_endpoint_states",
|
||||
"produces":[
|
||||
"application/json"
|
||||
@@ -148,6 +151,53 @@
|
||||
"description": "The value"
|
||||
}
|
||||
}
|
||||
},
|
||||
"endpoint_state": {
|
||||
"id": "states",
|
||||
"description": "Holds an endpoint state",
|
||||
"properties": {
|
||||
"addrs": {
|
||||
"type": "string",
|
||||
"description": "The endpoint address"
|
||||
},
|
||||
"generation": {
|
||||
"type": "int",
|
||||
"description": "The heart beat generation"
|
||||
},
|
||||
"version": {
|
||||
"type": "int",
|
||||
"description": "The heart beat version"
|
||||
},
|
||||
"update_time": {
|
||||
"type": "long",
|
||||
"description": "The update timestamp"
|
||||
},
|
||||
"is_alive": {
|
||||
"type": "boolean",
|
||||
"description": "Is the endpoint alive"
|
||||
},
|
||||
"application_state" : {
|
||||
"type":"array",
|
||||
"items":{
|
||||
"type":"version_value"
|
||||
},
|
||||
"description": "Is the endpoint alive"
|
||||
}
|
||||
}
|
||||
},
|
||||
"version_value": {
|
||||
"id": "version_value",
|
||||
"description": "Holds a version value for an application state",
|
||||
"properties": {
|
||||
"application_state": {
|
||||
"type": "int",
|
||||
"description": "The application state enum index"
|
||||
},
|
||||
"value": {
|
||||
"type": "string",
|
||||
"description": "The version value"
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -184,6 +184,30 @@
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"path":"/messaging_service/version",
|
||||
"operations":[
|
||||
{
|
||||
"method":"GET",
|
||||
"summary":"Get the version number",
|
||||
"type":"int",
|
||||
"nickname":"get_version",
|
||||
"produces":[
|
||||
"application/json"
|
||||
],
|
||||
"parameters":[
|
||||
{
|
||||
"name":"addr",
|
||||
"description":"Address",
|
||||
"required":true,
|
||||
"allowMultiple":false,
|
||||
"type":"string",
|
||||
"paramType":"query"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
"models":{
|
||||
|
||||
41
api/api.hh
41
api/api.hh
@@ -128,47 +128,54 @@ inline double pow2(double a) {
|
||||
return a * a;
|
||||
}
|
||||
|
||||
inline httpd::utils_json::histogram add_histogram(httpd::utils_json::histogram res,
|
||||
// FIXME: Move to utils::ihistogram::operator+=()
|
||||
inline utils::ihistogram add_histogram(utils::ihistogram res,
|
||||
const utils::ihistogram& val) {
|
||||
if (!res.count._set) {
|
||||
res = val;
|
||||
return res;
|
||||
if (res.count == 0) {
|
||||
return val;
|
||||
}
|
||||
if (val.count == 0) {
|
||||
return res;
|
||||
return std::move(res);
|
||||
}
|
||||
if (res.min() > val.min) {
|
||||
if (res.min > val.min) {
|
||||
res.min = val.min;
|
||||
}
|
||||
if (res.max() < val.max) {
|
||||
if (res.max < val.max) {
|
||||
res.max = val.max;
|
||||
}
|
||||
double ncount = res.count() + val.count;
|
||||
double ncount = res.count + val.count;
|
||||
// To get an estimated sum we take the estimated mean
|
||||
// and multiply it by the true count
|
||||
res.sum = res.sum() + val.mean * val.count;
|
||||
double a = res.count()/ncount;
|
||||
res.sum = res.sum + val.mean * val.count;
|
||||
double a = res.count/ncount;
|
||||
double b = val.count/ncount;
|
||||
|
||||
double mean = a * res.mean() + b * val.mean;
|
||||
double mean = a * res.mean + b * val.mean;
|
||||
|
||||
res.variance = (res.variance() + pow2(res.mean() - mean) )* a +
|
||||
res.variance = (res.variance + pow2(res.mean - mean) )* a +
|
||||
(val.variance + pow2(val.mean -mean))* b;
|
||||
|
||||
res.mean = mean;
|
||||
res.count = res.count() + val.count;
|
||||
res.count = res.count + val.count;
|
||||
for (auto i : val.sample) {
|
||||
res.sample.push(i);
|
||||
res.sample.push_back(i);
|
||||
}
|
||||
return res;
|
||||
}
|
||||
|
||||
inline
|
||||
httpd::utils_json::histogram to_json(const utils::ihistogram& val) {
|
||||
httpd::utils_json::histogram h;
|
||||
h = val;
|
||||
return h;
|
||||
}
|
||||
|
||||
template<class T, class F>
|
||||
future<json::json_return_type> sum_histogram_stats(distributed<T>& d, utils::ihistogram F::*f) {
|
||||
|
||||
return d.map_reduce0([f](const T& p) {return p.get_stats().*f;}, httpd::utils_json::histogram(),
|
||||
add_histogram).then([](const httpd::utils_json::histogram& val) {
|
||||
return make_ready_future<json::json_return_type>(val);
|
||||
return d.map_reduce0([f](const T& p) {return p.get_stats().*f;}, utils::ihistogram(),
|
||||
add_histogram).then([](const utils::ihistogram& val) {
|
||||
return make_ready_future<json::json_return_type>(to_json(val));
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -110,23 +110,25 @@ static future<json::json_return_type> get_cf_histogram(http_context& ctx, const
|
||||
utils::ihistogram column_family::stats::*f) {
|
||||
utils::UUID uuid = get_uuid(name, ctx.db.local());
|
||||
return ctx.db.map_reduce0([f, uuid](const database& p) {return p.find_column_family(uuid).get_stats().*f;},
|
||||
httpd::utils_json::histogram(),
|
||||
utils::ihistogram(),
|
||||
add_histogram)
|
||||
.then([](const httpd::utils_json::histogram& val) {
|
||||
return make_ready_future<json::json_return_type>(val);
|
||||
.then([](const utils::ihistogram& val) {
|
||||
return make_ready_future<json::json_return_type>(to_json(val));
|
||||
});
|
||||
}
|
||||
|
||||
static future<json::json_return_type> get_cf_histogram(http_context& ctx, utils::ihistogram column_family::stats::*f) {
|
||||
std::function<httpd::utils_json::histogram(const database&)> fun = [f] (const database& db) {
|
||||
httpd::utils_json::histogram res;
|
||||
std::function<utils::ihistogram(const database&)> fun = [f] (const database& db) {
|
||||
utils::ihistogram res;
|
||||
for (auto i : db.get_column_families()) {
|
||||
res = add_histogram(res, i.second->get_stats().*f);
|
||||
}
|
||||
return res;
|
||||
};
|
||||
return ctx.db.map(fun).then([](const std::vector<httpd::utils_json::histogram> &res) {
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
return ctx.db.map(fun).then([](const std::vector<utils::ihistogram> &res) {
|
||||
std::vector<httpd::utils_json::histogram> r;
|
||||
boost::copy(res | boost::adaptors::transformed(to_json), std::back_inserter(r));
|
||||
return make_ready_future<json::json_return_type>(r);
|
||||
});
|
||||
}
|
||||
|
||||
@@ -589,11 +591,16 @@ void set_column_family(http_context& ctx, routes& r) {
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
cf::get_true_snapshots_size.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
// FIXME
|
||||
//auto id = get_uuid(req->param["name"], ctx.db.local());
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
cf::get_true_snapshots_size.set(r, [&ctx] (std::unique_ptr<request> req) {
|
||||
auto uuid = get_uuid(req->param["name"], ctx.db.local());
|
||||
return ctx.db.local().find_column_family(uuid).get_snapshot_details().then([](
|
||||
const std::unordered_map<sstring, column_family::snapshot_details>& sd) {
|
||||
int64_t res = 0;
|
||||
for (auto i : sd) {
|
||||
res += i.second.total;
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
});
|
||||
|
||||
cf::get_all_true_snapshots_size.set(r, [] (std::unique_ptr<request> req) {
|
||||
|
||||
@@ -26,7 +26,7 @@ namespace api {
|
||||
|
||||
using namespace scollectd;
|
||||
namespace cm = httpd::compaction_manager_json;
|
||||
|
||||
using namespace json;
|
||||
|
||||
static future<json::json_return_type> get_cm_stats(http_context& ctx,
|
||||
int64_t compaction_manager::stats::*f) {
|
||||
@@ -40,27 +40,23 @@ static future<json::json_return_type> get_cm_stats(http_context& ctx,
|
||||
void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
cm::get_compactions.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
std::vector<cm::jsonmap> map;
|
||||
// FIXME
|
||||
warn(unimplemented::cause::API);
|
||||
std::vector<cm::summary> map;
|
||||
return make_ready_future<json::json_return_type>(map);
|
||||
});
|
||||
|
||||
cm::get_compaction_summary.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
std::vector<sstring> res;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
cm::force_user_defined_compaction.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
return make_ready_future<json::json_return_type>("");
|
||||
// FIXME
|
||||
warn(unimplemented::cause::API);
|
||||
return make_ready_future<json::json_return_type>(json_void());
|
||||
});
|
||||
|
||||
cm::stop_compaction.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
// FIXME
|
||||
warn(unimplemented::cause::API);
|
||||
return make_ready_future<json::json_return_type>("");
|
||||
});
|
||||
|
||||
@@ -81,17 +77,27 @@ void set_compaction_manager(http_context& ctx, routes& r) {
|
||||
|
||||
cm::get_bytes_compacted.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
// FIXME
|
||||
warn(unimplemented::cause::API);
|
||||
return make_ready_future<json::json_return_type>(0);
|
||||
});
|
||||
|
||||
cm::get_compaction_history.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
unimplemented();
|
||||
// FIXME
|
||||
warn(unimplemented::cause::API);
|
||||
std::vector<cm::history> res;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
cm::get_compaction_info.set(r, [] (std::unique_ptr<request> req) {
|
||||
//TBD
|
||||
// FIXME
|
||||
warn(unimplemented::cause::API);
|
||||
std::vector<cm::compaction_info> res;
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -22,15 +22,33 @@
|
||||
#include "failure_detector.hh"
|
||||
#include "api/api-doc/failure_detector.json.hh"
|
||||
#include "gms/failure_detector.hh"
|
||||
#include "gms/application_state.hh"
|
||||
#include "gms/gossiper.hh"
|
||||
namespace api {
|
||||
|
||||
namespace fd = httpd::failure_detector_json;
|
||||
|
||||
void set_failure_detector(http_context& ctx, routes& r) {
|
||||
fd::get_all_endpoint_states.set(r, [](std::unique_ptr<request> req) {
|
||||
return gms::get_all_endpoint_states().then([](const sstring& str) {
|
||||
return make_ready_future<json::json_return_type>(str);
|
||||
});
|
||||
std::vector<fd::endpoint_state> res;
|
||||
for (auto i : gms::get_local_gossiper().endpoint_state_map) {
|
||||
fd::endpoint_state val;
|
||||
val.addrs = boost::lexical_cast<std::string>(i.first);
|
||||
val.is_alive = i.second.is_alive();
|
||||
val.generation = i.second.get_heart_beat_state().get_generation();
|
||||
val.version = i.second.get_heart_beat_state().get_heart_beat_version();
|
||||
val.update_time = i.second.get_update_timestamp().time_since_epoch().count();
|
||||
for (auto a : i.second.get_application_state_map()) {
|
||||
fd::version_value version_val;
|
||||
// We return the enum index and not its name to stay compatible with origin,
|
||||
// where the state indexes are static but the names can be changed.
|
||||
version_val.application_state = static_cast<std::underlying_type<gms::application_state>::type>(a.first);
|
||||
version_val.value = a.second.value;
|
||||
val.application_state.push(version_val);
|
||||
}
|
||||
res.push_back(val);
|
||||
}
|
||||
return make_ready_future<json::json_return_type>(res);
|
||||
});
|
||||
|
||||
fd::get_up_endpoint_count.set(r, [](std::unique_ptr<request> req) {
|
||||
|
||||
@@ -119,6 +119,10 @@ void set_messaging_service(http_context& ctx, routes& r) {
|
||||
return c.sent_messages;
|
||||
}));
|
||||
|
||||
get_version.set(r, [](const_req req) {
|
||||
return net::get_local_messaging_service().get_raw_version(req.get_query_param("addr"));
|
||||
});
|
||||
|
||||
get_dropped_messages_by_ver.set(r, [](std::unique_ptr<request> req) {
|
||||
shared_ptr<std::vector<uint64_t>> map = make_shared<std::vector<uint64_t>>(num_verb, 0);
|
||||
|
||||
|
||||
@@ -33,8 +33,10 @@
|
||||
*
|
||||
*/
|
||||
class bytes_ostream {
|
||||
public:
|
||||
using size_type = bytes::size_type;
|
||||
using value_type = bytes::value_type;
|
||||
private:
|
||||
static_assert(sizeof(value_type) == 1, "value_type is assumed to be one byte long");
|
||||
struct chunk {
|
||||
// FIXME: group fragment pointers to reduce pointer chasing when packetizing
|
||||
@@ -117,13 +119,13 @@ private:
|
||||
};
|
||||
}
|
||||
public:
|
||||
bytes_ostream()
|
||||
bytes_ostream() noexcept
|
||||
: _begin()
|
||||
, _current(nullptr)
|
||||
, _size(0)
|
||||
{ }
|
||||
|
||||
bytes_ostream(bytes_ostream&& o)
|
||||
bytes_ostream(bytes_ostream&& o) noexcept
|
||||
: _begin(std::move(o._begin))
|
||||
, _current(o._current)
|
||||
, _size(o._size)
|
||||
@@ -148,7 +150,7 @@ public:
|
||||
return *this;
|
||||
}
|
||||
|
||||
bytes_ostream& operator=(bytes_ostream&& o) {
|
||||
bytes_ostream& operator=(bytes_ostream&& o) noexcept {
|
||||
_size = o._size;
|
||||
_begin = std::move(o._begin);
|
||||
_current = o._current;
|
||||
|
||||
@@ -856,7 +856,7 @@ dropIndexStatement returns [DropIndexStatement expr]
|
||||
* TRUNCATE <CF>;
|
||||
*/
|
||||
truncateStatement returns [::shared_ptr<truncate_statement> stmt]
|
||||
: K_TRUNCATE cf=columnFamilyName { $stmt = ::make_shared<truncate_statement>(cf); }
|
||||
: K_TRUNCATE (K_COLUMNFAMILY)? cf=columnFamilyName { $stmt = ::make_shared<truncate_statement>(cf); }
|
||||
;
|
||||
|
||||
#if 0
|
||||
|
||||
145
database.cc
145
database.cc
@@ -416,6 +416,23 @@ static std::vector<sstring> parse_fname(sstring filename) {
|
||||
return comps;
|
||||
}
|
||||
|
||||
static bool belongs_to_current_shard(const schema& s, const partition_key& first, const partition_key& last) {
|
||||
auto key_shard = [&s] (const partition_key& pk) {
|
||||
auto token = dht::global_partitioner().get_token(s, pk);
|
||||
return dht::shard_of(token);
|
||||
};
|
||||
auto s1 = key_shard(first);
|
||||
auto s2 = key_shard(last);
|
||||
auto me = engine().cpu_id();
|
||||
return (s1 <= me) && (me <= s2);
|
||||
}
|
||||
|
||||
static bool belongs_to_current_shard(const schema& s, range<partition_key> r) {
|
||||
assert(r.start());
|
||||
assert(r.end());
|
||||
return belongs_to_current_shard(s, r.start()->value(), r.end()->value());
|
||||
}
|
||||
|
||||
future<sstables::entry_descriptor> column_family::probe_file(sstring sstdir, sstring fname) {
|
||||
|
||||
using namespace sstables;
|
||||
@@ -432,12 +449,21 @@ future<sstables::entry_descriptor> column_family::probe_file(sstring sstdir, sst
|
||||
update_sstables_known_generation(comps.generation);
|
||||
assert(_sstables->count(comps.generation) == 0);
|
||||
|
||||
auto sst = std::make_unique<sstables::sstable>(_schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
|
||||
auto fut = sst->load();
|
||||
return std::move(fut).then([this, sst = std::move(sst)] () mutable {
|
||||
add_sstable(std::move(*sst));
|
||||
return make_ready_future<>();
|
||||
}).then_wrapped([fname, comps = std::move(comps)] (future<> f) {
|
||||
auto fut = sstable::get_sstable_key_range(*_schema, _schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
|
||||
return std::move(fut).then([this, sstdir = std::move(sstdir), comps] (range<partition_key> r) {
|
||||
// Checks whether or not sstable belongs to current shard.
|
||||
if (!belongs_to_current_shard(*_schema, std::move(r))) {
|
||||
sstable::mark_sstable_for_deletion(_schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
auto sst = std::make_unique<sstables::sstable>(_schema->ks_name(), _schema->cf_name(), sstdir, comps.generation, comps.version, comps.format);
|
||||
auto fut = sst->load();
|
||||
return std::move(fut).then([this, sst = std::move(sst)] () mutable {
|
||||
add_sstable(std::move(*sst));
|
||||
return make_ready_future<>();
|
||||
});
|
||||
}).then_wrapped([fname, comps] (future<> f) {
|
||||
try {
|
||||
f.get();
|
||||
} catch (malformed_sstable_exception& e) {
|
||||
@@ -462,19 +488,6 @@ void column_family::add_sstable(sstables::sstable&& sstable) {
|
||||
}
|
||||
|
||||
void column_family::add_sstable(lw_shared_ptr<sstables::sstable> sstable) {
|
||||
auto key_shard = [this] (const partition_key& pk) {
|
||||
auto token = dht::global_partitioner().get_token(*_schema, pk);
|
||||
return dht::shard_of(token);
|
||||
};
|
||||
auto s1 = key_shard(sstable->get_first_partition_key(*_schema));
|
||||
auto s2 = key_shard(sstable->get_last_partition_key(*_schema));
|
||||
auto me = engine().cpu_id();
|
||||
auto included = (s1 <= me) && (me <= s2);
|
||||
if (!included) {
|
||||
dblog.info("sstable {} not relevant for this shard, ignoring", sstable->get_filename());
|
||||
sstable->mark_for_deletion();
|
||||
return;
|
||||
}
|
||||
auto generation = sstable->generation();
|
||||
// allow in-progress reads to continue using old list
|
||||
_sstables = make_lw_shared<sstable_list>(*_sstables);
|
||||
@@ -745,7 +758,11 @@ column_family::load_new_sstables(std::vector<sstables::entry_descriptor> new_tab
|
||||
return sst->load().then([this, sst] {
|
||||
return sst->mutate_sstable_level(0);
|
||||
}).then([this, sst] {
|
||||
this->add_sstable(sst);
|
||||
auto first = sst->get_first_partition_key(*_schema);
|
||||
auto last = sst->get_last_partition_key(*_schema);
|
||||
if (belongs_to_current_shard(*_schema, first, last)) {
|
||||
this->add_sstable(sst);
|
||||
}
|
||||
return make_ready_future<>();
|
||||
});
|
||||
});
|
||||
@@ -837,9 +854,10 @@ future<> column_family::populate(sstring sstdir) {
|
||||
auto verifier = make_lw_shared<std::unordered_map<unsigned long, status>>();
|
||||
auto descriptor = make_lw_shared<sstable_descriptor>();
|
||||
|
||||
return lister::scan_dir(sstdir, { directory_entry_type::regular }, [this, sstdir, verifier, descriptor] (directory_entry de) {
|
||||
return do_with(std::vector<future<>>(), [this, sstdir, verifier, descriptor] (std::vector<future<>>& futures) {
|
||||
return lister::scan_dir(sstdir, { directory_entry_type::regular }, [this, sstdir, verifier, descriptor, &futures] (directory_entry de) {
|
||||
// FIXME: The secondary indexes are in this level, but with a directory type, (starting with ".")
|
||||
return probe_file(sstdir, de.name).then([verifier, descriptor] (auto entry) {
|
||||
auto f = probe_file(sstdir, de.name).then([verifier, descriptor] (auto entry) {
|
||||
if (verifier->count(entry.generation)) {
|
||||
if (verifier->at(entry.generation) == status::has_toc_file) {
|
||||
if (entry.component == sstables::sstable::component_type::TOC) {
|
||||
@@ -870,6 +888,23 @@ future<> column_family::populate(sstring sstdir) {
|
||||
descriptor->format = entry.format;
|
||||
}
|
||||
});
|
||||
|
||||
// push future returned by probe_file into an array of futures,
|
||||
// so that the supplied callback will not block scan_dir() from
|
||||
// reading the next entry in the directory.
|
||||
futures.push_back(std::move(f));
|
||||
|
||||
return make_ready_future<>();
|
||||
}).then([&futures] {
|
||||
return when_all(futures.begin(), futures.end()).then([] (std::vector<future<>> ret) {
|
||||
try {
|
||||
for (auto& f : ret) {
|
||||
f.get();
|
||||
}
|
||||
} catch(...) {
|
||||
throw;
|
||||
}
|
||||
});
|
||||
}).then([verifier, sstdir, descriptor, this] {
|
||||
return parallel_for_each(*verifier, [sstdir = std::move(sstdir), descriptor, this] (auto v) {
|
||||
if (v.second == status::has_temporary_toc_file) {
|
||||
@@ -891,6 +926,7 @@ future<> column_family::populate(sstring sstdir) {
|
||||
return make_ready_future<>();
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
utils::UUID database::empty_version = utils::UUID_gen::get_name_UUID(bytes{});
|
||||
@@ -996,7 +1032,7 @@ template <typename Func>
|
||||
static future<>
|
||||
do_parse_system_tables(distributed<service::storage_proxy>& proxy, const sstring& _cf_name, Func&& func) {
|
||||
using namespace db::schema_tables;
|
||||
static_assert(std::is_same<future<>, std::result_of_t<Func(schema_result::value_type&)>>::value,
|
||||
static_assert(std::is_same<future<>, std::result_of_t<Func(schema_result_value_type&)>>::value,
|
||||
"bad Func signature");
|
||||
|
||||
|
||||
@@ -1031,11 +1067,11 @@ do_parse_system_tables(distributed<service::storage_proxy>& proxy, const sstring
|
||||
|
||||
future<> database::parse_system_tables(distributed<service::storage_proxy>& proxy) {
|
||||
using namespace db::schema_tables;
|
||||
return do_parse_system_tables(proxy, db::schema_tables::KEYSPACES, [this] (schema_result::value_type &v) {
|
||||
return do_parse_system_tables(proxy, db::schema_tables::KEYSPACES, [this] (schema_result_value_type &v) {
|
||||
auto ksm = create_keyspace_from_schema_partition(v);
|
||||
return create_keyspace(ksm);
|
||||
}).then([&proxy, this] {
|
||||
return do_parse_system_tables(proxy, db::schema_tables::COLUMNFAMILIES, [this, &proxy] (schema_result::value_type &v) {
|
||||
return do_parse_system_tables(proxy, db::schema_tables::COLUMNFAMILIES, [this, &proxy] (schema_result_value_type &v) {
|
||||
return create_tables_from_tables_partition(proxy, v.second).then([this] (std::map<sstring, schema_ptr> tables) {
|
||||
for (auto& t: tables) {
|
||||
auto s = t.second;
|
||||
@@ -1462,7 +1498,7 @@ column_family::query(const query::read_command& cmd, const std::vector<query::pa
|
||||
}).finally([lc, this]() mutable {
|
||||
_stats.reads.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_read.add(lc.latency_in_nano(), _stats.reads.count);
|
||||
_stats.estimated_read.add(lc.latency(), _stats.reads.count);
|
||||
}
|
||||
});
|
||||
}
|
||||
@@ -1476,28 +1512,14 @@ column_family::as_mutation_source() const {
|
||||
|
||||
future<lw_shared_ptr<query::result>>
|
||||
database::query(const query::read_command& cmd, const std::vector<query::partition_range>& ranges) {
|
||||
static auto make_empty = [] {
|
||||
return make_ready_future<lw_shared_ptr<query::result>>(make_lw_shared(query::result()));
|
||||
};
|
||||
|
||||
try {
|
||||
column_family& cf = find_column_family(cmd.cf_id);
|
||||
return cf.query(cmd, ranges);
|
||||
} catch (const no_such_column_family&) {
|
||||
// FIXME: load from sstables
|
||||
return make_empty();
|
||||
}
|
||||
column_family& cf = find_column_family(cmd.cf_id);
|
||||
return cf.query(cmd, ranges);
|
||||
}
|
||||
|
||||
future<reconcilable_result>
|
||||
database::query_mutations(const query::read_command& cmd, const query::partition_range& range) {
|
||||
try {
|
||||
column_family& cf = find_column_family(cmd.cf_id);
|
||||
return mutation_query(cf.as_mutation_source(), range, cmd.slice, cmd.row_limit, cmd.timestamp);
|
||||
} catch (const no_such_column_family&) {
|
||||
// FIXME: load from sstables
|
||||
return make_ready_future<reconcilable_result>(reconcilable_result());
|
||||
}
|
||||
column_family& cf = find_column_family(cmd.cf_id);
|
||||
return mutation_query(cf.as_mutation_source(), range, cmd.slice, cmd.row_limit, cmd.timestamp);
|
||||
}
|
||||
|
||||
std::unordered_set<sstring> database::get_initial_tokens() {
|
||||
@@ -1512,6 +1534,31 @@ std::unordered_set<sstring> database::get_initial_tokens() {
|
||||
return tokens;
|
||||
}
|
||||
|
||||
// Returns the address of the node this node is configured to replace, if any.
//
// The `replace_address` option takes precedence; `replace_address_first_boot`
// is consulted only when `replace_address` is empty. An empty optional is
// returned when neither option is set, or when constructing the
// gms::inet_address from the configured string throws.
std::experimental::optional<gms::inet_address> database::get_replace_address() {
    auto& cfg = get_config();
    sstring addr = cfg.replace_address();
    sstring addr_first_boot = cfg.replace_address_first_boot();

    // Pick the single candidate string up front; replace_address wins.
    const sstring& chosen = !addr.empty() ? addr : addr_first_boot;
    if (chosen.empty()) {
        return std::experimental::nullopt;
    }

    try {
        return gms::inet_address(chosen);
    } catch (...) {
        // Any failure to build the address is reported as "no address".
        return std::experimental::nullopt;
    }
}
|
||||
|
||||
bool database::is_replacing() {
|
||||
sstring replace_address_first_boot = get_config().replace_address_first_boot();
|
||||
if (!replace_address_first_boot.empty() && db::system_keyspace::bootstrap_complete()) {
|
||||
dblog.info("Replace address on first boot requested; this node is already bootstrapped");
|
||||
return false;
|
||||
}
|
||||
return bool(get_replace_address());
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& out, const atomic_cell_or_collection& c) {
|
||||
return out << to_hex(c._data);
|
||||
}
|
||||
@@ -1541,8 +1588,7 @@ future<> database::apply_in_memory(const frozen_mutation& m, const db::replay_po
|
||||
auto& cf = find_column_family(m.column_family_id());
|
||||
cf.apply(m, rp);
|
||||
} catch (no_such_column_family&) {
|
||||
// TODO: log a warning
|
||||
// FIXME: load keyspace meta-data from storage
|
||||
dblog.error("Attempting to mutate non-existent table {}", m.column_family_id());
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}
|
||||
@@ -1975,7 +2021,11 @@ future<> column_family::clear_snapshot(sstring tag) {
|
||||
future<std::unordered_map<sstring, column_family::snapshot_details>> column_family::get_snapshot_details() {
|
||||
std::unordered_map<sstring, snapshot_details> all_snapshots;
|
||||
return do_with(std::move(all_snapshots), [this] (auto& all_snapshots) {
|
||||
return lister::scan_dir(_config.datadir + "/snapshots", { directory_entry_type::directory }, [this, &all_snapshots] (directory_entry de) {
|
||||
return engine().file_exists(_config.datadir + "/snapshots").then([this, &all_snapshots](bool file_exists) {
|
||||
if (!file_exists) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return lister::scan_dir(_config.datadir + "/snapshots", { directory_entry_type::directory }, [this, &all_snapshots] (directory_entry de) {
|
||||
auto snapshot_name = de.name;
|
||||
auto snapshot = _config.datadir + "/snapshots/" + snapshot_name;
|
||||
all_snapshots.emplace(snapshot_name, snapshot_details());
|
||||
@@ -2010,6 +2060,7 @@ future<std::unordered_map<sstring, column_family::snapshot_details>> column_fami
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
}).then([&all_snapshots] {
|
||||
return std::move(all_snapshots);
|
||||
});
|
||||
|
||||
@@ -648,6 +648,8 @@ public:
|
||||
}
|
||||
|
||||
std::unordered_set<sstring> get_initial_tokens();
|
||||
std::experimental::optional<gms::inet_address> get_replace_address();
|
||||
bool is_replacing();
|
||||
};
|
||||
|
||||
// FIXME: stub
|
||||
@@ -662,7 +664,7 @@ column_family::apply(const mutation& m, const db::replay_position& rp) {
|
||||
seal_on_overflow();
|
||||
_stats.writes.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_write.add(lc.latency_in_nano(), _stats.writes.count);
|
||||
_stats.estimated_write.add(lc.latency(), _stats.writes.count);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -696,7 +698,7 @@ column_family::apply(const frozen_mutation& m, const db::replay_position& rp) {
|
||||
seal_on_overflow();
|
||||
_stats.writes.mark(lc);
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_write.add(lc.latency_in_nano(), _stats.writes.count);
|
||||
_stats.estimated_write.add(lc.latency(), _stats.writes.count);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -90,7 +90,7 @@ public:
|
||||
|
||||
db::commitlog::config::config(const db::config& cfg)
|
||||
: commit_log_location(cfg.commitlog_directory())
|
||||
, commitlog_total_space_in_mb(cfg.commitlog_total_space_in_mb() >= 0 ? cfg.commitlog_total_space_in_mb() : memory::stats().total_memory())
|
||||
, commitlog_total_space_in_mb(cfg.commitlog_total_space_in_mb() >= 0 ? cfg.commitlog_total_space_in_mb() : memory::stats().total_memory() >> 20)
|
||||
, commitlog_segment_size_in_mb(cfg.commitlog_segment_size_in_mb())
|
||||
, commitlog_sync_period_in_ms(cfg.commitlog_sync_batch_window_in_ms())
|
||||
, mode(cfg.commitlog_sync() == "batch" ? sync_mode::BATCH : sync_mode::PERIODIC)
|
||||
@@ -1097,7 +1097,7 @@ db::commitlog::commitlog(config cfg)
|
||||
: _segment_manager(new segment_manager(std::move(cfg))) {
|
||||
}
|
||||
|
||||
db::commitlog::commitlog(commitlog&& v)
|
||||
db::commitlog::commitlog(commitlog&& v) noexcept
|
||||
: _segment_manager(std::move(v._segment_manager)) {
|
||||
}
|
||||
|
||||
@@ -1173,10 +1173,11 @@ const db::commitlog::config& db::commitlog::active_config() const {
|
||||
return _segment_manager->cfg;
|
||||
}
|
||||
|
||||
future<subscription<temporary_buffer<char>, db::replay_position>>
|
||||
future<std::unique_ptr<subscription<temporary_buffer<char>, db::replay_position>>>
|
||||
db::commitlog::read_log_file(const sstring& filename, commit_load_reader_func next, position_type off) {
|
||||
return engine().open_file_dma(filename, open_flags::ro).then([next = std::move(next), off](file f) {
|
||||
return read_log_file(std::move(f), std::move(next), off);
|
||||
return std::make_unique<subscription<temporary_buffer<char>, replay_position>>(
|
||||
read_log_file(std::move(f), std::move(next), off));
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1192,6 +1193,8 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
||||
size_t next = 0;
|
||||
size_t start_off = 0;
|
||||
size_t skip_to = 0;
|
||||
size_t file_size = 0;
|
||||
size_t corrupt_size = 0;
|
||||
bool eof = false;
|
||||
bool header = true;
|
||||
|
||||
@@ -1289,7 +1292,11 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
||||
|
||||
auto cs = crc.checksum();
|
||||
if (cs != checksum) {
|
||||
throw std::runtime_error("Checksum error in chunk header");
|
||||
// if a chunk header checksum is broken, we shall just assume that all
|
||||
// remaining is as well. We cannot trust the "next" pointer, so...
|
||||
logger.debug("Checksum error in segment chunk at {}.", pos);
|
||||
corrupt_size += (file_size - pos);
|
||||
return stop();
|
||||
}
|
||||
|
||||
this->next = next;
|
||||
@@ -1315,21 +1322,24 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
||||
auto size = in.read<uint32_t>();
|
||||
auto checksum = in.read<uint32_t>();
|
||||
|
||||
if (size == 0) {
|
||||
// special scylla case: zero padding due to dma blocks
|
||||
auto slack = next - pos;
|
||||
return skip(slack);
|
||||
}
|
||||
crc32_nbo crc;
|
||||
crc.process(size);
|
||||
|
||||
if (size < 3 * sizeof(uint32_t)) {
|
||||
throw std::runtime_error("Invalid entry size");
|
||||
if (size < 3 * sizeof(uint32_t) || checksum != crc.checksum()) {
|
||||
auto slack = next - pos;
|
||||
if (size != 0) {
|
||||
logger.debug("Segment entry at {} has broken header. Skipping to next chunk ({} bytes)", rp, slack);
|
||||
corrupt_size += slack;
|
||||
}
|
||||
// size == 0 -> special scylla case: zero padding due to dma blocks
|
||||
return skip(slack);
|
||||
}
|
||||
|
||||
if (start_off > pos) {
|
||||
return skip(size - entry_header_size);
|
||||
}
|
||||
|
||||
return fin.read_exactly(size - entry_header_size).then([this, size, checksum, rp](temporary_buffer<char> buf) {
|
||||
return fin.read_exactly(size - entry_header_size).then([this, size, crc = std::move(crc), rp](temporary_buffer<char> buf) mutable {
|
||||
advance(buf);
|
||||
|
||||
data_input in(buf);
|
||||
@@ -1338,12 +1348,15 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
||||
in.skip(data_size);
|
||||
auto checksum = in.read<uint32_t>();
|
||||
|
||||
crc32_nbo crc;
|
||||
crc.process(size);
|
||||
crc.process_bytes(buf.get(), data_size);
|
||||
|
||||
if (crc.checksum() != checksum) {
|
||||
throw std::runtime_error("Checksum error in data entry");
|
||||
// If we're getting a checksum error here, most likely the rest of
|
||||
// the file will be corrupt as well. But it does not hurt to retry.
|
||||
// Just go to the next entry (since "size" in header seemed ok).
|
||||
logger.debug("Segment entry at {} checksum error. Skipping {} bytes", rp, size);
|
||||
corrupt_size += size;
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
return s.produce(buf.share(0, data_size), rp);
|
||||
@@ -1351,10 +1364,18 @@ db::commitlog::read_log_file(file f, commit_load_reader_func next, position_type
|
||||
});
|
||||
}
|
||||
future<> read_file() {
|
||||
return read_header().then(
|
||||
[this] {
|
||||
return do_until(std::bind(&work::end_of_file, this), std::bind(&work::read_chunk, this));
|
||||
});
|
||||
return f.size().then([this](uint64_t size) {
|
||||
file_size = size;
|
||||
}).then([this] {
|
||||
return read_header().then(
|
||||
[this] {
|
||||
return do_until(std::bind(&work::end_of_file, this), std::bind(&work::read_chunk, this));
|
||||
}).then([this] {
|
||||
if (corrupt_size > 0) {
|
||||
throw segment_data_corruption_error("Data corruption", corrupt_size);
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
@@ -1382,6 +1403,10 @@ uint64_t db::commitlog::get_completed_tasks() const {
|
||||
return _segment_manager->totals.allocation_count;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_flush_count() const {
|
||||
return _segment_manager->totals.flush_count;
|
||||
}
|
||||
|
||||
uint64_t db::commitlog::get_pending_tasks() const {
|
||||
return _segment_manager->totals.pending_operations;
|
||||
}
|
||||
|
||||
@@ -139,7 +139,7 @@ public:
|
||||
const uint32_t ver;
|
||||
};
|
||||
|
||||
commitlog(commitlog&&);
|
||||
commitlog(commitlog&&) noexcept;
|
||||
~commitlog();
|
||||
|
||||
/**
|
||||
@@ -231,6 +231,7 @@ public:
|
||||
|
||||
uint64_t get_total_size() const;
|
||||
uint64_t get_completed_tasks() const;
|
||||
uint64_t get_flush_count() const;
|
||||
uint64_t get_pending_tasks() const;
|
||||
uint64_t get_num_segments_created() const;
|
||||
uint64_t get_num_segments_destroyed() const;
|
||||
@@ -265,8 +266,21 @@ public:
|
||||
|
||||
typedef std::function<future<>(temporary_buffer<char>, replay_position)> commit_load_reader_func;
|
||||
|
||||
// Exception describing corruption found in a commit log segment.
// Carries, in addition to the runtime_error message, a byte count supplied
// by the thrower and retrievable through bytes().
class segment_data_corruption_error: public std::runtime_error {
    uint64_t _corrupt_bytes;
public:
    // msg: text reported by what(); s: byte count reported by bytes().
    segment_data_corruption_error(std::string msg, uint64_t s)
        : std::runtime_error(msg)
        , _corrupt_bytes(s)
    {}

    // Byte count passed at construction.
    uint64_t bytes() const { return _corrupt_bytes; }
};
|
||||
|
||||
static subscription<temporary_buffer<char>, replay_position> read_log_file(file, commit_load_reader_func, position_type = 0);
|
||||
static future<subscription<temporary_buffer<char>, replay_position>> read_log_file(const sstring&, commit_load_reader_func, position_type = 0);
|
||||
static future<std::unique_ptr<subscription<temporary_buffer<char>, replay_position>>> read_log_file(
|
||||
const sstring&, commit_load_reader_func, position_type = 0);
|
||||
private:
|
||||
commitlog(config);
|
||||
};
|
||||
|
||||
@@ -69,6 +69,7 @@ public:
|
||||
uint64_t invalid_mutations = 0;
|
||||
uint64_t skipped_mutations = 0;
|
||||
uint64_t applied_mutations = 0;
|
||||
uint64_t corrupt_bytes = 0;
|
||||
};
|
||||
|
||||
future<> process(stats*, temporary_buffer<char> buf, replay_position rp);
|
||||
@@ -166,9 +167,16 @@ db::commitlog_replayer::impl::recover(sstring file) {
|
||||
return db::commitlog::read_log_file(file,
|
||||
std::bind(&impl::process, this, s.get(), std::placeholders::_1,
|
||||
std::placeholders::_2), p).then([](auto s) {
|
||||
auto f = s.done();
|
||||
auto f = s->done();
|
||||
return f.finally([s = std::move(s)] {});
|
||||
}).then([s] {
|
||||
}).then_wrapped([s](future<> f) {
|
||||
try {
|
||||
f.get();
|
||||
} catch (commitlog::segment_data_corruption_error& e) {
|
||||
s->corrupt_bytes += e.bytes();
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
return make_ready_future<stats>(*s);
|
||||
});
|
||||
}
|
||||
@@ -233,7 +241,7 @@ db::commitlog_replayer::commitlog_replayer(seastar::sharded<cql3::query_processo
|
||||
: _impl(std::make_unique<impl>(qp))
|
||||
{}
|
||||
|
||||
db::commitlog_replayer::commitlog_replayer(commitlog_replayer&& r)
|
||||
db::commitlog_replayer::commitlog_replayer(commitlog_replayer&& r) noexcept
|
||||
: _impl(std::move(r._impl))
|
||||
{}
|
||||
|
||||
@@ -250,31 +258,32 @@ future<db::commitlog_replayer> db::commitlog_replayer::create_replayer(seastar::
|
||||
}
|
||||
|
||||
future<> db::commitlog_replayer::recover(std::vector<sstring> files) {
|
||||
logger.info("Replaying {}", files);
|
||||
|
||||
return parallel_for_each(files, [this](auto f) {
|
||||
return this->recover(f).handle_exception([f](auto ep) {
|
||||
logger.error("Error recovering {}: {}", f, ep);
|
||||
try {
|
||||
std::rethrow_exception(ep);
|
||||
} catch (std::invalid_argument&) {
|
||||
logger.error("Scylla cannot process {}. Make sure to fully flush all Cassandra commit log files to sstable before migrating.");
|
||||
throw;
|
||||
} catch (...) {
|
||||
throw;
|
||||
}
|
||||
});
|
||||
return this->recover(f);
|
||||
});
|
||||
}
|
||||
|
||||
// Replays a single commit log file and logs the outcome.
//
// On success, logs a warning if the replay statistics report corrupt bytes,
// then an info line with the applied / invalid / skipped mutation counts.
// On failure, logs the error and propagates the original exception. For
// std::invalid_argument an extra hint naming the offending file is logged
// first.
future<> db::commitlog_replayer::recover(sstring f) {
    return _impl->recover(f).then([f](impl::stats stats) {
        if (stats.corrupt_bytes != 0) {
            logger.warn("Corrupted file: {}. {} bytes skipped.", f, stats.corrupt_bytes);
        }
        logger.info("Log replay of {} complete, {} replayed mutations ({} invalid, {} skipped)"
                        , f
                        , stats.applied_mutations
                        , stats.invalid_mutations
                        , stats.skipped_mutations
                        );
    }).handle_exception([f](auto ep) {
        logger.error("Error recovering {}: {}", f, ep);
        try {
            std::rethrow_exception(ep);
        } catch (std::invalid_argument&) {
            // The format string has a "{}" placeholder for the file name;
            // pass it so the message actually identifies the file.
            logger.error("Scylla cannot process {}. Make sure to fully flush all Cassandra commit log files to sstable before migrating.", f);
            throw;
        }
        // Any other exception type propagates out of the try unchanged.
    });
}
|
||||
|
||||
|
||||
@@ -57,7 +57,7 @@ class commitlog;
|
||||
|
||||
class commitlog_replayer {
|
||||
public:
|
||||
commitlog_replayer(commitlog_replayer&&);
|
||||
commitlog_replayer(commitlog_replayer&&) noexcept;
|
||||
~commitlog_replayer();
|
||||
|
||||
static future<commitlog_replayer> create_replayer(seastar::sharded<cql3::query_processor>&);
|
||||
|
||||
11
db/config.hh
11
db/config.hh
@@ -290,7 +290,7 @@ public:
|
||||
"Related information: Configuring compaction" \
|
||||
) \
|
||||
/* Common fault detection setting */ \
|
||||
val(phi_convict_threshold, uint32_t, 8, Unused, \
|
||||
val(phi_convict_threshold, uint32_t, 8, Used, \
|
||||
"Adjusts the sensitivity of the failure detector on an exponential scale. Generally this setting never needs adjusting.\n" \
|
||||
"Related information: Failure detection and recovery" \
|
||||
) \
|
||||
@@ -560,7 +560,7 @@ public:
|
||||
) \
|
||||
/* RPC (remote procedure call) settings */ \
|
||||
/* Settings for configuring and tuning client connections. */ \
|
||||
val(broadcast_rpc_address, sstring, /* unset */, Unused, \
|
||||
val(broadcast_rpc_address, sstring, /* unset */, Used, \
|
||||
"RPC address to broadcast to drivers and other Cassandra nodes. This cannot be set to 0.0.0.0. If blank, it is set to the value of the rpc_address or rpc_interface. If rpc_address or rpc_interfaceis set to 0.0.0.0, this property must be set.\n" \
|
||||
) \
|
||||
val(rpc_port, uint16_t, 9160, Used, \
|
||||
@@ -743,6 +743,13 @@ public:
|
||||
val(api_ui_dir, sstring, "swagger-ui/dist/", Used, "The directory location of the API GUI") \
|
||||
val(api_doc_dir, sstring, "api/api-doc/", Used, "The API definition file directory") \
|
||||
val(load_balance, sstring, "none", Used, "CQL request load balancing: 'none' or round-robin'") \
|
||||
val(consistent_rangemovement, bool, true, Used, "When set to true, range movements will be consistent. It means: 1) it will refuse to bootstrapp a new node if other bootstrapping/leaving/moving nodes detected. 2) data will be streamed to a new node only from the node which is no longer responsible for the token range. Same as -Dcassandra.consistent.rangemovement in cassandra") \
|
||||
val(join_ring, bool, true, Used, "When set to true, a node will join the token ring. When set to false, a node will not join the token ring. User can use nodetool join to initiate ring joinging later. Same as -Dcassandra.join_ring in cassandra.") \
|
||||
val(load_ring_state, bool, true, Used, "When set to true, load tokens and host_ids previously saved. Same as -Dcassandra.load_ring_state in cassandra.") \
|
||||
val(replace_node, sstring, "", Used, "The UUID of the node to replace. Same as -Dcassandra.replace_node in cssandra.") \
|
||||
val(replace_token, sstring, "", Used, "The tokens of the node to replace. Same as -Dcassandra.replace_token in cassandra.") \
|
||||
val(replace_address, sstring, "", Used, "The listen_address or broadcast_address of the dead node to replace. Same as -Dcassandra.replace_address.") \
|
||||
val(replace_address_first_boot, sstring, "", Used, "Like replace_address option, but if the node has been bootstrapped sucessfully it will be ignored. Same as -Dcassandra.replace_address_first_boot.") \
|
||||
/* done! */
|
||||
|
||||
#define _make_value_member(name, type, deflt, status, desc, ...) \
|
||||
|
||||
@@ -398,18 +398,18 @@ read_schema_for_keyspaces(distributed<service::storage_proxy>& proxy, const sstr
|
||||
return map_reduce(keyspace_names.begin(), keyspace_names.end(), map, schema_result{}, insert);
|
||||
}
|
||||
|
||||
future<schema_result::value_type>
|
||||
future<schema_result_value_type>
|
||||
read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name)
|
||||
{
|
||||
auto schema = proxy.local().get_db().local().find_schema(system_keyspace::NAME, schema_table_name);
|
||||
auto keyspace_key = dht::global_partitioner().decorate_key(*schema,
|
||||
partition_key::from_singular(*schema, keyspace_name));
|
||||
return db::system_keyspace::query(proxy, schema_table_name, keyspace_key).then([keyspace_name] (auto&& rs) {
|
||||
return schema_result::value_type{keyspace_name, std::move(rs)};
|
||||
return schema_result_value_type{keyspace_name, std::move(rs)};
|
||||
});
|
||||
}
|
||||
|
||||
future<schema_result::value_type>
|
||||
future<schema_result_value_type>
|
||||
read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name, const sstring& table_name)
|
||||
{
|
||||
auto schema = proxy.local().get_db().local().find_schema(system_keyspace::NAME, schema_table_name);
|
||||
@@ -417,7 +417,7 @@ read_schema_partition_for_table(distributed<service::storage_proxy>& proxy, cons
|
||||
partition_key::from_singular(*schema, keyspace_name));
|
||||
auto clustering_range = query::clustering_range(clustering_key_prefix::from_clustering_prefix(*schema, exploded_clustering_prefix({utf8_type->decompose(table_name)})));
|
||||
return db::system_keyspace::query(proxy, schema_table_name, keyspace_key, clustering_range).then([keyspace_name] (auto&& rs) {
|
||||
return schema_result::value_type{keyspace_name, std::move(rs)};
|
||||
return schema_result_value_type{keyspace_name, std::move(rs)};
|
||||
});
|
||||
}
|
||||
|
||||
@@ -528,7 +528,7 @@ future<> do_merge_schema(distributed<service::storage_proxy>& proxy, std::vector
|
||||
|
||||
future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after)
|
||||
{
|
||||
std::vector<schema_result::value_type> created;
|
||||
std::vector<schema_result_value_type> created;
|
||||
std::vector<sstring> altered;
|
||||
std::set<sstring> dropped;
|
||||
|
||||
@@ -552,7 +552,7 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
|
||||
for (auto&& key : diff.entries_only_on_right) {
|
||||
auto&& value = after[key];
|
||||
if (!value->empty()) {
|
||||
created.emplace_back(schema_result::value_type{key, std::move(value)});
|
||||
created.emplace_back(schema_result_value_type{key, std::move(value)});
|
||||
}
|
||||
}
|
||||
for (auto&& key : diff.entries_differing) {
|
||||
@@ -566,7 +566,7 @@ future<std::set<sstring>> merge_keyspaces(distributed<service::storage_proxy>& p
|
||||
} else if (!pre->empty()) {
|
||||
dropped.emplace(keyspace_name);
|
||||
} else if (!post->empty()) { // a (re)created keyspace
|
||||
created.emplace_back(schema_result::value_type{key, std::move(post)});
|
||||
created.emplace_back(schema_result_value_type{key, std::move(post)});
|
||||
}
|
||||
}
|
||||
return do_with(std::move(created), [&proxy, altered = std::move(altered)] (auto& created) {
|
||||
@@ -899,7 +899,7 @@ std::vector<mutation> make_drop_keyspace_mutations(lw_shared_ptr<keyspace_metada
|
||||
*
|
||||
* @param partition Keyspace attributes in serialized form
|
||||
*/
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result::value_type& result)
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& result)
|
||||
{
|
||||
auto&& rs = result.second;
|
||||
if (rs->empty()) {
|
||||
@@ -1310,10 +1310,10 @@ void create_table_from_table_row_and_column_rows(schema_builder& builder, const
|
||||
builder.set_max_compaction_threshold(table_row.get_nonnull<int>("max_compaction_threshold"));
|
||||
}
|
||||
|
||||
#if 0
|
||||
if (result.has("comment"))
|
||||
cfm.comment(result.getString("comment"));
|
||||
#endif
|
||||
if (table_row.has("comment")) {
|
||||
builder.set_comment(table_row.get_nonnull<sstring>("comment"));
|
||||
}
|
||||
|
||||
if (table_row.has("memtable_flush_period_in_ms")) {
|
||||
builder.set_memtable_flush_period(table_row.get_nonnull<int32_t>("memtable_flush_period_in_ms"));
|
||||
}
|
||||
|
||||
@@ -55,6 +55,7 @@ namespace db {
|
||||
namespace schema_tables {
|
||||
|
||||
using schema_result = std::map<sstring, lw_shared_ptr<query::result_set>>;
|
||||
using schema_result_value_type = std::pair<sstring, lw_shared_ptr<query::result_set>>;
|
||||
|
||||
static constexpr auto KEYSPACES = "schema_keyspaces";
|
||||
static constexpr auto COLUMNFAMILIES = "schema_columnfamilies";
|
||||
@@ -74,7 +75,7 @@ future<utils::UUID> calculate_schema_digest(distributed<service::storage_proxy>&
|
||||
|
||||
future<std::vector<frozen_mutation>> convert_schema_to_mutations(distributed<service::storage_proxy>& proxy);
|
||||
|
||||
future<schema_result::value_type>
|
||||
future<schema_result_value_type>
|
||||
read_schema_partition_for_keyspace(distributed<service::storage_proxy>& proxy, const sstring& schema_table_name, const sstring& keyspace_name);
|
||||
|
||||
future<> merge_schema(distributed<service::storage_proxy>& proxy, std::vector<mutation> mutations);
|
||||
@@ -89,11 +90,11 @@ std::vector<mutation> make_create_keyspace_mutations(lw_shared_ptr<keyspace_meta
|
||||
|
||||
std::vector<mutation> make_drop_keyspace_mutations(lw_shared_ptr<keyspace_metadata> keyspace, api::timestamp_type timestamp);
|
||||
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result::value_type& partition);
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);
|
||||
|
||||
future<> merge_tables(distributed<service::storage_proxy>& proxy, schema_result&& before, schema_result&& after);
|
||||
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result::value_type& partition);
|
||||
lw_shared_ptr<keyspace_metadata> create_keyspace_from_schema_partition(const schema_result_value_type& partition);
|
||||
|
||||
mutation make_create_keyspace_mutation(lw_shared_ptr<keyspace_metadata> keyspace, api::timestamp_type timestamp, bool with_tables_and_types_and_functions = true);
|
||||
|
||||
|
||||
@@ -26,7 +26,6 @@
|
||||
#include "utils/data_output.hh"
|
||||
#include "bytes_ostream.hh"
|
||||
#include "bytes.hh"
|
||||
#include "mutation.hh"
|
||||
#include "keys.hh"
|
||||
#include "database_fwd.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
|
||||
@@ -45,6 +45,7 @@
|
||||
#include "log.hh"
|
||||
#include "streaming/stream_plan.hh"
|
||||
#include "streaming/stream_state.hh"
|
||||
#include "service/storage_service.hh"
|
||||
|
||||
namespace dht {
|
||||
|
||||
@@ -109,7 +110,6 @@ range_streamer::get_all_ranges_with_sources_for(const sstring& keyspace_name, st
|
||||
auto& ks = _db.local().find_keyspace(keyspace_name);
|
||||
auto& strat = ks.get_replication_strategy();
|
||||
|
||||
// std::unordered_multimap<range<token>, inet_address>
|
||||
auto tm = _metadata.clone_only_token_map();
|
||||
auto range_addresses = unordered_multimap_to_unordered_map(strat.get_range_addresses(tm));
|
||||
|
||||
@@ -205,9 +205,7 @@ range_streamer::get_all_ranges_with_strict_sources_for(const sstring& keyspace_n
|
||||
bool range_streamer::use_strict_sources_for_ranges(const sstring& keyspace_name) {
|
||||
auto& ks = _db.local().find_keyspace(keyspace_name);
|
||||
auto& strat = ks.get_replication_strategy();
|
||||
// FIXME: DatabaseDescriptor.isReplacing()
|
||||
auto is_replacing = false;
|
||||
return !is_replacing
|
||||
return !_db.local().is_replacing()
|
||||
&& use_strict_consistency()
|
||||
&& !_tokens.empty()
|
||||
&& _metadata.get_all_endpoints().size() != strat.get_replication_factor();
|
||||
@@ -224,25 +222,17 @@ void range_streamer::add_ranges(const sstring& keyspace_name, std::vector<range<
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: share code with unordered_multimap_to_unordered_map
|
||||
std::unordered_map<inet_address, std::vector<range<token>>> tmp;
|
||||
std::unordered_map<inet_address, std::vector<range<token>>> range_fetch_map;
|
||||
for (auto& x : get_range_fetch_map(ranges_for_keyspace, _source_filters, keyspace_name)) {
|
||||
auto& addr = x.first;
|
||||
auto& range_ = x.second;
|
||||
auto it = tmp.find(addr);
|
||||
if (it != tmp.end()) {
|
||||
it->second.push_back(range_);
|
||||
} else {
|
||||
tmp.emplace(addr, std::vector<range<token>>{range_});
|
||||
}
|
||||
range_fetch_map[x.first].emplace_back(x.second);
|
||||
}
|
||||
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
for (auto& x : tmp) {
|
||||
for (auto& x : range_fetch_map) {
|
||||
logger.debug("{} : range {} from source {} for keyspace {}", _description, x.second, x.first, keyspace_name);
|
||||
}
|
||||
}
|
||||
_to_fetch.emplace(keyspace_name, std::move(tmp));
|
||||
_to_fetch.emplace(keyspace_name, std::move(range_fetch_map));
|
||||
}
|
||||
|
||||
future<streaming::stream_state> range_streamer::fetch_async() {
|
||||
@@ -272,4 +262,8 @@ range_streamer::get_work_map(const std::unordered_multimap<range<token>, inet_ad
|
||||
return get_range_fetch_map(ranges_with_source_target, source_filters, keyspace);
|
||||
}
|
||||
|
||||
bool range_streamer::use_strict_consistency() {
|
||||
return service::get_local_storage_service().db().local().get_config().consistent_rangemovement();
|
||||
}
|
||||
|
||||
} // dht
|
||||
|
||||
@@ -62,10 +62,7 @@ public:
|
||||
using stream_plan = streaming::stream_plan;
|
||||
using stream_state = streaming::stream_state;
|
||||
using i_failure_detector = gms::i_failure_detector;
|
||||
static bool use_strict_consistency() {
|
||||
//FIXME: Boolean.parseBoolean(System.getProperty("cassandra.consistent.rangemovement","true"));
|
||||
return true;
|
||||
}
|
||||
static bool use_strict_consistency();
|
||||
public:
|
||||
/**
|
||||
* A filter applied to sources to stream from when constructing a fetch map.
|
||||
|
||||
2
dist/ami/files/scylla-ami
vendored
2
dist/ami/files/scylla-ami
vendored
Submodule dist/ami/files/scylla-ami updated: 3f371840c9...07b71180d3
92
dist/ami/files/scylla-setup.sh
vendored
92
dist/ami/files/scylla-setup.sh
vendored
@@ -1,52 +1,56 @@
|
||||
#!/bin/sh -e
|
||||
|
||||
if [ -b /dev/md0 ]; then
|
||||
RAIDCNT=`grep xvdb /proc/mdstat | wc -l`
|
||||
RAIDDEV=`grep xvdb /proc/mdstat | awk '{print $1}'`
|
||||
|
||||
if [ $RAIDCNT -ge 1 ]; then
|
||||
echo "RAID already constructed."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
dnf update -y
|
||||
|
||||
DISKS=""
|
||||
NR=0
|
||||
for i in xvd{b..z}; do
|
||||
if [ -b /dev/$i ];then
|
||||
echo Found disk /dev/$i
|
||||
DISKS="$DISKS /dev/$i"
|
||||
NR=$((NR+1))
|
||||
fi
|
||||
done
|
||||
|
||||
echo Creating RAID0 for scylla using $NR disk\(s\): $DISKS
|
||||
|
||||
if [ $NR -ge 1 ]; then
|
||||
mdadm --create --verbose --force --run /dev/md0 --level=0 -c256 --raid-devices=$NR $DISKS
|
||||
blockdev --setra 65536 /dev/md0
|
||||
mkfs.xfs /dev/md0 -f
|
||||
echo "DEVICE $DISKS" > /etc/mdadm.conf
|
||||
mdadm --detail --scan >> /etc/mdadm.conf
|
||||
UUID=`blkid /dev/md0 | awk '{print $2}'`
|
||||
mkdir /data
|
||||
echo "$UUID /data xfs noatime,discard 0 0" >> /etc/fstab
|
||||
mount /data
|
||||
mount -o noatime /dev/$RAIDDEV /var/lib/scylla
|
||||
else
|
||||
echo "WARN: Scylla is not using XFS to store data. Perforamnce will suffer." > /home/fedora/WARN_PLEASE_READ.TXT
|
||||
echo "RAID does not constructed, going to initialize..."
|
||||
|
||||
dnf update -y
|
||||
|
||||
DISKS=""
|
||||
NR=0
|
||||
for i in xvd{b..z}; do
|
||||
if [ -b /dev/$i ];then
|
||||
echo Found disk /dev/$i
|
||||
DISKS="$DISKS /dev/$i"
|
||||
NR=$((NR+1))
|
||||
fi
|
||||
done
|
||||
|
||||
echo Creating RAID0 for scylla using $NR disk\(s\): $DISKS
|
||||
|
||||
if [ $NR -ge 1 ]; then
|
||||
mdadm --create --verbose --force --run /dev/md0 --level=0 -c256 --raid-devices=$NR $DISKS
|
||||
blockdev --setra 65536 /dev/md0
|
||||
mkfs.xfs /dev/md0 -f
|
||||
echo "DEVICE $DISKS" > /etc/mdadm.conf
|
||||
mdadm --detail --scan >> /etc/mdadm.conf
|
||||
UUID=`blkid /dev/md0 | awk '{print $2}'`
|
||||
mount -o noatime /dev/md0 /var/lib/scylla
|
||||
else
|
||||
echo "WARN: Scylla is not using XFS to store data. Perforamnce will suffer." > /home/fedora/WARN_PLEASE_READ.TXT
|
||||
fi
|
||||
|
||||
mkdir -p /var/lib/scylla/data
|
||||
mkdir -p /var/lib/scylla/commitlog
|
||||
chown scylla:scylla /var/lib/scylla/*
|
||||
chown scylla:scylla /var/lib/scylla/
|
||||
|
||||
CPU_NR=`cat /proc/cpuinfo |grep processor|wc -l`
|
||||
if [ $CPU_NR -ge 8 ]; then
|
||||
NR=$((CPU_NR - 1))
|
||||
grep -v SCYLLA_ARGS /etc/sysconfig/scylla-server | grep -v SET_NIC > /tmp/scylla-server
|
||||
echo SCYLLA_ARGS=\"--cpuset 1-$NR --smp $NR\" >> /tmp/scylla-server
|
||||
echo SET_NIC=\"yes\" >> /tmp/scylla-server
|
||||
mv /tmp/scylla-server /etc/sysconfig/scylla-server
|
||||
fi
|
||||
|
||||
/usr/lib/scylla/scylla-ami/ds2_configure.py
|
||||
fi
|
||||
|
||||
mkdir -p /data/data
|
||||
mkdir -p /data/commitlog
|
||||
chown scylla:scylla /data/*
|
||||
|
||||
CPU_NR=`cat /proc/cpuinfo |grep processor|wc -l`
|
||||
if [ $CPU_NR -ge 8 ]; then
|
||||
NR=$((CPU_NR - 1))
|
||||
echo SCYLLA_ARGS=\"--cpuset 1-$NR --smp $NR\" >> /etc/sysconfig/scylla-server
|
||||
echo SET_NIC=\"yes\" >> /etc/sysconfig/scylla-server
|
||||
fi
|
||||
|
||||
/usr/lib/scylla/scylla-ami/ds2_configure.py
|
||||
systemctl disable scylla-setup.service
|
||||
systemctl enable scylla-server.service
|
||||
systemctl start scylla-server.service
|
||||
systemctl enable scylla-jmx.service
|
||||
systemctl start scylla-jmx.service
|
||||
|
||||
2
dist/ami/files/setup-ami.sh
vendored
2
dist/ami/files/setup-ami.sh
vendored
@@ -14,7 +14,5 @@ chmod a+rx /usr/lib/scylla/scylla-setup.sh
|
||||
mv /home/fedora/scylla-ami /usr/lib/scylla/scylla-ami
|
||||
chmod a+rx /usr/lib/scylla/scylla-ami/ds2_configure.py
|
||||
systemctl enable scylla-setup.service
|
||||
sed -e 's!/var/lib/scylla/data!/data/data!' -e 's!commitlog_directory: /var/lib/scylla/commitlog!commitlog_directory: /data/commitlog!' /var/lib/scylla/conf/scylla.yaml > /tmp/scylla.yaml
|
||||
mv /tmp/scylla.yaml /var/lib/scylla/conf
|
||||
grep -v ' - mounts' /etc/cloud/cloud.cfg > /tmp/cloud.cfg
|
||||
mv /tmp/cloud.cfg /etc/cloud/cloud.cfg
|
||||
|
||||
4
dist/common/scripts/scylla_prepare
vendored
4
dist/common/scripts/scylla_prepare
vendored
@@ -13,6 +13,10 @@ elif [ "$NETWORK_MODE" = "dpdk" ]; then
|
||||
for n in /sys/devices/system/node/node?; do
|
||||
echo $NR_HUGEPAGES > $n/hugepages/hugepages-2048kB/nr_hugepages
|
||||
done
|
||||
else # NETWORK_MODE = posix
|
||||
if [ "$SET_NIC" = "yes" ]; then
|
||||
sudo sh /usr/lib/scylla/posix_net_conf.sh >/dev/null 2>&1 || true
|
||||
fi
|
||||
fi
|
||||
. /etc/os-release
|
||||
if [ "$NAME" = "Ubuntu" ]; then
|
||||
|
||||
4
dist/common/scripts/scylla_run
vendored
4
dist/common/scripts/scylla_run
vendored
@@ -10,9 +10,5 @@ elif [ "$NETWORK_MODE" = "dpdk" ]; then
|
||||
args="$args --network-stack native --dpdk-pmd"
|
||||
fi
|
||||
|
||||
if [ "$SET_NIC" == "yes" ]; then
|
||||
sudo sh /usr/lib/scylla/posix_net_conf.sh >/dev/null 2>&1 || true
|
||||
fi
|
||||
|
||||
export HOME=/var/lib/scylla
|
||||
exec sudo -E -u $USER /usr/bin/scylla $args
|
||||
|
||||
2
dist/redhat/centos_dep/build_dependency.sh
vendored
2
dist/redhat/centos_dep/build_dependency.sh
vendored
@@ -21,7 +21,7 @@ if [ ! -f isl-0.14-3.fc22.src.rpm ]; then
|
||||
fi
|
||||
|
||||
if [ ! -f gcc-5.1.1-4.fc22.src.rpm ]; then
|
||||
wget http://download.fedoraproject.org/pub/fedora/linux/updates/22/SRPMS/g/gcc-5.1.1-4.fc22.src.rpm
|
||||
wget https://s3.amazonaws.com/scylla-centos-dep/gcc-5.1.1-4.fc22.src.rpm
|
||||
fi
|
||||
|
||||
if [ ! -f boost-1.57.0-6.fc22.src.rpm ]; then
|
||||
|
||||
11
dist/redhat/scylla-server.spec.in
vendored
11
dist/redhat/scylla-server.spec.in
vendored
@@ -57,6 +57,10 @@ install -m644 licenses/* $RPM_BUILD_ROOT%{_docdir}/scylla/licenses/
|
||||
install -d -m755 $RPM_BUILD_ROOT%{_sharedstatedir}/scylla/
|
||||
install -d -m755 $RPM_BUILD_ROOT%{_sharedstatedir}/scylla/data
|
||||
install -d -m755 $RPM_BUILD_ROOT%{_sharedstatedir}/scylla/commitlog
|
||||
install -d -m755 $RPM_BUILD_ROOT%{_prefix}/lib/scylla/swagger-ui
|
||||
cp -r swagger-ui/dist $RPM_BUILD_ROOT%{_prefix}/lib/scylla/swagger-ui
|
||||
install -d -m755 $RPM_BUILD_ROOT%{_prefix}/lib/scylla/api
|
||||
cp -r api/api-doc $RPM_BUILD_ROOT%{_prefix}/lib/scylla/api
|
||||
|
||||
%pre
|
||||
/usr/sbin/groupadd scylla 2> /dev/null || :
|
||||
@@ -73,6 +77,11 @@ if [ -d /var/lib/scylla/conf ] && [ ! -L /var/lib/scylla/conf ]; then
|
||||
fi
|
||||
|
||||
%post
|
||||
grep -v api_ui_dir /etc/scylla/scylla.yaml | grep -v api_doc_dir > /tmp/scylla.yaml
|
||||
echo "api_ui_dir: /usr/lib/scylla/swagger-ui/dist/" >> /tmp/scylla.yaml
|
||||
echo "api_doc_dir: /usr/lib/scylla/api/api-doc/" >> /tmp/scylla.yaml
|
||||
mv /tmp/scylla.yaml /etc/scylla/scylla.yaml
|
||||
|
||||
%systemd_post scylla-server.service
|
||||
|
||||
%preun
|
||||
@@ -113,6 +122,8 @@ rm -rf $RPM_BUILD_ROOT
|
||||
%{_prefix}/lib/scylla/dpdk_nic_bind.py
|
||||
%{_prefix}/lib/scylla/dpdk_nic_bind.pyc
|
||||
%{_prefix}/lib/scylla/dpdk_nic_bind.pyo
|
||||
%{_prefix}/lib/scylla/swagger-ui/dist/*
|
||||
%{_prefix}/lib/scylla/api/api-doc/*
|
||||
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/
|
||||
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/data
|
||||
%attr(0755,scylla,scylla) %dir %{_sharedstatedir}/scylla/commitlog
|
||||
|
||||
29
dist/ubuntu/build_deb.sh
vendored
29
dist/ubuntu/build_deb.sh
vendored
@@ -10,6 +10,14 @@ if [ -e debian ] || [ -e build/release ]; then
|
||||
mkdir build
|
||||
fi
|
||||
|
||||
RELEASE=`lsb_release -r|awk '{print $2}'`
|
||||
CODENAME=`lsb_release -c|awk '{print $2}'`
|
||||
if [ `grep -c $RELEASE dist/ubuntu/supported_release` -lt 1 ]; then
|
||||
echo "Unsupported release: $RELEASE"
|
||||
echo "Pless any key to continue..."
|
||||
read input
|
||||
fi
|
||||
|
||||
VERSION=$(./SCYLLA-VERSION-GEN)
|
||||
SCYLLA_VERSION=$(cat build/SCYLLA-VERSION-FILE)
|
||||
SCYLLA_RELEASE=$(cat build/SCYLLA-RELEASE-FILE)
|
||||
@@ -24,14 +32,29 @@ cp dist/common/sysconfig/scylla-server debian/scylla-server.default
|
||||
cp dist/ubuntu/changelog.in debian/changelog
|
||||
sed -i -e "s/@@VERSION@@/$SCYLLA_VERSION/g" debian/changelog
|
||||
sed -i -e "s/@@RELEASE@@/$SCYLLA_RELEASE/g" debian/changelog
|
||||
sed -i -e "s/@@CODENAME@@/$CODENAME/g" debian/changelog
|
||||
|
||||
sudo apt-get -y update
|
||||
|
||||
./dist/ubuntu/dep/build_dependency.sh
|
||||
|
||||
sudo apt-get -y install libyaml-cpp-dev liblz4-dev libsnappy-dev libcrypto++-dev libboost1.55-dev libjsoncpp-dev libaio-dev ragel ninja-build git libyaml-cpp0.5 liblz4-1 libsnappy1 libcrypto++9 libboost-program-options1.55.0 libboost-program-options1.55-dev libboost-system1.55.0 libboost-system1.55-dev libboost-thread1.55.0 libboost-thread1.55-dev libboost-test1.55.0 libboost-test1.55-dev libjsoncpp0 libaio1 hugepages software-properties-common libboost-filesystem1.55-dev libboost-filesystem1.55.0
|
||||
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
sudo apt-get -y update
|
||||
DEP="libyaml-cpp-dev liblz4-dev libsnappy-dev libcrypto++-dev libjsoncpp-dev libaio-dev ragel ninja-build git liblz4-1 libaio1 hugepages software-properties-common"
|
||||
|
||||
if [ "$RELEASE" = "14.04" ]; then
|
||||
DEP="$DEP libboost1.55-dev libboost-program-options1.55.0 libboost-program-options1.55-dev libboost-system1.55.0 libboost-system1.55-dev libboost-thread1.55.0 libboost-thread1.55-dev libboost-test1.55.0 libboost-test1.55-dev libboost-filesystem1.55-dev libboost-filesystem1.55.0 libsnappy1"
|
||||
else
|
||||
DEP="$DEP libboost-dev libboost-program-options-dev libboost-system-dev libboost-thread-dev libboost-test-dev libboost-filesystem-dev libboost-filesystem-dev libsnappy1v5"
|
||||
fi
|
||||
if [ "$RELEASE" = "15.10" ]; then
|
||||
DEP="$DEP libjsoncpp0v5 libcrypto++9v5 libyaml-cpp0.5v5 antlr3"
|
||||
else
|
||||
DEP="$DEP libjsoncpp0 libcrypto++9 libyaml-cpp0.5"
|
||||
fi
|
||||
sudo apt-get -y install $DEP
|
||||
if [ "$RELEASE" != "15.10" ]; then
|
||||
sudo add-apt-repository -y ppa:ubuntu-toolchain-r/test
|
||||
sudo apt-get -y update
|
||||
fi
|
||||
sudo apt-get -y install g++-5
|
||||
|
||||
debuild -r fakeroot -us -uc
|
||||
|
||||
2
dist/ubuntu/changelog.in
vendored
2
dist/ubuntu/changelog.in
vendored
@@ -1,4 +1,4 @@
|
||||
scylla-server (@@VERSION@@-@@RELEASE@@-ubuntu1) trusty; urgency=medium
|
||||
scylla-server (@@VERSION@@-@@RELEASE@@-ubuntu1) @@CODENAME@@; urgency=medium
|
||||
|
||||
* Initial release.
|
||||
|
||||
|
||||
2
dist/ubuntu/debian/control
vendored
2
dist/ubuntu/debian/control
vendored
@@ -4,7 +4,7 @@ Homepage: http://scylladb.com
|
||||
Section: database
|
||||
Priority: optional
|
||||
Standards-Version: 3.9.5
|
||||
Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3-tool, antlr3-c++-dev, ragel, g++-5, ninja-build, git, libboost-program-options1.55-dev, libboost-filesystem1.55-dev, libboost-system1.55-dev, libboost-thread1.55-dev, libboost-test1.55-dev
|
||||
Build-Depends: debhelper (>= 9), libyaml-cpp-dev, liblz4-dev, libsnappy-dev, libcrypto++-dev, libjsoncpp-dev, libaio-dev, libthrift-dev, thrift-compiler, antlr3, antlr3-c++-dev, ragel, g++-5, ninja-build, git, libboost-program-options1.55-dev | libboost-program-options-dev, libboost-filesystem1.55-dev | libboost-filesystem-dev, libboost-system1.55-dev | libboost-system-dev, libboost-thread1.55-dev | libboost-thread-dev, libboost-test1.55-dev | libboost-test-dev
|
||||
|
||||
Package: scylla-server
|
||||
Architecture: amd64
|
||||
|
||||
8
dist/ubuntu/debian/rules
vendored
8
dist/ubuntu/debian/rules
vendored
@@ -2,6 +2,8 @@
|
||||
|
||||
DOC = $(CURDIR)/debian/scylla-server/usr/share/doc/scylla-server
|
||||
SCRIPTS = $(CURDIR)/debian/scylla-server/usr/lib/scylla
|
||||
SWAGGER = $(SCRIPTS)/swagger-ui
|
||||
API = $(SCRIPTS)/api
|
||||
LIMITS= $(CURDIR)/debian/scylla-server/etc/security/limits.d
|
||||
LIBS = $(CURDIR)/debian/scylla-server/usr/lib
|
||||
CONF = $(CURDIR)/debian/scylla-server/etc/scylla
|
||||
@@ -33,6 +35,12 @@ override_dh_auto_install:
|
||||
cp $(CURDIR)/seastar/dpdk/tools/dpdk_nic_bind.py $(SCRIPTS)
|
||||
cp $(CURDIR)/dist/common/scripts/* $(SCRIPTS)
|
||||
|
||||
mkdir -p $(SWAGGER) && \
|
||||
cp -r $(CURDIR)/swagger-ui/dist $(SWAGGER)
|
||||
|
||||
mkdir -p $(API) && \
|
||||
cp -r $(CURDIR)/api/api-doc $(API)
|
||||
|
||||
mkdir -p $(CURDIR)/debian/scylla-server/usr/bin/ && \
|
||||
cp $(CURDIR)/build/release/scylla \
|
||||
$(CURDIR)/debian/scylla-server/usr/bin/
|
||||
|
||||
5
dist/ubuntu/debian/scylla-server.postinst
vendored
5
dist/ubuntu/debian/scylla-server.postinst
vendored
@@ -14,4 +14,9 @@ fi
|
||||
|
||||
ln -sfT /etc/scylla /var/lib/scylla/conf
|
||||
|
||||
grep -v api_ui_dir /etc/scylla/scylla.yaml | grep -v api_doc_dir > /tmp/scylla.yaml
|
||||
echo "api_ui_dir: /usr/lib/scylla/swagger-ui/dist/" >> /tmp/scylla.yaml
|
||||
echo "api_doc_dir: /usr/lib/scylla/api/api-doc/" >> /tmp/scylla.yaml
|
||||
mv /tmp/scylla.yaml /etc/scylla/scylla.yaml
|
||||
|
||||
#DEBHELPER#
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
antlr3-tool (3.5.2-ubuntu1) trusty; urgency=medium
|
||||
antlr3 (3.5.2-ubuntu1) trusty; urgency=medium
|
||||
|
||||
* Initial release.
|
||||
|
||||
@@ -1,12 +1,13 @@
|
||||
Source: antlr3-tool
|
||||
Source: antlr3
|
||||
Maintainer: Takuya ASADA <syuu@scylladb.com>
|
||||
Section: misc
|
||||
Priority: optional
|
||||
Standards-Version: 3.5.2
|
||||
Build-Depends: debhelper (>= 9)
|
||||
|
||||
Package: antlr3-tool
|
||||
Package: antlr3
|
||||
Architecture: all
|
||||
Depends: ${shlibs:Depends}, ${misc:Depends}, openjdk-7-jre-headless
|
||||
Replaces: antlr3-tool
|
||||
Description: language tool for constructing recognizers, compilers etc
|
||||
A language tool that provides a framework for constructing recognizers, interpreters, compilers, and translators from grammatical descriptions containing actions in a variety of target languages.
|
||||
12
dist/ubuntu/dep/antlr3-3.5.2/debian/rules
vendored
Executable file
12
dist/ubuntu/dep/antlr3-3.5.2/debian/rules
vendored
Executable file
@@ -0,0 +1,12 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
override_dh_auto_install:
|
||||
mkdir -p $(CURDIR)/debian/antlr3/usr/share/java
|
||||
cp $(CURDIR)/antlr-3.5.2-complete-no-st3.jar \
|
||||
$(CURDIR)/debian/antlr3/usr/share/java
|
||||
|
||||
mkdir -p $(CURDIR)/debian/antlr3/usr/bin
|
||||
cp $(CURDIR)/antlr3 \
|
||||
$(CURDIR)/debian/antlr3/usr/bin
|
||||
%:
|
||||
dh $@
|
||||
12
dist/ubuntu/dep/antlr3-tool-3.5.2/debian/rules
vendored
12
dist/ubuntu/dep/antlr3-tool-3.5.2/debian/rules
vendored
@@ -1,12 +0,0 @@
|
||||
#!/usr/bin/make -f
|
||||
|
||||
override_dh_auto_install:
|
||||
mkdir -p $(CURDIR)/debian/antlr3-tool/usr/share/java
|
||||
cp $(CURDIR)/antlr-3.5.2-complete-no-st3.jar \
|
||||
$(CURDIR)/debian/antlr3-tool/usr/share/java
|
||||
|
||||
mkdir -p $(CURDIR)/debian/antlr3-tool/usr/bin
|
||||
cp $(CURDIR)/antlr3 \
|
||||
$(CURDIR)/debian/antlr3-tool/usr/bin
|
||||
%:
|
||||
dh $@
|
||||
28
dist/ubuntu/dep/build_dependency.sh
vendored
28
dist/ubuntu/dep/build_dependency.sh
vendored
@@ -1,15 +1,25 @@
|
||||
#!/bin/sh -e
|
||||
|
||||
sudo apt-get -y install build-essential debhelper openjdk-7-jre-headless build-essential autoconf automake pkg-config libtool bison flex libboost1.55-dev libboost-test1.55-dev libevent-dev libglib2.0-dev libqt4-dev python-dev python-dbg php5-dev devscripts python-support xfslibs-dev
|
||||
RELEASE=`lsb_release -r|awk '{print $2}'`
|
||||
DEP="build-essential debhelper openjdk-7-jre-headless build-essential autoconf automake pkg-config libtool bison flex libevent-dev libglib2.0-dev libqt4-dev python-dev python-dbg php5-dev devscripts python-support xfslibs-dev"
|
||||
|
||||
if [ ! -f build/antlr3-tool_3.5.2-1_all.deb ]; then
|
||||
rm -rf build/antlr3-tool-3.5.2
|
||||
mkdir -p build/antlr3-tool-3.5.2
|
||||
cp -a dist/ubuntu/dep/antlr3-tool-3.5.2/* build/antlr3-tool-3.5.2
|
||||
cd build/antlr3-tool-3.5.2
|
||||
wget http://www.antlr3.org/download/antlr-3.5.2-complete-no-st3.jar
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
if [ "$RELEASE" = "14.04" ]; then
|
||||
DEP="$DEP libboost1.55-dev libboost-test1.55-dev"
|
||||
else
|
||||
DEP="$DEP libboost-dev libboost-test-dev"
|
||||
fi
|
||||
sudo apt-get -y install $DEP
|
||||
|
||||
if [ "$RELEASE" = "14.04" ]; then
|
||||
if [ ! -f build/antlr3_3.5.2-1_all.deb ]; then
|
||||
rm -rf build/antlr3-3.5.2
|
||||
mkdir -p build/antlr3-3.5.2
|
||||
cp -a dist/ubuntu/dep/antlr3-3.5.2/* build/antlr3-3.5.2
|
||||
cd build/antlr3-3.5.2
|
||||
wget http://www.antlr3.org/download/antlr-3.5.2-complete-no-st3.jar
|
||||
debuild -r fakeroot --no-tgz-check -us -uc
|
||||
cd -
|
||||
fi
|
||||
fi
|
||||
|
||||
if [ ! -f build/antlr3-c++-dev_3.5.2-1_all.deb ]; then
|
||||
|
||||
2
dist/ubuntu/supported_release
vendored
Normal file
2
dist/ubuntu/supported_release
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
14.04
|
||||
15.10
|
||||
@@ -21,6 +21,8 @@
|
||||
|
||||
#include "db/serializer.hh"
|
||||
#include "frozen_mutation.hh"
|
||||
#include "mutation_partition.hh"
|
||||
#include "mutation.hh"
|
||||
#include "partition_builder.hh"
|
||||
#include "mutation_partition_serializer.hh"
|
||||
#include "utils/UUID.hh"
|
||||
|
||||
@@ -23,9 +23,9 @@
|
||||
|
||||
#include "atomic_cell.hh"
|
||||
#include "keys.hh"
|
||||
#include "mutation.hh"
|
||||
#include "mutation_partition_view.hh"
|
||||
|
||||
class mutation;
|
||||
|
||||
// Immutable, compact form of mutation.
|
||||
//
|
||||
|
||||
@@ -43,12 +43,15 @@
|
||||
#include "gms/endpoint_state.hh"
|
||||
#include "gms/application_state.hh"
|
||||
#include "gms/inet_address.hh"
|
||||
#include "log.hh"
|
||||
#include <iostream>
|
||||
#include <chrono>
|
||||
|
||||
namespace gms {
|
||||
|
||||
extern logging::logger logger;
|
||||
static logging::logger logger("failure_detector");
|
||||
|
||||
constexpr std::chrono::milliseconds failure_detector::DEFAULT_MAX_PAUSE;
|
||||
|
||||
using clk = arrival_window::clk;
|
||||
|
||||
@@ -86,13 +89,13 @@ clk::duration arrival_window::get_max_interval() {
|
||||
return get_initial_value();
|
||||
}
|
||||
|
||||
void arrival_window::add(clk::time_point value) {
|
||||
void arrival_window::add(clk::time_point value, const gms::inet_address& ep) {
|
||||
if (_tlast > clk::time_point::min()) {
|
||||
auto inter_arrival_time = value - _tlast;
|
||||
if (inter_arrival_time <= get_max_interval()) {
|
||||
_arrival_intervals.add(inter_arrival_time.count());
|
||||
} else {
|
||||
logger.debug("failure_detector: Ignoring interval time of {}", inter_arrival_time.count());
|
||||
logger.debug("failure_detector: Ignoring interval time of {} for {}", inter_arrival_time.count(), ep);
|
||||
}
|
||||
} else {
|
||||
// We use a very large initial interval since the "right" average depends on the cluster size
|
||||
@@ -186,27 +189,28 @@ sstring failure_detector::get_endpoint_state(sstring address) {
|
||||
void failure_detector::append_endpoint_state(std::stringstream& ss, endpoint_state& state) {
|
||||
ss << " generation:" << state.get_heart_beat_state().get_generation() << "\n";
|
||||
ss << " heartbeat:" << state.get_heart_beat_state().get_heart_beat_version() << "\n";
|
||||
for (auto& entry : state.get_application_state_map()) {
|
||||
for (const auto& entry : state.get_application_state_map()) {
|
||||
auto& app_state = entry.first;
|
||||
auto& value = entry.second;
|
||||
auto& versioned_val = entry.second;
|
||||
if (app_state == application_state::TOKENS) {
|
||||
continue;
|
||||
}
|
||||
// FIXME: Add operator<< for application_state
|
||||
ss << " " << int32_t(app_state) << ":" << value.value << "\n";
|
||||
ss << " " << app_state << ":" << versioned_val.version << ":" << versioned_val.value << "\n";
|
||||
}
|
||||
const auto& app_state_map = state.get_application_state_map();
|
||||
if (app_state_map.count(application_state::TOKENS)) {
|
||||
ss << " TOKENS:" << app_state_map.at(application_state::TOKENS).version << ":<hidden>\n";
|
||||
} else {
|
||||
ss << " TOKENS: not present" << "\n";
|
||||
}
|
||||
}
|
||||
|
||||
void failure_detector::set_phi_convict_threshold(double phi) {
|
||||
// FIXME
|
||||
// DatabaseDescriptor.setPhiConvictThreshold(phi);
|
||||
_phi = phi;
|
||||
}
|
||||
|
||||
double failure_detector::get_phi_convict_threshold() {
|
||||
// FIXME: phi_convict_threshold must be between 5 and 16"
|
||||
// return DatabaseDescriptor.getPhiConvictThreshold();
|
||||
warn(unimplemented::cause::GOSSIP);
|
||||
return 8;
|
||||
return _phi;
|
||||
}
|
||||
|
||||
bool failure_detector::is_alive(inet_address ep) {
|
||||
@@ -220,10 +224,10 @@ void failure_detector::report(inet_address ep) {
|
||||
if (it == _arrival_samples.end()) {
|
||||
// avoid adding an empty ArrivalWindow to the Map
|
||||
auto heartbeat_window = arrival_window(SAMPLE_SIZE);
|
||||
heartbeat_window.add(now);
|
||||
heartbeat_window.add(now, ep);
|
||||
_arrival_samples.emplace(ep, heartbeat_window);
|
||||
} else {
|
||||
it->second.add(now);
|
||||
it->second.add(now, ep);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -235,8 +239,20 @@ void failure_detector::interpret(inet_address ep) {
|
||||
}
|
||||
arrival_window& hb_wnd = it->second;
|
||||
auto now = clk::now();
|
||||
auto diff = std::chrono::duration_cast<std::chrono::milliseconds>(now - _last_interpret);
|
||||
_last_interpret = now;
|
||||
if (diff > get_max_local_pause()) {
|
||||
logger.warn("Not marking nodes down due to local pause of {} > {} (milliseconds)", diff.count(), get_max_local_pause().count());
|
||||
_last_paused = now;
|
||||
return;
|
||||
}
|
||||
if (clk::now() - _last_paused < get_max_local_pause()) {
|
||||
logger.debug("Still not marking nodes down due to local pause");
|
||||
return;
|
||||
}
|
||||
double phi = hb_wnd.phi(now);
|
||||
logger.trace("failure_detector: PHI for {} : {}", ep, phi);
|
||||
logger.trace("failure_detector: phi_convict_threshold={}", _phi);
|
||||
|
||||
if (PHI_FACTOR * phi > get_phi_convict_threshold()) {
|
||||
logger.trace("failure_detector: notifying listeners that {} is down", ep);
|
||||
|
||||
@@ -78,7 +78,7 @@ public:
|
||||
// this value defaults to the same initial value the FD is seeded with
|
||||
static clk::duration get_max_interval();
|
||||
|
||||
void add(clk::time_point value);
|
||||
void add(clk::time_point value, const gms::inet_address& ep);
|
||||
|
||||
double mean();
|
||||
|
||||
@@ -105,9 +105,34 @@ private:
|
||||
static constexpr double PHI_FACTOR{1.0 / std::log(10.0)}; // 0.434...
|
||||
std::map<inet_address, arrival_window> _arrival_samples;
|
||||
std::list<i_failure_detection_event_listener*> _fd_evnt_listeners;
|
||||
double _phi = 8;
|
||||
|
||||
static constexpr std::chrono::milliseconds DEFAULT_MAX_PAUSE{5000};
|
||||
|
||||
std::chrono::milliseconds get_max_local_pause() {
|
||||
// FIXME: cassandra.max_local_pause_in_ms
|
||||
#if 0
|
||||
if (System.getProperty("cassandra.max_local_pause_in_ms") != null) {
|
||||
long pause = Long.parseLong(System.getProperty("cassandra.max_local_pause_in_ms"));
|
||||
logger.warn("Overriding max local pause time to {}ms", pause);
|
||||
return pause * 1000000L;
|
||||
} else {
|
||||
return DEFAULT_MAX_PAUSE;
|
||||
}
|
||||
#endif
|
||||
return DEFAULT_MAX_PAUSE;
|
||||
}
|
||||
|
||||
arrival_window::clk::time_point _last_interpret;
|
||||
arrival_window::clk::time_point _last_paused;
|
||||
|
||||
public:
|
||||
failure_detector() {
|
||||
_last_interpret = arrival_window::clk::now();
|
||||
}
|
||||
|
||||
failure_detector(double phi) : _phi(phi) {
|
||||
_last_interpret = arrival_window::clk::now();
|
||||
}
|
||||
|
||||
future<> stop() {
|
||||
@@ -188,7 +213,7 @@ inline future<> set_phi_convict_threshold(double phi) {
|
||||
});
|
||||
}
|
||||
|
||||
inline future<double> get_phi_convict_threshold() {
|
||||
inline future<double> get_phi_convict_threshold() {
|
||||
return smp::submit_to(0, [] {
|
||||
return get_local_failure_detector().get_phi_convict_threshold();
|
||||
});
|
||||
|
||||
190
gms/gossiper.cc
190
gms/gossiper.cc
@@ -62,7 +62,7 @@ namespace gms {
|
||||
|
||||
using clk = gossiper::clk;
|
||||
|
||||
logging::logger logger("gossip");
|
||||
static logging::logger logger("gossip");
|
||||
|
||||
constexpr std::chrono::milliseconds gossiper::INTERVAL;
|
||||
constexpr std::chrono::hours gossiper::A_VERY_LONG_TIME;
|
||||
@@ -251,8 +251,8 @@ void gossiper::init_messaging_service_handler() {
|
||||
logger.debug("Ignoring shutdown message from {} because gossip is disabled", from);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
return seastar::async([from, fd = get_local_failure_detector().shared_from_this()] {
|
||||
fd->force_conviction(from);
|
||||
return seastar::async([from] {
|
||||
gms::get_local_gossiper().mark_as_shutdown(from);
|
||||
});
|
||||
}).handle_exception([] (auto ep) {
|
||||
logger.warn("Fail to handle GOSSIP_SHUTDOWN: {}", ep);
|
||||
@@ -423,6 +423,7 @@ void gossiper::remove_endpoint(inet_address endpoint) {
|
||||
}
|
||||
|
||||
_live_endpoints.erase(endpoint);
|
||||
_live_endpoints_just_added.remove(endpoint);
|
||||
_unreachable_endpoints.erase(endpoint);
|
||||
// do not remove endpointState until the quarantine expires
|
||||
get_local_failure_detector().remove(endpoint);
|
||||
@@ -663,11 +664,17 @@ void gossiper::convict(inet_address endpoint, double phi) {
|
||||
return;
|
||||
}
|
||||
auto& state = it->second;
|
||||
// FIXME: Add getGossipStatus
|
||||
// logger.debug("Convicting {} with status {} - alive {}", endpoint, getGossipStatus(epState), state.is_alive());
|
||||
if (!state.is_alive()) {
|
||||
return;
|
||||
}
|
||||
|
||||
logger.trace("convict ep={}, phi={}, is_alive={}, is_dead_state={}", endpoint, phi, state.is_alive(), is_dead_state(state));
|
||||
if (state.is_alive() && !is_dead_state(state)) {
|
||||
mark_dead(endpoint, state);
|
||||
if (is_shutdown(endpoint)) {
|
||||
mark_as_shutdown(endpoint);
|
||||
} else {
|
||||
state.mark_dead();
|
||||
mark_dead(endpoint, state);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -868,6 +875,12 @@ future<bool> gossiper::do_gossip_to_live_member(gossip_digest_syn message) {
|
||||
return make_ready_future<bool>(false);
|
||||
}
|
||||
logger.trace("do_gossip_to_live_member: live_endpoint nr={}", _live_endpoints.size());
|
||||
if (!_live_endpoints_just_added.empty()) {
|
||||
auto ep = _live_endpoints_just_added.front();
|
||||
_live_endpoints_just_added.pop_front();
|
||||
logger.info("do_gossip_to_live_member: Favor newly added node {}", ep);
|
||||
return send_gossip(message, std::set<inet_address>{ep});
|
||||
}
|
||||
return send_gossip(message, _live_endpoints);
|
||||
}
|
||||
|
||||
@@ -937,7 +950,7 @@ clk::time_point gossiper::get_expire_time_for_endpoint(inet_address endpoint) {
|
||||
}
|
||||
}
|
||||
|
||||
std::experimental::optional<endpoint_state> gossiper::get_endpoint_state_for_endpoint(inet_address ep) {
|
||||
std::experimental::optional<endpoint_state> gossiper::get_endpoint_state_for_endpoint(inet_address ep) const {
|
||||
auto it = endpoint_state_map.find(ep);
|
||||
if (it == endpoint_state_map.end()) {
|
||||
return {};
|
||||
@@ -950,6 +963,7 @@ void gossiper::reset_endpoint_state_map() {
|
||||
endpoint_state_map.clear();
|
||||
_unreachable_endpoints.clear();
|
||||
_live_endpoints.clear();
|
||||
_live_endpoints_just_added.clear();
|
||||
}
|
||||
|
||||
std::unordered_map<inet_address, endpoint_state>& gms::gossiper::get_endpoint_states() {
|
||||
@@ -1059,10 +1073,13 @@ void gossiper::real_mark_alive(inet_address addr, endpoint_state& local_state) {
|
||||
local_state.mark_alive();
|
||||
local_state.update_timestamp(); // prevents do_status_check from racing us and evicting if it was down > A_VERY_LONG_TIME
|
||||
_live_endpoints.insert(addr);
|
||||
_live_endpoints_just_added.push_back(addr);
|
||||
_unreachable_endpoints.erase(addr);
|
||||
_expire_time_endpoint_map.erase(addr);
|
||||
logger.debug("removing expire time for endpoint : {}", addr);
|
||||
logger.info("inet_address {} is now UP", addr);
|
||||
if (!_in_shadow_round) {
|
||||
logger.info("InetAddress {} is now UP", addr);
|
||||
}
|
||||
|
||||
_subscribers.for_each([addr, local_state] (auto& subscriber) {
|
||||
subscriber->on_alive(addr, local_state);
|
||||
@@ -1075,8 +1092,9 @@ void gossiper::mark_dead(inet_address addr, endpoint_state& local_state) {
|
||||
logger.trace("marking as down {}", addr);
|
||||
local_state.mark_dead();
|
||||
_live_endpoints.erase(addr);
|
||||
_live_endpoints_just_added.remove(addr);
|
||||
_unreachable_endpoints[addr] = now();
|
||||
logger.info("inet_address {} is now DOWN", addr);
|
||||
logger.info("InetAddress {} is now DOWN", addr);
|
||||
_subscribers.for_each([addr, local_state] (auto& subscriber) {
|
||||
subscriber->on_dead(addr, local_state);
|
||||
logger.trace("Notified {}", subscriber.get());
|
||||
@@ -1089,7 +1107,7 @@ void gossiper::handle_major_state_change(inet_address ep, const endpoint_state&
|
||||
if (endpoint_state_map.count(ep) > 0) {
|
||||
local_ep_state = endpoint_state_map.at(ep);
|
||||
}
|
||||
if (!is_dead_state(eps)) {
|
||||
if (!is_dead_state(eps) && !_in_shadow_round) {
|
||||
if (endpoint_state_map.count(ep)) {
|
||||
logger.info("Node {} has restarted, now UP", ep);
|
||||
} else {
|
||||
@@ -1117,6 +1135,10 @@ void gossiper::handle_major_state_change(inet_address ep, const endpoint_state&
|
||||
_subscribers.for_each([ep, ep_state] (auto& subscriber) {
|
||||
subscriber->on_join(ep, ep_state);
|
||||
});
|
||||
// check this at the end so nodes will learn about the endpoint
|
||||
if (is_shutdown(ep)) {
|
||||
mark_as_shutdown(ep);
|
||||
}
|
||||
}
|
||||
|
||||
bool gossiper::is_dead_state(const endpoint_state& eps) const {
|
||||
@@ -1136,6 +1158,47 @@ bool gossiper::is_dead_state(const endpoint_state& eps) const {
|
||||
return false;
|
||||
}
|
||||
|
||||
bool gossiper::is_shutdown(const inet_address& endpoint) const {
|
||||
auto ep_state = get_endpoint_state_for_endpoint(endpoint);
|
||||
if (!ep_state) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto app_state = ep_state->get_application_state(application_state::STATUS);
|
||||
if (!app_state) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto value = app_state->value;
|
||||
std::vector<sstring> pieces;
|
||||
boost::split(pieces, value, boost::is_any_of(","));
|
||||
assert(pieces.size() > 0);
|
||||
sstring state = pieces[0];
|
||||
|
||||
return state == sstring(versioned_value::SHUTDOWN);
|
||||
}
|
||||
|
||||
|
||||
bool gossiper::is_silent_shutdown_state(const endpoint_state& ep_state) const{
|
||||
auto app_state = ep_state.get_application_state(application_state::STATUS);
|
||||
if (!app_state) {
|
||||
return false;
|
||||
}
|
||||
|
||||
auto value = app_state->value;
|
||||
std::vector<sstring> pieces;
|
||||
boost::split(pieces, value, boost::is_any_of(","));
|
||||
assert(pieces.size() > 0);
|
||||
sstring state = pieces[0];
|
||||
|
||||
for (auto& deadstate : SILENT_SHUTDOWN_STATES) {
|
||||
if (state == deadstate) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::apply_new_states(inet_address addr, endpoint_state& local_state, const endpoint_state& remote_state) {
|
||||
// don't assert here, since if the node restarts the version will go back to zero
|
||||
@@ -1255,17 +1318,15 @@ void gossiper::examine_gossiper(std::vector<gossip_digest>& g_digest_list,
|
||||
}
|
||||
}
|
||||
|
||||
future<> gossiper::start(int generation_number) {
|
||||
return start(generation_number, std::map<application_state, versioned_value>());
|
||||
future<> gossiper::start_gossiping(int generation_number) {
|
||||
return start_gossiping(generation_number, std::map<application_state, versioned_value>());
|
||||
}
|
||||
|
||||
future<> gossiper::start(int generation_nbr, std::map<application_state, versioned_value> preload_local_states) {
|
||||
future<> gossiper::start_gossiping(int generation_nbr, std::map<application_state, versioned_value> preload_local_states) {
|
||||
// Although gossiper runs on cpu0 only, we need to listen incoming gossip
|
||||
// message on all cpus and forward them to cpu0 to process.
|
||||
return _handlers.start().then([this] {
|
||||
return _handlers.invoke_on_all([this] (handler& h) {
|
||||
this->init_messaging_service_handler();
|
||||
});
|
||||
return get_gossiper().invoke_on_all([] (gossiper& g) {
|
||||
g.init_messaging_service_handler();
|
||||
}).then([this, generation_nbr, preload_local_states] {
|
||||
build_seeds_list();
|
||||
/* initialize the heartbeat state for this localEndpoint */
|
||||
@@ -1382,46 +1443,60 @@ future<> gossiper::add_local_application_state(application_state state, versione
|
||||
});
|
||||
}
|
||||
|
||||
future<> gossiper::stop() {
|
||||
logger.debug("gossip::stop on cpu {}", engine().cpu_id());
|
||||
|
||||
if (engine().cpu_id() != 0) {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
future<> gossiper::do_stop_gossiping() {
|
||||
return seastar::async([this, g = this->shared_from_this()] {
|
||||
_enabled = false;
|
||||
_scheduled_gossip_task.cancel();
|
||||
logger.info("Announcing shutdown");
|
||||
sleep(INTERVAL * 2).get();
|
||||
for (inet_address addr : _live_endpoints) {
|
||||
shard_id id = get_shard_id(addr);
|
||||
logger.trace("Sending a GossipShutdown to {}", id);
|
||||
ms().send_gossip_shutdown(id, get_broadcast_address()).then_wrapped([id] (auto&&f) {
|
||||
try {
|
||||
f.get();
|
||||
logger.trace("Got GossipShutdown Reply");
|
||||
} catch (...) {
|
||||
logger.warn("Fail to send GossipShutdown to {}: {}", id, std::current_exception());
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
}
|
||||
_handlers.stop().then([this] () {
|
||||
logger.debug("gossip::handler::stop on cpu {}", engine().cpu_id());
|
||||
if (engine().cpu_id() == 0) {
|
||||
get_local_failure_detector().unregister_failure_detection_event_listener(this);
|
||||
auto my_ep_state = get_endpoint_state_for_endpoint(get_broadcast_address());
|
||||
if (my_ep_state && !is_silent_shutdown_state(*my_ep_state)) {
|
||||
logger.info("Announcing shutdown");
|
||||
add_local_application_state(application_state::STATUS, storage_service_value_factory().shutdown(true)).get();
|
||||
for (inet_address addr : _live_endpoints) {
|
||||
shard_id id = get_shard_id(addr);
|
||||
logger.trace("Sending a GossipShutdown to {}", id);
|
||||
ms().send_gossip_shutdown(id, get_broadcast_address()).then_wrapped([id] (auto&&f) {
|
||||
try {
|
||||
f.get();
|
||||
logger.trace("Got GossipShutdown Reply");
|
||||
} catch (...) {
|
||||
logger.warn("Fail to send GossipShutdown to {}: {}", id, std::current_exception());
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
}
|
||||
uninit_messaging_service_handler();
|
||||
// FIXME: Integer.getInteger("cassandra.shutdown_announce_in_ms", 2000)
|
||||
sleep(INTERVAL * 2).get();
|
||||
} else {
|
||||
logger.warn("No local state or state is in silent shutdown, not announcing shutdown");
|
||||
}
|
||||
_scheduled_gossip_task.cancel();
|
||||
get_gossiper().invoke_on_all([] (gossiper& g) {
|
||||
if (engine().cpu_id() == 0) {
|
||||
get_local_failure_detector().unregister_failure_detection_event_listener(&g);
|
||||
}
|
||||
g.uninit_messaging_service_handler();
|
||||
return make_ready_future<>();
|
||||
}).get();
|
||||
});
|
||||
}
|
||||
|
||||
// Public entry point for stopping gossip. The work itself is done by
// do_stop_gossiping(), which is invoked on the gossiper instance selected by
// invoke_on(0, ...); the returned future is the one produced by that call.
future<> gossiper::stop_gossiping() {
    constexpr unsigned target_instance = 0;
    return get_gossiper().invoke_on(target_instance, [] (gossiper& instance) {
        return instance.do_stop_gossiping();
    });
}
|
||||
|
||||
// Does no work of its own: it only hands back an already-resolved future.
// The gossip shutdown sequence is implemented by stop_gossiping() /
// do_stop_gossiping() instead.
future<> gossiper::stop() {
|
||||
return make_ready_future();
|
||||
}
|
||||
|
||||
// Returns the current value of the _enabled flag
// (do_stop_gossiping() sets that flag to false).
bool gossiper::is_enabled() {
|
||||
return _enabled;
|
||||
}
|
||||
|
||||
// Raises the _in_shadow_round flag; finish_shadow_round() is the
// counterpart that lowers it again.
void gossiper::goto_shadow_round() {
|
||||
_in_shadow_round = true;
|
||||
}
|
||||
|
||||
void gossiper::finish_shadow_round() {
|
||||
if (_in_shadow_round) {
|
||||
_in_shadow_round = false;
|
||||
@@ -1472,4 +1547,29 @@ bool gossiper::is_alive(inet_address ep) {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* This method is used to mark a node as shutdown; that is it gracefully exited on its own and told us about it
|
||||
* @param endpoint endpoint that has shut itself down
|
||||
*/
|
||||
// Runs inside seastar::async context
|
||||
void gossiper::mark_as_shutdown(const inet_address& endpoint) {
|
||||
auto it = endpoint_state_map.find(endpoint);
|
||||
if (it != endpoint_state_map.end()) {
|
||||
auto& ep_state = it->second;
|
||||
ep_state.add_application_state(application_state::STATUS, storage_service_value_factory().shutdown(true));
|
||||
ep_state.get_heart_beat_state().force_highest_possible_version_unsafe();
|
||||
mark_dead(endpoint, ep_state);
|
||||
get_local_failure_detector().force_conviction(endpoint);
|
||||
}
|
||||
}
|
||||
|
||||
void gossiper::force_newer_generation() {
|
||||
auto it = endpoint_state_map.find(get_broadcast_address());
|
||||
if (it != endpoint_state_map.end()) {
|
||||
auto& ep_state = it->second;
|
||||
ep_state.get_heart_beat_state().force_newer_generation_unsafe();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
} // namespace gms
|
||||
|
||||
@@ -86,13 +86,6 @@ private:
|
||||
// Shorthand accessor: returns whatever net::get_local_messaging_service()
// yields.
net::messaging_service& ms() {
|
||||
return net::get_local_messaging_service();
|
||||
}
|
||||
class handler {
|
||||
public:
|
||||
future<> stop() {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
};
|
||||
distributed<handler> _handlers;
|
||||
void init_messaging_service_handler();
|
||||
void uninit_messaging_service_handler();
|
||||
future<gossip_digest_ack> handle_syn_msg(gossip_digest_syn syn_msg);
|
||||
@@ -121,8 +114,19 @@ public:
|
||||
/* map where key is the endpoint and value is the state associated with the endpoint */
|
||||
std::unordered_map<inet_address, endpoint_state> endpoint_state_map;
|
||||
|
||||
const std::vector<sstring> DEAD_STATES = { versioned_value::REMOVING_TOKEN, versioned_value::REMOVED_TOKEN,
|
||||
versioned_value::STATUS_LEFT, versioned_value::HIBERNATE };
|
||||
const std::vector<sstring> DEAD_STATES = {
|
||||
versioned_value::REMOVING_TOKEN,
|
||||
versioned_value::REMOVED_TOKEN,
|
||||
versioned_value::STATUS_LEFT,
|
||||
versioned_value::HIBERNATE
|
||||
};
|
||||
// STATUS state names matched by is_silent_shutdown_state(). When the local
// node's STATUS is one of these, do_stop_gossiping() does not announce the
// shutdown to its peers.
const std::vector<sstring> SILENT_SHUTDOWN_STATES = {
|
||||
versioned_value::REMOVING_TOKEN,
|
||||
versioned_value::REMOVED_TOKEN,
|
||||
versioned_value::STATUS_LEFT,
|
||||
versioned_value::HIBERNATE,
|
||||
versioned_value::STATUS_BOOTSTRAPPING,
|
||||
};
|
||||
static constexpr std::chrono::milliseconds INTERVAL{1000};
|
||||
static constexpr std::chrono::hours A_VERY_LONG_TIME{24 * 3};
|
||||
|
||||
@@ -172,6 +176,7 @@ private:
|
||||
|
||||
/* live member set */
|
||||
std::set<inet_address> _live_endpoints;
|
||||
std::list<inet_address> _live_endpoints_just_added;
|
||||
|
||||
/* unreachable member set */
|
||||
std::map<inet_address, clk::time_point> _unreachable_endpoints;
|
||||
@@ -366,7 +371,7 @@ private:
|
||||
public:
|
||||
clk::time_point get_expire_time_for_endpoint(inet_address endpoint);
|
||||
|
||||
std::experimental::optional<endpoint_state> get_endpoint_state_for_endpoint(inet_address ep);
|
||||
std::experimental::optional<endpoint_state> get_endpoint_state_for_endpoint(inet_address ep) const;
|
||||
|
||||
// removes ALL endpoint states; should only be called after shadow gossip
|
||||
void reset_endpoint_state_map();
|
||||
@@ -437,12 +442,12 @@ public:
|
||||
std::map<inet_address, endpoint_state>& delta_ep_state_map);
|
||||
|
||||
public:
|
||||
future<> start(int generation_number);
|
||||
future<> start_gossiping(int generation_number);
|
||||
|
||||
/**
|
||||
* Start the gossiper with the generation number, preloading the map of application states before starting
|
||||
*/
|
||||
future<> start(int generation_nbr, std::map<application_state, versioned_value> preload_local_states);
|
||||
future<> start_gossiping(int generation_nbr, std::map<application_state, versioned_value> preload_local_states);
|
||||
|
||||
public:
|
||||
/**
|
||||
@@ -465,7 +470,11 @@ public:
|
||||
|
||||
future<> add_local_application_state(application_state state, versioned_value value);
|
||||
|
||||
// Needed by seastar::sharded
|
||||
future<> stop();
|
||||
future<> stop_gossiping();
|
||||
private:
|
||||
future<> do_stop_gossiping();
|
||||
|
||||
public:
|
||||
bool is_enabled();
|
||||
@@ -474,6 +483,8 @@ public:
|
||||
|
||||
bool is_in_shadow_round();
|
||||
|
||||
void goto_shadow_round();
|
||||
|
||||
public:
|
||||
void add_expire_time_for_endpoint(inet_address endpoint, clk::time_point expire_time);
|
||||
|
||||
@@ -481,6 +492,11 @@ public:
|
||||
public:
|
||||
void dump_endpoint_state_map();
|
||||
void debug_show();
|
||||
public:
|
||||
bool is_shutdown(const inet_address& endpoint) const;
|
||||
bool is_silent_shutdown_state(const endpoint_state& ep_state) const;
|
||||
void mark_as_shutdown(const inet_address& endpoint);
|
||||
void force_newer_generation();
|
||||
};
|
||||
|
||||
extern distributed<gossiper> _the_gossiper;
|
||||
|
||||
@@ -42,6 +42,7 @@
|
||||
#include "types.hh"
|
||||
#include "utils/serialization.hh"
|
||||
#include <ostream>
|
||||
#include <limits>
|
||||
|
||||
namespace gms {
|
||||
/**
|
||||
@@ -82,6 +83,10 @@ public:
|
||||
_generation += 1;
|
||||
}
|
||||
|
||||
// Overwrites _version with the largest value an int32_t can hold.
// gossiper::mark_as_shutdown() calls this on the state of a node that
// announced its own shutdown.
void force_highest_possible_version_unsafe() {
|
||||
_version = std::numeric_limits<int32_t>::max();
|
||||
}
|
||||
|
||||
// Streams the heartbeat as "{ generation = G, version = V }" and returns the
// stream so that calls can be chained.
friend inline std::ostream& operator<<(std::ostream& os, const heart_beat_state& h) {
|
||||
return os << "{ generation = " << h._generation << ", version = " << h._version << " }";
|
||||
}
|
||||
|
||||
@@ -49,6 +49,7 @@ constexpr const char* versioned_value::STATUS_MOVING;
|
||||
constexpr const char* versioned_value::REMOVING_TOKEN;
|
||||
constexpr const char* versioned_value::REMOVED_TOKEN;
|
||||
constexpr const char* versioned_value::HIBERNATE;
|
||||
constexpr const char* versioned_value::SHUTDOWN;
|
||||
constexpr const char* versioned_value::REMOVAL_COORDINATOR;
|
||||
|
||||
void versioned_value::serialize(bytes::iterator& out) const {
|
||||
|
||||
@@ -83,6 +83,7 @@ public:
|
||||
static constexpr const char* REMOVED_TOKEN = "removed";
|
||||
|
||||
static constexpr const char* HIBERNATE = "hibernate";
|
||||
static constexpr const char* SHUTDOWN = "shutdown";
|
||||
|
||||
// values for ApplicationState.REMOVAL_COORDINATOR
|
||||
static constexpr const char* REMOVAL_COORDINATOR = "REMOVER";
|
||||
@@ -134,11 +135,17 @@ public:
|
||||
class factory {
|
||||
using token = dht::token;
|
||||
public:
|
||||
sstring make_token_string(const std::unordered_set<token>& tokens) {
|
||||
sstring make_full_token_string(const std::unordered_set<token>& tokens) {
|
||||
return ::join(";", tokens | boost::adaptors::transformed([] (const token& t) {
|
||||
return dht::global_partitioner().to_sstring(t); })
|
||||
);
|
||||
}
|
||||
// Produces the string form of a single token taken from `tokens`: the one
// that begin() yields (an unordered_set gives no ordering guarantee).
// An empty set results in an empty string.
sstring make_token_string(const std::unordered_set<token>& tokens) {
    auto first = tokens.begin();
    if (first == tokens.end()) {
        return "";
    }
    return dht::global_partitioner().to_sstring(*first);
}
|
||||
|
||||
versioned_value clone_with_higher_version(const versioned_value& value) {
|
||||
return versioned_value(value.value);
|
||||
@@ -155,7 +162,7 @@ public:
|
||||
}
|
||||
|
||||
versioned_value load(double load) {
|
||||
return versioned_value(to_sstring_sprintf(load, "%g"));
|
||||
return versioned_value(to_sstring(load));
|
||||
}
|
||||
|
||||
versioned_value schema(const utils::UUID &new_version) {
|
||||
@@ -184,7 +191,7 @@ public:
|
||||
}
|
||||
|
||||
versioned_value tokens(const std::unordered_set<token>& tokens) {
|
||||
return versioned_value(make_token_string(tokens));
|
||||
return versioned_value(make_full_token_string(tokens));
|
||||
}
|
||||
|
||||
versioned_value removing_nonlocal(const utils::UUID& host_id) {
|
||||
@@ -206,6 +213,10 @@ public:
|
||||
return versioned_value(sstring(HIBERNATE) + sstring(DELIMITER_STR) + (value ? "true" : "false"));
|
||||
}
|
||||
|
||||
// Builds the STATUS value announcing a shutdown: the SHUTDOWN keyword, then
// DELIMITER_STR, then "true" or "false" according to `value`.
versioned_value shutdown(bool value) {
    const char* flag_text = value ? "true" : "false";
    return versioned_value(sstring(SHUTDOWN) + sstring(DELIMITER_STR) + flag_text);
}
|
||||
|
||||
// Wraps the datacenter identifier, unchanged, in a versioned_value.
versioned_value datacenter(const sstring& dc_id) {
|
||||
return versioned_value(dc_id);
|
||||
}
|
||||
@@ -231,7 +242,7 @@ public:
|
||||
}
|
||||
|
||||
versioned_value severity(double value) {
|
||||
return versioned_value(to_sstring_sprintf(value, "%g"));
|
||||
return versioned_value(to_sstring(value));
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
8
init.cc
8
init.cc
@@ -45,15 +45,15 @@ future<> init_storage_service(distributed<database>& db) {
|
||||
});
|
||||
}
|
||||
|
||||
future<> init_ms_fd_gossiper(sstring listen_address, uint16_t port, db::seed_provider_type seed_provider, sstring cluster_name) {
|
||||
future<> init_ms_fd_gossiper(sstring listen_address, uint16_t port, db::seed_provider_type seed_provider, sstring cluster_name, double phi) {
|
||||
const gms::inet_address listen(listen_address);
|
||||
// Init messaging_service
|
||||
return net::get_messaging_service().start(listen, std::move(port)).then([]{
|
||||
return net::get_messaging_service().start(listen, std::move(port)).then([] {
|
||||
// #293 - do not stop anything
|
||||
//engine().at_exit([] { return net::get_messaging_service().stop(); });
|
||||
}).then([] {
|
||||
}).then([phi] {
|
||||
// Init failure_detector
|
||||
return gms::get_failure_detector().start().then([] {
|
||||
return gms::get_failure_detector().start(std::move(phi)).then([] {
|
||||
// #293 - do not stop anything
|
||||
//engine().at_exit([]{ return gms::get_failure_detector().stop(); });
|
||||
});
|
||||
|
||||
2
init.hh
2
init.hh
@@ -27,4 +27,4 @@
|
||||
#include "database.hh"
|
||||
|
||||
future<> init_storage_service(distributed<database>& db);
|
||||
future<> init_ms_fd_gossiper(sstring listen_address, uint16_t storage_port, db::seed_provider_type seed_provider, sstring cluster_name = "Test Cluster");
|
||||
future<> init_ms_fd_gossiper(sstring listen_address, uint16_t storage_port, db::seed_provider_type seed_provider, sstring cluster_name = "Test Cluster", double phi = 8);
|
||||
|
||||
@@ -61,8 +61,7 @@ future<> ec2_multi_region_snitch::start() {
|
||||
// value to a public address in cassandra.yaml.
|
||||
//
|
||||
utils::fb_utilities::set_broadcast_address(local_public_address);
|
||||
//DatabaseDescriptor.setBroadcastRpcAddress(local_public_address);
|
||||
//
|
||||
utils::fb_utilities::set_broadcast_rpc_address(local_public_address);
|
||||
|
||||
return aws_api_call(AWS_QUERY_SERVER_ADDR, PRIVATE_IP_QUERY_REQ).then(
|
||||
[this] (sstring priv_addr) {
|
||||
|
||||
@@ -461,10 +461,23 @@ void token_metadata::calculate_pending_ranges(abstract_replication_strategy& str
|
||||
_pending_ranges[keyspace_name] = std::move(new_pending_ranges);
|
||||
|
||||
if (logger.is_enabled(logging::log_level::debug)) {
|
||||
// TODO: Enable printPendingRanges
|
||||
// logger.debug("Pending ranges: {}", (_pending_ranges.empty() ? "<empty>" : printPendingRanges()));
|
||||
logger.debug("Pending ranges: {}", (_pending_ranges.empty() ? "<empty>" : print_pending_ranges()));
|
||||
}
|
||||
}
|
||||
// Renders _pending_ranges as text for logging. Each keyspace gets a section
//
//   keyspace_name = <name> {
//   <mapped value> : <key>
//   ...
//   }
//
// with one line per entry of that keyspace's container.
sstring token_metadata::print_pending_ranges() {
    std::stringstream out;

    for (auto& per_keyspace : _pending_ranges) {
        out << "\nkeyspace_name = " << per_keyspace.first << " {\n";
        for (auto& entry : per_keyspace.second) {
            out << entry.second << " : " << entry.first << "\n";
        }
        out << "}\n";
    }

    return sstring(out.str());
}
|
||||
|
||||
void token_metadata::add_leaving_endpoint(inet_address endpoint) {
|
||||
_leaving_endpoints.emplace(endpoint);
|
||||
|
||||
@@ -865,23 +865,9 @@ public:
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
private String printPendingRanges()
|
||||
{
|
||||
StringBuilder sb = new StringBuilder();
|
||||
|
||||
for (Map.Entry<String, Multimap<Range<Token>, InetAddress>> entry : _pending_ranges.entrySet())
|
||||
{
|
||||
for (Map.Entry<Range<Token>, InetAddress> rmap : entry.getValue().entries())
|
||||
{
|
||||
sb.append(rmap.getValue()).append(":").append(rmap.getKey());
|
||||
sb.append(System.getProperty("line.separator"));
|
||||
}
|
||||
}
|
||||
|
||||
return sb.toString();
|
||||
}
|
||||
#endif
|
||||
sstring print_pending_ranges();
|
||||
public:
|
||||
std::vector<gms::inet_address> pending_endpoints_for(const token& token, const sstring& keyspace_name);
|
||||
#if 0
|
||||
/**
|
||||
|
||||
16
main.cc
16
main.cc
@@ -200,12 +200,14 @@ int main(int ac, char** av) {
|
||||
uint16_t storage_port = cfg->storage_port();
|
||||
ctx.api_dir = cfg->api_ui_dir();
|
||||
ctx.api_doc = cfg->api_doc_dir();
|
||||
double phi = cfg->phi_convict_threshold();
|
||||
sstring cluster_name = cfg->cluster_name();
|
||||
sstring listen_address = cfg->listen_address();
|
||||
sstring rpc_address = cfg->rpc_address();
|
||||
sstring api_address = cfg->api_address() != "" ? cfg->api_address() : rpc_address;
|
||||
auto seed_provider= cfg->seed_provider();
|
||||
sstring broadcast_address = cfg->broadcast_address();
|
||||
sstring broadcast_rpc_address = cfg->broadcast_rpc_address();
|
||||
|
||||
if (!broadcast_address.empty()) {
|
||||
utils::fb_utilities::set_broadcast_address(broadcast_address);
|
||||
@@ -216,6 +218,16 @@ int main(int ac, char** av) {
|
||||
throw bad_configuration_error();
|
||||
}
|
||||
|
||||
if (!broadcast_rpc_address.empty()) {
|
||||
utils::fb_utilities::set_broadcast_rpc_address(broadcast_rpc_address);
|
||||
} else {
|
||||
if (rpc_address == "0.0.0.0") {
|
||||
startlog.error("If rpc_address is set to a wildcard address {}, then you must set broadcast_rpc_address to a value other than {}", rpc_address, rpc_address);
|
||||
throw bad_configuration_error();
|
||||
}
|
||||
utils::fb_utilities::set_broadcast_rpc_address(rpc_address);
|
||||
}
|
||||
|
||||
using namespace locator;
|
||||
return i_endpoint_snitch::create_snitch(cfg->endpoint_snitch()).then([] {
|
||||
// #293 - do not stop anything
|
||||
@@ -236,8 +248,8 @@ int main(int ac, char** av) {
|
||||
});
|
||||
});
|
||||
});
|
||||
}).then([listen_address, storage_port, seed_provider, cluster_name] {
|
||||
return init_ms_fd_gossiper(listen_address, storage_port, seed_provider, cluster_name);
|
||||
}).then([listen_address, storage_port, seed_provider, cluster_name, phi] {
|
||||
return init_ms_fd_gossiper(listen_address, storage_port, seed_provider, cluster_name, phi);
|
||||
}).then([&db] {
|
||||
return streaming::stream_session::init_streaming_service(db);
|
||||
}).then([&proxy, &db] {
|
||||
|
||||
10
memtable.cc
10
memtable.cc
@@ -201,6 +201,16 @@ memtable::update(const db::replay_position& rp) {
|
||||
}
|
||||
}
|
||||
|
||||
// Applies the contents of another memtable to this one: a reader is created
// over `mt`, and every mutation it produces is passed to apply(mutation).
// The returned future is the one produced by do_with()/consume().
future<>
|
||||
memtable::apply(const memtable& mt) {
|
||||
// The reader is handed to do_with(), which passes it to the lambda as `rd`.
return do_with(mt.make_reader(), [this] (auto&& rd) mutable {
|
||||
// `self` holds a shared pointer to this memtable for as long as the
// consumer callback exists.
return consume(rd, [self = this->shared_from_this(), &rd] (mutation&& m) {
|
||||
self->apply(m);
|
||||
// Never request an early stop; presumably consume() then runs until the
// reader is exhausted -- confirm against consume()'s contract.
return stop_iteration::no;
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
void
|
||||
memtable::apply(const mutation& m, const db::replay_position& rp) {
|
||||
with_allocator(_region.allocator(), [this, &m] {
|
||||
|
||||
@@ -107,6 +107,7 @@ public:
|
||||
explicit memtable(schema_ptr schema, logalloc::region_group* dirty_memory_region_group = nullptr);
|
||||
~memtable();
|
||||
schema_ptr schema() const { return _schema; }
|
||||
future<> apply(const memtable&);
|
||||
void apply(const mutation& m, const db::replay_position& = db::replay_position());
|
||||
void apply(const frozen_mutation& m, const db::replay_position& = db::replay_position());
|
||||
const logalloc::region& region() const {
|
||||
|
||||
@@ -36,6 +36,8 @@
|
||||
|
||||
namespace net {
|
||||
|
||||
static logging::logger logger("messaging_service");
|
||||
|
||||
using inet_address = gms::inet_address;
|
||||
using gossip_digest_syn = gms::gossip_digest_syn;
|
||||
using gossip_digest_ack = gms::gossip_digest_ack;
|
||||
@@ -108,17 +110,11 @@ query::read_command net::serializer::read(Input& in, rpc::type<query::read_comma
|
||||
|
||||
template <typename Output>
|
||||
void net::serializer::write(Output& out, const query::result& v) const {
|
||||
// FIXME: allow const call to query::result::serialize()
|
||||
uint32_t sz = v.serialized_size();
|
||||
write(out, sz);
|
||||
bytes b(bytes::initialized_later(), sz);
|
||||
auto _out = b.begin();
|
||||
const_cast<query::result&>(v).serialize(_out);
|
||||
out.write(reinterpret_cast<const char*>(b.c_str()), sz);
|
||||
write_serializable(out, v);
|
||||
}
|
||||
template <typename Input>
|
||||
query::result net::serializer::read(Input& in, rpc::type<query::result>) const {
|
||||
return read_gms<query::result>(in);
|
||||
return read_serializable<query::result>(in);
|
||||
}
|
||||
|
||||
template <typename Output>
|
||||
@@ -351,7 +347,7 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
if (!c->error()) {
|
||||
return c;
|
||||
}
|
||||
remove_rpc_client(verb, id);
|
||||
remove_error_rpc_client(verb, id);
|
||||
}
|
||||
|
||||
auto remote_addr = ipv4_addr(get_preferred_ip(id.addr).raw_addr(), _port);
|
||||
@@ -361,9 +357,9 @@ shared_ptr<messaging_service::rpc_protocol_client_wrapper> messaging_service::ge
|
||||
return it->second.rpc_client;
|
||||
}
|
||||
|
||||
void messaging_service::remove_rpc_client_one(clients_map& clients, shard_id id) {
|
||||
void messaging_service::remove_rpc_client_one(clients_map& clients, shard_id id, bool dead_only) {
|
||||
auto it = clients.find(id);
|
||||
if (it != clients.end()) {
|
||||
if (it != clients.end() && (!dead_only || it->second.rpc_client->error())) {
|
||||
auto client = std::move(it->second.rpc_client);
|
||||
clients.erase(it);
|
||||
//
|
||||
@@ -372,17 +368,19 @@ void messaging_service::remove_rpc_client_one(clients_map& clients, shard_id id)
|
||||
// This will make sure messaging_service::stop() blocks until
|
||||
// client->stop() is over.
|
||||
//
|
||||
client->stop().finally([c = client, ms = shared_from_this()] {}).discard_result();
|
||||
client->stop().finally([id, client, ms = shared_from_this()] {
|
||||
logger.debug("dropped connection to {}", id.addr);
|
||||
}).discard_result();
|
||||
}
|
||||
}
|
||||
|
||||
void messaging_service::remove_rpc_client(messaging_verb verb, shard_id id) {
|
||||
remove_rpc_client_one(_clients[get_rpc_client_idx(verb)], id);
|
||||
void messaging_service::remove_error_rpc_client(messaging_verb verb, shard_id id) {
|
||||
remove_rpc_client_one(_clients[get_rpc_client_idx(verb)], id, true);
|
||||
}
|
||||
|
||||
void messaging_service::remove_rpc_client(shard_id id) {
|
||||
for (auto& c : _clients) {
|
||||
remove_rpc_client_one(c, id);
|
||||
remove_rpc_client_one(c, id, false);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -406,7 +404,7 @@ auto send_message(messaging_service* ms, messaging_verb verb, shard_id id, MsgOu
|
||||
return std::move(f);
|
||||
} catch (rpc::closed_error) {
|
||||
// This is a transport error
|
||||
ms->remove_rpc_client(verb, id);
|
||||
ms->remove_error_rpc_client(verb, id);
|
||||
throw;
|
||||
} catch (...) {
|
||||
// This is expected to be a rpc server error, e.g., the rpc handler throws a std::runtime_error.
|
||||
@@ -431,7 +429,7 @@ auto send_message_timeout(messaging_service* ms, messaging_verb verb, shard_id i
|
||||
return std::move(f);
|
||||
} catch (rpc::closed_error) {
|
||||
// This is a transport error
|
||||
ms->remove_rpc_client(verb, id);
|
||||
ms->remove_error_rpc_client(verb, id);
|
||||
throw;
|
||||
} catch (...) {
|
||||
// This is expected to be a rpc server error, e.g., the rpc handler throws a std::runtime_error.
|
||||
|
||||
@@ -550,8 +550,8 @@ public:
|
||||
public:
|
||||
// Return rpc::protocol::client for a shard which is a ip + cpuid pair.
|
||||
shared_ptr<rpc_protocol_client_wrapper> get_rpc_client(messaging_verb verb, shard_id id);
|
||||
void remove_rpc_client_one(clients_map& clients, shard_id id);
|
||||
void remove_rpc_client(messaging_verb verb, shard_id id);
|
||||
void remove_rpc_client_one(clients_map& clients, shard_id id, bool dead_only);
|
||||
void remove_error_rpc_client(messaging_verb verb, shard_id id);
|
||||
void remove_rpc_client(shard_id id);
|
||||
std::unique_ptr<rpc_protocol_wrapper>& rpc();
|
||||
};
|
||||
|
||||
@@ -73,6 +73,7 @@ public:
|
||||
std::experimental::optional<atomic_cell_or_collection> get_cell(const clustering_key& rkey, const column_definition& def) const;
|
||||
const partition_key& key() const { return _ptr->_dk._key; };
|
||||
const dht::decorated_key& decorated_key() const { return _ptr->_dk; };
|
||||
dht::ring_position ring_position() const { return { decorated_key() }; }
|
||||
const dht::token& token() const { return _ptr->_dk._token; }
|
||||
const schema_ptr& schema() const { return _ptr->_schema; }
|
||||
const mutation_partition& partition() const { return _ptr->_p; }
|
||||
|
||||
@@ -21,6 +21,7 @@
|
||||
*/
|
||||
|
||||
#include "mutation_partition_serializer.hh"
|
||||
#include "mutation_partition.hh"
|
||||
#include "db/serializer.hh"
|
||||
|
||||
//
|
||||
|
||||
@@ -25,6 +25,7 @@
|
||||
#include "db/serializer.hh"
|
||||
#include "utils/data_input.hh"
|
||||
#include "mutation_partition_serializer.hh"
|
||||
#include "mutation_partition.hh"
|
||||
|
||||
//
|
||||
// See mutation_partition_serializer.cc for representation layout.
|
||||
|
||||
78
noexcept_traits.hh
Normal file
78
noexcept_traits.hh
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright 2015 ScyllaDB
|
||||
*/
|
||||
|
||||
/*
|
||||
* This file is part of Scylla.
|
||||
*
|
||||
* Scylla is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Affero General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* Scylla is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <type_traits>
|
||||
#include <memory>
|
||||
#include <seastar/core/future.hh>
|
||||
|
||||
#pragma once
|
||||
|
||||
//
|
||||
// Utility for adapting types which are not nothrow move constructible into such
|
||||
// by wrapping them if necessary.
|
||||
//
|
||||
// Example usage:
|
||||
//
|
||||
// T val{};
|
||||
// using traits = noexcept_movable<T>;
|
||||
// auto f = make_ready_future<typename traits::type>(traits::wrap(std::move(val)));
|
||||
// T val2 = traits::unwrap(f.get0());
|
||||
//
|
||||
|
||||
template<typename T, typename Enable = void>
|
||||
struct noexcept_movable;
|
||||
|
||||
template<typename T>
|
||||
struct noexcept_movable<T, std::enable_if_t<std::is_nothrow_move_constructible<T>::value>> {
|
||||
using type = T;
|
||||
|
||||
static type wrap(T&& v) {
|
||||
return std::move(v);
|
||||
}
|
||||
|
||||
static future<T> wrap(future<T>&& v) {
|
||||
return std::move(v);
|
||||
}
|
||||
|
||||
static T unwrap(type&& v) {
|
||||
return std::move(v);
|
||||
}
|
||||
|
||||
static future<T> unwrap(future<type>&& v) {
|
||||
return std::move(v);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct noexcept_movable<T, std::enable_if_t<!std::is_nothrow_move_constructible<T>::value>> {
|
||||
using type = std::unique_ptr<T>;
|
||||
|
||||
static type wrap(T&& v) {
|
||||
return std::make_unique<T>(std::move(v));
|
||||
}
|
||||
|
||||
static T unwrap(type&& v) {
|
||||
return std::move(*v);
|
||||
}
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
using noexcept_movable_t = typename noexcept_movable<T>::type;
|
||||
@@ -25,6 +25,7 @@
|
||||
#include <cryptopp/md5.h>
|
||||
#include "bytes_ostream.hh"
|
||||
#include "query-request.hh"
|
||||
#include "db/serializer.hh"
|
||||
|
||||
namespace query {
|
||||
|
||||
@@ -139,17 +140,16 @@ public:
|
||||
return result_digest(std::move(b));
|
||||
}
|
||||
sstring pretty_print(schema_ptr, const query::partition_slice&) const;
|
||||
size_t serialized_size() const { return _w.size(); }
|
||||
void serialize(bytes::iterator& out) {
|
||||
auto v = _w.linearize();
|
||||
out = std::copy(v.begin(), v.end(), out);
|
||||
}
|
||||
static result deserialize(bytes_view& in) {
|
||||
bytes_ostream w;
|
||||
w.write(in);
|
||||
in.remove_prefix(in.size());
|
||||
return result(std::move(w));
|
||||
}
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
namespace db {
|
||||
|
||||
template<> serializer<query::result>::serializer(const query::result&);
|
||||
template<> void serializer<query::result>::write(output&, const query::result&);
|
||||
template<> query::result serializer<query::result>::read(input&);
|
||||
|
||||
extern template class serializer<query::result>;
|
||||
|
||||
}
|
||||
|
||||
32
query.cc
32
query.cc
@@ -19,6 +19,7 @@
|
||||
* along with Scylla. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <limits>
|
||||
#include "db/serializer.hh"
|
||||
#include "query-request.hh"
|
||||
#include "query-result.hh"
|
||||
@@ -270,3 +271,34 @@ result::pretty_print(schema_ptr s, const query::partition_slice& slice) const {
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
template class db::serializer<query::result>;
|
||||
|
||||
using query_result_size_type = uint32_t;
|
||||
|
||||
template<>
|
||||
db::serializer<query::result>::serializer(const query::result& v)
|
||||
: _item(v)
|
||||
, _size(sizeof(query_result_size_type) + v.buf().size())
|
||||
{
|
||||
static_assert(std::numeric_limits<bytes_ostream::size_type>::max() <=
|
||||
std::numeric_limits<query_result_size_type>::max(), "query_result_size_type too small");
|
||||
}
|
||||
|
||||
template<>
|
||||
void
|
||||
db::serializer<query::result>::write(output& out, const query::result& v) {
|
||||
const bytes_ostream& buf = v.buf();
|
||||
out.write<query_result_size_type>(buf.size());
|
||||
for (bytes_view frag : buf.fragments()) {
|
||||
out.write(frag.begin(), frag.end());
|
||||
}
|
||||
}
|
||||
|
||||
template<>
|
||||
query::result db::serializer<query::result>::read(input& in) {
|
||||
bytes_ostream buf;
|
||||
auto size = in.read<query_result_size_type>();
|
||||
buf.write(in.read_view(size));
|
||||
return query::result(std::move(buf));
|
||||
}
|
||||
|
||||
17
row_cache.cc
17
row_cache.cc
@@ -164,7 +164,7 @@ public:
|
||||
{ }
|
||||
|
||||
virtual future<mutation_opt> operator()() override {
|
||||
return _delegate().then([this] (mutation_opt&& mo) {
|
||||
return _delegate().then([this, op = _cache._populate_phaser.start()] (mutation_opt&& mo) {
|
||||
if (mo) {
|
||||
_cache.populate(*mo);
|
||||
}
|
||||
@@ -250,11 +250,13 @@ class scanning_and_populating_reader final : public mutation_reader::impl {
|
||||
mutation_opt _next_primary;
|
||||
mutation_source& _underlying;
|
||||
mutation_reader _secondary;
|
||||
utils::phased_barrier::phase_type _secondary_phase;
|
||||
const query::partition_range& _original_range;
|
||||
query::partition_range _range;
|
||||
key_source& _underlying_keys;
|
||||
key_reader _keys;
|
||||
dht::decorated_key_opt _next_key;
|
||||
dht::decorated_key_opt _last_secondary_key;
|
||||
public:
|
||||
scanning_and_populating_reader(row_cache& cache, const query::partition_range& range)
|
||||
: _cache(cache), _schema(cache._schema),
|
||||
@@ -293,6 +295,8 @@ public:
|
||||
end = _original_range.end();
|
||||
}
|
||||
_range = query::partition_range(query::partition_range::bound { std::move(*dk), true }, std::move(end));
|
||||
_last_secondary_key = {};
|
||||
_secondary_phase = _cache._populate_phaser.phase();
|
||||
_secondary = _underlying(_range);
|
||||
_secondary_only = true;
|
||||
return next_secondary();
|
||||
@@ -301,7 +305,14 @@ public:
|
||||
}
|
||||
private:
|
||||
future<mutation_opt> next_secondary() {
|
||||
return _secondary().then([this] (mutation_opt&& mo) {
|
||||
if (_secondary_phase != _cache._populate_phaser.phase()) {
|
||||
assert(_last_secondary_key);
|
||||
auto cmp = dht::ring_position_comparator(*_schema);
|
||||
_range = _range.split_after(*_last_secondary_key, cmp);
|
||||
_secondary_phase = _cache._populate_phaser.phase();
|
||||
_secondary = _underlying(_range);
|
||||
}
|
||||
return _secondary().then([this, op = _cache._populate_phaser.start()] (mutation_opt&& mo) {
|
||||
if (!mo && _next_primary) {
|
||||
auto cmp = dht::ring_position_comparator(*_schema);
|
||||
_range = _original_range.split_after(_next_primary->decorated_key(), cmp);
|
||||
@@ -312,6 +323,7 @@ private:
|
||||
}
|
||||
if (mo) {
|
||||
_cache.populate(*mo);
|
||||
_last_secondary_key = mo->decorated_key();
|
||||
}
|
||||
_cache.on_miss();
|
||||
return std::move(mo);
|
||||
@@ -397,6 +409,7 @@ future<> row_cache::update(memtable& m, partition_presence_checker presence_chec
|
||||
m.partitions.clear_and_dispose(current_deleter<partition_entry>());
|
||||
});
|
||||
});
|
||||
_populate_phaser.advance_and_await().get();
|
||||
while (!m.partitions.empty()) {
|
||||
with_allocator(_tracker.allocator(), [this, &m, &presence_checker] () {
|
||||
unsigned quota = 30;
|
||||
|
||||
14
row_cache.hh
14
row_cache.hh
@@ -31,6 +31,7 @@
|
||||
#include "mutation_partition.hh"
|
||||
#include "utils/logalloc.hh"
|
||||
#include "key_reader.hh"
|
||||
#include "utils/phased_barrier.hh"
|
||||
|
||||
namespace scollectd {
|
||||
|
||||
@@ -174,6 +175,17 @@ private:
|
||||
partitions_type _partitions; // Cached partitions are complete.
|
||||
mutation_source _underlying;
|
||||
key_source _underlying_keys;
|
||||
|
||||
// Synchronizes populating reads with update() to ensure that cache
|
||||
// remains consistent across flushes with the underlying data source.
|
||||
// Readers obtained from the underlying data source in earlier than
|
||||
// current phases must not be used to populate the cache, unless they hold
|
||||
// phaser::operation created in the reader's phase of origin. Readers
|
||||
// should hold on to a phase only briefly because this inhibits progress of
|
||||
// update(). Phase changes occur only in update(), which can be assumed to
|
||||
// be asynchronous wrt invoking of the underlying data source.
|
||||
utils::phased_barrier _populate_phaser;
|
||||
|
||||
logalloc::allocating_section _update_section;
|
||||
logalloc::allocating_section _populate_section;
|
||||
logalloc::allocating_section _read_section;
|
||||
@@ -188,7 +200,7 @@ public:
|
||||
row_cache(const row_cache&) = delete;
|
||||
row_cache& operator=(row_cache&&) = default;
|
||||
public:
|
||||
mutation_reader make_reader(const query::partition_range&);
|
||||
mutation_reader make_reader(const query::partition_range& = query::full_partition_range);
|
||||
const stats& stats() const { return _stats; }
|
||||
public:
|
||||
// Populate cache from given mutation. The mutation must contain all
|
||||
|
||||
@@ -44,7 +44,7 @@ def cpus():
|
||||
return int(gdb.parse_and_eval('smp::count'))
|
||||
|
||||
def find_db(shard):
|
||||
return gdb.parse_and_eval('debug::db')['_instances']['_M_impl']['_M_start'][shard]
|
||||
return gdb.parse_and_eval('debug::db')['_instances']['_M_impl']['_M_start'][shard]['service']['_p']
|
||||
|
||||
def find_dbs():
|
||||
return [find_db(shard) for shard in range(cpus())]
|
||||
@@ -102,9 +102,15 @@ class scylla_memory(gdb.Command):
|
||||
gdb.Command.__init__(self, 'scylla memory', gdb.COMMAND_USER, gdb.COMPLETE_COMMAND)
|
||||
def invoke(self, arg, from_tty):
|
||||
cpu_mem = gdb.parse_and_eval('memory::cpu_mem')
|
||||
page_size = int(gdb.parse_and_eval('memory::page_size'))
|
||||
free_mem = int(cpu_mem['nr_free_pages']) * page_size
|
||||
total_mem = int(cpu_mem['nr_pages']) * page_size
|
||||
gdb.write('Used memory: {used_mem:>13}\nFree memory: {free_mem:>13}\nTotal memory: {total_mem:>12}\n\n'
|
||||
.format(used_mem=total_mem-free_mem, free_mem=free_mem, total_mem=total_mem))
|
||||
|
||||
gdb.write('Small pools:\n')
|
||||
small_pools = cpu_mem['small_pools']
|
||||
nr = small_pools['nr_small_pools']
|
||||
page_size = int(gdb.parse_and_eval('memory::page_size'))
|
||||
gdb.write('{objsize:>5} {span_size:>6} {use_count:>10} {memory:>12} {wasted_percent:>5}\n'
|
||||
.format(objsize='objsz', span_size='spansz', use_count='usedobj', memory='memory', wasted_percent='wst%'))
|
||||
for i in range(int(nr)):
|
||||
@@ -133,9 +139,46 @@ class scylla_memory(gdb.Command):
|
||||
front = int(span['link']['_next'])
|
||||
gdb.write('{index:5} {size:13} {total}\n'.format(index=index, size=(1<<index)*page_size, total=total*page_size))
|
||||
|
||||
# gdb command 'scylla lsa': prints log-structured allocator (LSA) statistics
# read from the inferior: overall memory use, emergency reserve state, and
# one entry per region registered with the LSA tracker.
class scylla_lsa(gdb.Command):
    def __init__(self):
        gdb.Command.__init__(self, 'scylla lsa', gdb.COMMAND_USER, gdb.COMPLETE_COMMAND)

    def invoke(self, arg, from_tty):
        pool = gdb.parse_and_eval('logalloc::shard_segment_pool')
        bytes_per_segment = int(gdb.parse_and_eval('logalloc::segment::size'))

        # Overall memory figures.
        lsa_bytes = int(pool['_segments_in_use']) * bytes_per_segment
        non_lsa_bytes = int(pool['_non_lsa_memory_in_use'])
        total_bytes = lsa_bytes + non_lsa_bytes
        summary = ('Log Structured Allocator\n\nLSA memory in use: {lsa_mem:>16}\n'
                   'Non-LSA memory in use: {non_lsa_mem:>12}\nTotal memory in use: {total_mem:>14}\n\n')
        gdb.write(summary.format(lsa_mem=lsa_bytes, non_lsa_mem=non_lsa_bytes, total_mem=total_bytes))

        # Emergency reserve state.
        reserve_goal = int(pool['_current_emergency_reserve_goal'])
        reserve_max = int(pool['_emergency_reserve_max'])
        reserve_now = int(pool['_emergency_reserve']['_size'])
        reserve_report = ('Emergency reserve goal: {er_goal:>11}\nEmergency reserve max: {er_max:>12}\n'
                          'Emergency reserve current: {er_current:>8}\n\n')
        gdb.write(reserve_report.format(er_goal=reserve_goal, er_max=reserve_max, er_current=reserve_now))

        # Walk the tracker's region vector from _M_start to _M_finish.
        tracker = gdb.parse_and_eval('logalloc::tracker_instance._impl')['_M_t']['_M_head_impl']
        regions = tracker['_regions']
        cursor = regions['_M_impl']['_M_start']
        gdb.write('LSA regions:\n')
        region_report = (' Region #{r_id}\n - reclaimable: {r_en:>14}\n'
                         ' - evictable: {r_ev:16}\n - non-LSA memory: {r_non_lsa:>11}\n'
                         ' - closed LSA memory: {r_lsa:>8}\n - unused memory: {r_unused:>12}\n')
        while cursor != regions['_M_impl']['_M_finish']:
            gdb.write(region_report.format(
                r_id=int(cursor['_id']),
                r_en=bool(cursor['_reclaiming_enabled']),
                r_ev=bool(cursor['_evictable']),
                r_non_lsa=int(cursor['_non_lsa_occupancy']['_total_space']),
                r_lsa=int(cursor['_closed_occupancy']['_total_space']),
                r_unused=int(cursor['_closed_occupancy']['_free_space'])))
            cursor = cursor + 1
|
||||
|
||||
|
||||
scylla()
|
||||
scylla_databases()
|
||||
scylla_keyspaces()
|
||||
scylla_column_families()
|
||||
scylla_memory()
|
||||
scylla_memory()
|
||||
scylla_lsa()
|
||||
|
||||
2
seastar
2
seastar
Submodule seastar updated: 95ddb8e243...8a76d06797
@@ -88,6 +88,29 @@ future<> migration_manager::schedule_schema_pull(const gms::inet_address& endpoi
|
||||
return make_ready_future<>();
|
||||
}
|
||||
|
||||
bool migration_manager::is_ready_for_bootstrap() {
|
||||
auto our_version = get_local_storage_proxy().get_db().local().get_version();
|
||||
bool match = false;
|
||||
for (auto& x : gms::get_local_gossiper().endpoint_state_map) {
|
||||
auto& endpoint = x.first;
|
||||
auto& eps = x.second;
|
||||
if (endpoint == utils::fb_utilities::get_broadcast_address() || !eps.is_alive()) {
|
||||
continue;
|
||||
}
|
||||
auto schema = eps.get_application_state(gms::application_state::SCHEMA);
|
||||
if (!schema) {
|
||||
return false;
|
||||
}
|
||||
utils::UUID remote_version{schema->value};
|
||||
if (our_version != remote_version) {
|
||||
return false;
|
||||
} else {
|
||||
match = true;
|
||||
}
|
||||
}
|
||||
return match;
|
||||
}
|
||||
|
||||
/**
|
||||
* If versions differ this node sends request with local migration list to the endpoint
|
||||
* and expecting to receive a list of migrations to apply locally.
|
||||
|
||||
@@ -113,6 +113,8 @@ public:
|
||||
static future<> passive_announce(utils::UUID version);
|
||||
|
||||
future<> stop();
|
||||
|
||||
bool is_ready_for_bootstrap();
|
||||
};
|
||||
|
||||
extern distributed<migration_manager> _the_migration_manager;
|
||||
|
||||
@@ -68,11 +68,16 @@ future<> pending_range_calculator_service::stop() {
|
||||
|
||||
future<> pending_range_calculator_service::update() {
|
||||
return smp::submit_to(0, [] {
|
||||
get_local_pending_range_calculator_service()._update_jobs++;
|
||||
get_local_pending_range_calculator_service().run();
|
||||
get_local_pending_range_calculator_service().do_update();
|
||||
});
|
||||
}
|
||||
|
||||
// Registers one more pending update job on the local service instance and
// kicks off run(). Must be called on cpu 0, which the assertion enforces.
void pending_range_calculator_service::do_update() {
    assert(engine().cpu_id() == 0);
    auto& local_service = get_local_pending_range_calculator_service();
    local_service._update_jobs++;
    local_service.run();
}
|
||||
|
||||
future<> pending_range_calculator_service::block_until_finished() {
|
||||
// We want to be sure the job we're blocking for is actually finished and we can't trust the TPE's active job count
|
||||
return smp::submit_to(0, [] {
|
||||
|
||||
@@ -51,6 +51,7 @@ private:
|
||||
void run();
|
||||
public:
|
||||
pending_range_calculator_service(distributed<database>& db) : _db(db) {}
|
||||
void do_update();
|
||||
future<> update();
|
||||
future<> block_until_finished();
|
||||
future<> stop();
|
||||
|
||||
@@ -343,6 +343,19 @@ storage_proxy::storage_proxy(distributed<database>& db) : _db(db) {
|
||||
|
||||
storage_proxy::rh_entry::rh_entry(std::unique_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb) : handler(std::move(h)), expire_timer(std::move(cb)) {}
|
||||
|
||||
// Wraps response handler id `id_`, registered with proxy `p_`.
storage_proxy::unique_response_handler::unique_response_handler(storage_proxy& p_, response_id_type id_)
        : id(id_)
        , p(p_)
{}
|
||||
// Move constructor: takes over the id held by `x` and resets `x.id` to 0 so
// that the destructor of the moved-from object skips the handler removal.
storage_proxy::unique_response_handler::unique_response_handler(unique_response_handler&& x) : id(x.id), p(x.p) {
    x.id = 0;
}
|
||||
// Removes the response handler from the proxy if this object still holds a
// non-zero id (i.e. it was neither released nor moved from).
storage_proxy::unique_response_handler::~unique_response_handler() {
    if (id == 0) {
        return;
    }
    p.remove_response_handler(id);
}
|
||||
// Gives up the held id and returns it. Afterwards id is 0, so the destructor
// no longer removes the response handler.
storage_proxy::response_id_type storage_proxy::unique_response_handler::release() {
    const response_id_type released = id;
    id = 0;
    return released;
}
|
||||
|
||||
#if 0
|
||||
static
|
||||
{
|
||||
@@ -767,6 +780,12 @@ storage_proxy::create_write_response_handler(const mutation& m, db::consistency_
|
||||
std::vector<gms::inet_address> pending_endpoints =
|
||||
get_local_storage_service().get_token_metadata().pending_endpoints_for(m.token(), keyspace_name);
|
||||
|
||||
// filter out natural_endpoints from pending_endpoints if the latter is not yet updated during node join
|
||||
auto itend = boost::range::remove_if(pending_endpoints, [&natural_endpoints] (gms::inet_address& p) {
|
||||
return boost::range::find(natural_endpoints, p) != natural_endpoints.end();
|
||||
});
|
||||
pending_endpoints.erase(itend, pending_endpoints.end());
|
||||
|
||||
auto all = boost::range::join(natural_endpoints, pending_endpoints);
|
||||
|
||||
if (std::find_if(all.begin(), all.end(), std::bind1st(std::mem_fn(&storage_proxy::cannot_hint), this)) != all.end()) {
|
||||
@@ -804,29 +823,27 @@ storage_proxy::hint_to_dead_endpoints(response_id_type id, db::consistency_level
|
||||
}
|
||||
|
||||
template<typename Range, typename CreateWriteHandler>
|
||||
future<std::vector<storage_proxy::response_id_type>> storage_proxy::mutate_prepare(const Range& mutations, db::consistency_level cl, db::write_type type, CreateWriteHandler create_handler) {
|
||||
std::vector<response_id_type> ids;
|
||||
|
||||
try {
|
||||
future<std::vector<storage_proxy::unique_response_handler>> storage_proxy::mutate_prepare(const Range& mutations, db::consistency_level cl, db::write_type type, CreateWriteHandler create_handler) {
|
||||
// apply is used to convert exceptions to exceptional future
|
||||
return futurize<std::vector<storage_proxy::unique_response_handler>>::apply([this] (const Range& mutations, db::consistency_level cl, db::write_type type, CreateWriteHandler create_handler) {
|
||||
std::vector<unique_response_handler> ids;
|
||||
ids.reserve(mutations.size());
|
||||
for (auto& m : mutations) {
|
||||
ids.emplace_back(create_handler(m, cl, type));
|
||||
ids.emplace_back(*this, create_handler(m, cl, type));
|
||||
}
|
||||
return make_ready_future<std::vector<response_id_type>>(std::move(ids));
|
||||
} catch(...) {
|
||||
boost::for_each(ids, std::bind(&storage_proxy::remove_response_handler, this, std::placeholders::_1));
|
||||
return make_exception_future<std::vector<response_id_type>>(std::current_exception());
|
||||
}
|
||||
return make_ready_future<std::vector<unique_response_handler>>(std::move(ids));
|
||||
}, mutations, cl, type, std::move(create_handler));
|
||||
}
|
||||
|
||||
future<std::vector<storage_proxy::response_id_type>> storage_proxy::mutate_prepare(std::vector<mutation>& mutations, db::consistency_level cl, db::write_type type) {
|
||||
future<std::vector<storage_proxy::unique_response_handler>> storage_proxy::mutate_prepare(std::vector<mutation>& mutations, db::consistency_level cl, db::write_type type) {
|
||||
return mutate_prepare<>(mutations, cl, type, [this] (const mutation& m, db::consistency_level cl, db::write_type type) {
|
||||
return create_write_response_handler(m, cl, type);
|
||||
});
|
||||
}
|
||||
|
||||
future<> storage_proxy::mutate_begin(std::vector<storage_proxy::response_id_type> ids, db::consistency_level cl) {
|
||||
return parallel_for_each(ids, [this, cl] (storage_proxy::response_id_type response_id) {
|
||||
future<> storage_proxy::mutate_begin(std::vector<unique_response_handler> ids, db::consistency_level cl) {
|
||||
return parallel_for_each(ids, [this, cl] (unique_response_handler& protected_response) {
|
||||
auto response_id = protected_response.id;
|
||||
// it is better to send first and hint afterwards to reduce latency
|
||||
// but request may complete before hint_to_dead_endpoints() is called and
|
||||
// response_id handler will be removed, so we will have to do hint with separate
|
||||
@@ -835,7 +852,7 @@ future<> storage_proxy::mutate_begin(std::vector<storage_proxy::response_id_type
|
||||
|
||||
// call before send_to_live_endpoints() for the same reason as above
|
||||
auto f = response_wait(response_id);
|
||||
send_to_live_endpoints(response_id);
|
||||
send_to_live_endpoints(protected_response.release()); // response is now running and it will either complete or timeout
|
||||
return std::move(f);
|
||||
});
|
||||
}
|
||||
@@ -846,7 +863,7 @@ future<> storage_proxy::mutate_end(future<> mutate_result, utils::latency_counte
|
||||
assert(mutate_result.available());
|
||||
_stats.write.mark(lc.stop().latency_in_nano());
|
||||
if (lc.is_start()) {
|
||||
_stats.estimated_write.add(lc.latency_in_nano(), _stats.write.count);
|
||||
_stats.estimated_write.add(lc.latency(), _stats.write.count);
|
||||
}
|
||||
try {
|
||||
mutate_result.get();
|
||||
@@ -885,7 +902,7 @@ storage_proxy::mutate(std::vector<mutation> mutations, db::consistency_level cl)
|
||||
utils::latency_counter lc;
|
||||
lc.start();
|
||||
|
||||
return mutate_prepare(mutations, cl, type).then([this, cl] (std::vector<storage_proxy::response_id_type> ids) {
|
||||
return mutate_prepare(mutations, cl, type).then([this, cl] (std::vector<storage_proxy::unique_response_handler> ids) {
|
||||
return mutate_begin(std::move(ids), cl);
|
||||
}).then_wrapped([p = shared_from_this(), lc] (future<> f) {
|
||||
return p->mutate_end(std::move(f), lc);
|
||||
@@ -959,7 +976,7 @@ storage_proxy::mutate_atomically(std::vector<mutation> mutations, db::consistenc
|
||||
return _p.mutate_prepare<>(std::array<mutation, 1>{std::move(m)}, cl, db::write_type::BATCH_LOG, [this] (const mutation& m, db::consistency_level cl, db::write_type type) {
|
||||
auto& ks = _p._db.local().find_keyspace(m.schema()->ks_name());
|
||||
return _p.create_write_response_handler(ks, cl, type, freeze(m), _batchlog_endpoints, {}, {});
|
||||
}).then([this, cl] (std::vector<response_id_type> ids) {
|
||||
}).then([this, cl] (std::vector<unique_response_handler> ids) {
|
||||
return _p.mutate_begin(std::move(ids), cl);
|
||||
});
|
||||
}
|
||||
@@ -979,16 +996,9 @@ storage_proxy::mutate_atomically(std::vector<mutation> mutations, db::consistenc
|
||||
};
|
||||
|
||||
future<> run() {
|
||||
return _p.mutate_prepare(_mutations, _cl, db::write_type::BATCH).then([this] (std::vector<response_id_type> ids) {
|
||||
return sync_write_to_batchlog().then_wrapped([this, ids = std::move(ids)] (future<> f) {
|
||||
try {
|
||||
f.get();
|
||||
return _p.mutate_begin(std::move(ids), _cl);
|
||||
} catch(...) {
|
||||
// writing batchlog failed, remove response handlers that will not be used now
|
||||
boost::for_each(ids, std::bind(&storage_proxy::remove_response_handler, &_p, std::placeholders::_1));
|
||||
throw;
|
||||
}
|
||||
return _p.mutate_prepare(_mutations, _cl, db::write_type::BATCH).then([this] (std::vector<unique_response_handler> ids) {
|
||||
return sync_write_to_batchlog().then([this, ids = std::move(ids)] () mutable {
|
||||
return _p.mutate_begin(std::move(ids), _cl);
|
||||
}).then(std::bind(&context::async_remove_from_batchlog, this));
|
||||
});
|
||||
}
|
||||
@@ -1295,7 +1305,7 @@ future<> storage_proxy::schedule_repair(std::unordered_map<gms::inet_address, st
|
||||
return mutate_prepare<>(std::move(i.second), db::consistency_level::ONE, type, [ep = i.first, this] (const mutation& m, db::consistency_level cl, db::write_type type) {
|
||||
auto& ks = _db.local().find_keyspace(m.schema()->ks_name());
|
||||
return create_write_response_handler(ks, cl, type, freeze(m), std::unordered_set<gms::inet_address>({ep}, 1), {}, {});
|
||||
}).then([this] (std::vector<response_id_type> ids) {
|
||||
}).then([this] (std::vector<unique_response_handler> ids) {
|
||||
return mutate_begin(std::move(ids), db::consistency_level::ONE);
|
||||
}).then_wrapped([this, lc] (future<> f) {
|
||||
return mutate_end(std::move(f), lc);
|
||||
@@ -1523,7 +1533,7 @@ public:
|
||||
|
||||
// reconcile all versions
|
||||
boost::range::transform(boost::make_iterator_range(versions.begin(), versions.end()), std::back_inserter(reconciled_partitions), [this, schema] (std::vector<version>& v) {
|
||||
return boost::accumulate(v, mutation(v.front().par.mut().key(*schema), schema), [this, schema = std::move(schema)] (mutation& m, const version& ver) {
|
||||
return boost::accumulate(v, mutation(v.front().par.mut().key(*schema), schema), [this, schema] (mutation& m, const version& ver) {
|
||||
m.partition().apply(*schema, ver.par.mut().partition());
|
||||
return std::move(m);
|
||||
});
|
||||
@@ -2086,7 +2096,7 @@ storage_proxy::do_query(schema_ptr s,
|
||||
return query_singular(cmd, std::move(partition_ranges), cl).finally([lc, p] () mutable {
|
||||
p->_stats.read.mark(lc.stop().latency_in_nano());
|
||||
if (lc.is_start()) {
|
||||
p->_stats.estimated_read.add(lc.latency_in_nano(), p->_stats.read.count);
|
||||
p->_stats.estimated_read.add(lc.latency(), p->_stats.read.count);
|
||||
}
|
||||
});
|
||||
} catch (const no_such_column_family&) {
|
||||
@@ -2584,30 +2594,26 @@ void storage_proxy::init_messaging_service() {
|
||||
});
|
||||
ms.register_mutation([] (frozen_mutation in, std::vector<gms::inet_address> forward, gms::inet_address reply_to, unsigned shard, storage_proxy::response_id_type response_id) {
|
||||
do_with(std::move(in), get_local_shared_storage_proxy(), [forward = std::move(forward), reply_to, shard, response_id] (const frozen_mutation& m, shared_ptr<storage_proxy>& p) {
|
||||
return make_ready_future<>().then([&p, &m, reply_to, shard, response_id, forward = std::move(forward)] () mutable {
|
||||
return when_all(
|
||||
p->mutate_locally(m).then_wrapped([reply_to, shard, response_id] (future<> f) {
|
||||
try {
|
||||
f.get();
|
||||
auto& ms = net::get_local_messaging_service();
|
||||
ms.send_mutation_done(net::messaging_service::shard_id{reply_to, shard}, shard, response_id).then_wrapped([] (future<> f) {
|
||||
f.ignore_ready_future();
|
||||
});
|
||||
// return void, no need to wait for send to complete
|
||||
} catch (std::exception& e){
|
||||
logger.warn("MUTATION verb handler: {}", e.what());
|
||||
} catch(...) {
|
||||
logger.warn("MUTATION verb handler: unknown exception is thrown");
|
||||
}
|
||||
}),
|
||||
parallel_for_each(forward.begin(), forward.end(), [reply_to, shard, response_id, &m] (gms::inet_address forward) {
|
||||
auto& ms = net::get_local_messaging_service();
|
||||
return ms.send_mutation(net::messaging_service::shard_id{forward, 0}, m, {}, reply_to, shard, response_id).then_wrapped([] (future<> f) {
|
||||
f.ignore_ready_future();
|
||||
});
|
||||
})
|
||||
);
|
||||
});
|
||||
return when_all(
|
||||
// mutate_locally() may throw, putting it into apply() converts exception to a future.
|
||||
futurize<void>::apply([&p, &m] {
|
||||
return p->mutate_locally(m);
|
||||
}).then([reply_to, shard, response_id] {
|
||||
auto& ms = net::get_local_messaging_service();
|
||||
ms.send_mutation_done(net::messaging_service::shard_id{reply_to, shard}, shard, response_id).then_wrapped([] (future<> f) {
|
||||
f.ignore_ready_future();
|
||||
});
|
||||
// return void, no need to wait for send to complete
|
||||
}).handle_exception([] (std::exception_ptr eptr) {
|
||||
logger.warn("MUTATION verb handler: {}", eptr);
|
||||
}),
|
||||
parallel_for_each(forward.begin(), forward.end(), [reply_to, shard, response_id, &m] (gms::inet_address forward) {
|
||||
auto& ms = net::get_local_messaging_service();
|
||||
return ms.send_mutation(net::messaging_service::shard_id{forward, 0}, m, {}, reply_to, shard, response_id).then_wrapped([] (future<> f) {
|
||||
f.ignore_ready_future();
|
||||
});
|
||||
})
|
||||
);
|
||||
}).discard_result();
|
||||
|
||||
return net::messaging_service::no_wait();
|
||||
@@ -2642,8 +2648,7 @@ void storage_proxy::init_messaging_service() {
|
||||
});
|
||||
|
||||
ms.register_replication_finished([] (gms::inet_address from) {
|
||||
get_local_storage_service().confirm_replication(from);
|
||||
return make_ready_future<>();
|
||||
return get_local_storage_service().confirm_replication(from);
|
||||
});
|
||||
}
|
||||
|
||||
|
||||
@@ -63,6 +63,18 @@ class storage_proxy : public seastar::async_sharded_service<storage_proxy> /*imp
|
||||
rh_entry(std::unique_ptr<abstract_write_response_handler>&& h, std::function<void()>&& cb);
|
||||
};
|
||||
|
||||
using response_id_type = uint64_t;
|
||||
struct unique_response_handler {
|
||||
response_id_type id;
|
||||
storage_proxy& p;
|
||||
unique_response_handler(storage_proxy& p_, response_id_type id_);
|
||||
unique_response_handler(const unique_response_handler&) = delete;
|
||||
unique_response_handler& operator=(const unique_response_handler&) = delete;
|
||||
unique_response_handler(unique_response_handler&& x);
|
||||
~unique_response_handler();
|
||||
response_id_type release();
|
||||
};
|
||||
|
||||
public:
|
||||
struct stats {
|
||||
uint64_t read_timeouts = 0;
|
||||
@@ -84,10 +96,9 @@ public:
|
||||
uint64_t reads = 0;
|
||||
uint64_t background_reads = 0; // client no longer waits for the read
|
||||
};
|
||||
using response_id_type = uint64_t;
|
||||
private:
|
||||
distributed<database>& _db;
|
||||
response_id_type _next_response_id = 0;
|
||||
response_id_type _next_response_id = 1; // 0 is reserved for unique_response_handler
|
||||
std::unordered_map<response_id_type, rh_entry> _response_handlers;
|
||||
constexpr static size_t _max_hints_in_progress = 128; // origin multiplies by FBUtilities.getAvailableProcessors() but we already sharded
|
||||
size_t _total_hints_in_progress = 0;
|
||||
@@ -136,9 +147,9 @@ private:
|
||||
std::vector<query::partition_range>&& partition_ranges,
|
||||
db::consistency_level cl);
|
||||
template<typename Range, typename CreateWriteHandler>
|
||||
future<std::vector<storage_proxy::response_id_type>> mutate_prepare(const Range& mutations, db::consistency_level cl, db::write_type type, CreateWriteHandler handler);
|
||||
future<std::vector<storage_proxy::response_id_type>> mutate_prepare(std::vector<mutation>& mutations, db::consistency_level cl, db::write_type type);
|
||||
future<> mutate_begin(const std::vector<storage_proxy::response_id_type> ids, db::consistency_level cl);
|
||||
future<std::vector<unique_response_handler>> mutate_prepare(const Range& mutations, db::consistency_level cl, db::write_type type, CreateWriteHandler handler);
|
||||
future<std::vector<unique_response_handler>> mutate_prepare(std::vector<mutation>& mutations, db::consistency_level cl, db::write_type type);
|
||||
future<> mutate_begin(std::vector<unique_response_handler> ids, db::consistency_level cl);
|
||||
future<> mutate_end(future<> mutate_result, utils::latency_counter);
|
||||
future<> schedule_repair(std::unordered_map<gms::inet_address, std::vector<mutation>> diffs);
|
||||
|
||||
@@ -197,8 +208,6 @@ public:
|
||||
std::vector<query::partition_range>&& partition_ranges,
|
||||
db::consistency_level cl);
|
||||
|
||||
future<foreign_ptr<lw_shared_ptr<query::result>>> query_local(lw_shared_ptr<query::read_command> cmd, std::vector<query::partition_range>&& partition_ranges);
|
||||
|
||||
future<foreign_ptr<lw_shared_ptr<reconcilable_result>>> query_mutations_locally(
|
||||
lw_shared_ptr<query::read_command> cmd, const query::partition_range&);
|
||||
|
||||
|
||||
@@ -85,11 +85,6 @@ static int get_generation_number() {
|
||||
return generation_number;
|
||||
}
|
||||
|
||||
bool is_replacing() {
|
||||
// FIXME: DatabaseDescriptor.isReplacing()
|
||||
return false;
|
||||
}
|
||||
|
||||
bool storage_service::is_auto_bootstrap() {
|
||||
return _db.local().get_config().auto_bootstrap();
|
||||
}
|
||||
@@ -100,34 +95,46 @@ std::set<inet_address> get_seeds() {
|
||||
return gossiper.get_seeds();
|
||||
}
|
||||
|
||||
std::set<inet_address> get_replace_tokens() {
|
||||
// FIXME: DatabaseDescriptor.getReplaceTokens()
|
||||
return {};
|
||||
std::unordered_set<token> get_replace_tokens() {
|
||||
std::unordered_set<token> ret;
|
||||
std::unordered_set<sstring> tokens;
|
||||
auto tokens_string = get_local_storage_service().db().local().get_config().replace_token();
|
||||
try {
|
||||
boost::split(tokens, tokens_string, boost::is_any_of(sstring(",")));
|
||||
} catch (...) {
|
||||
throw std::runtime_error(sprint("Unable to parse replace_token=%s", tokens_string));
|
||||
}
|
||||
tokens.erase("");
|
||||
for (auto token_string : tokens) {
|
||||
auto token = dht::global_partitioner().from_sstring(token_string);
|
||||
ret.insert(token);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
std::experimental::optional<UUID> get_replace_node() {
|
||||
// FIXME: DatabaseDescriptor.getReplaceNode()
|
||||
return {};
|
||||
}
|
||||
|
||||
std::experimental::optional<inet_address> get_replace_address() {
|
||||
// FIXME: DatabaseDescriptor.getReplaceAddress()
|
||||
return {};
|
||||
auto replace_node = get_local_storage_service().db().local().get_config().replace_node();
|
||||
if (replace_node.empty()) {
|
||||
return std::experimental::nullopt;
|
||||
}
|
||||
try {
|
||||
return utils::UUID(replace_node);
|
||||
} catch (...) {
|
||||
logger.error("Format of host-id = {} is incorrect {}", std::current_exception());
|
||||
throw;
|
||||
}
|
||||
}
|
||||
|
||||
bool get_property_join_ring() {
|
||||
// FIXME: Boolean.parseBoolean(System.getProperty("cassandra.join_ring", "true")))
|
||||
return true;
|
||||
return get_local_storage_service().db().local().get_config().join_ring();
|
||||
}
|
||||
|
||||
bool get_property_rangemovement() {
|
||||
// FIXME: Boolean.parseBoolean(System.getProperty("cassandra.consistent.rangemovement", "true")
|
||||
return true;
|
||||
return get_local_storage_service().db().local().get_config().consistent_rangemovement();
|
||||
}
|
||||
|
||||
bool get_property_load_ring_state() {
|
||||
// FIXME: Boolean.parseBoolean(System.getProperty("cassandra.load_ring_state", "true"))
|
||||
return true;
|
||||
return get_local_storage_service().db().local().get_config().load_ring_state();
|
||||
}
|
||||
|
||||
bool storage_service::should_bootstrap() {
|
||||
@@ -141,13 +148,13 @@ future<> storage_service::prepare_to_join() {
|
||||
|
||||
auto app_states = make_shared<std::map<gms::application_state, gms::versioned_value>>();
|
||||
auto f = make_ready_future<>();
|
||||
if (is_replacing() && !get_property_join_ring()) {
|
||||
if (db().local().is_replacing() && !get_property_join_ring()) {
|
||||
throw std::runtime_error("Cannot set both join_ring=false and attempt to replace a node");
|
||||
}
|
||||
if (get_replace_tokens().size() > 0 || get_replace_node()) {
|
||||
throw std::runtime_error("Replace method removed; use cassandra.replace_address instead");
|
||||
}
|
||||
if (is_replacing()) {
|
||||
if (db().local().is_replacing()) {
|
||||
if (db::system_keyspace::bootstrap_complete()) {
|
||||
throw std::runtime_error("Cannot replace address with a node that is already bootstrapped");
|
||||
}
|
||||
@@ -171,8 +178,7 @@ future<> storage_service::prepare_to_join() {
|
||||
return db::system_keyspace::get_local_host_id();
|
||||
}).then([this, app_states] (auto local_host_id) mutable {
|
||||
_token_metadata.update_host_id(local_host_id, this->get_broadcast_address());
|
||||
// FIXME: DatabaseDescriptor.getBroadcastRpcAddress()
|
||||
auto broadcast_rpc_address = this->get_broadcast_address();
|
||||
auto broadcast_rpc_address = utils::fb_utilities::get_broadcast_rpc_address();
|
||||
app_states->emplace(gms::application_state::NET_VERSION, value_factory.network_version());
|
||||
app_states->emplace(gms::application_state::HOST_ID, value_factory.host_id(local_host_id));
|
||||
app_states->emplace(gms::application_state::RPC_ADDRESS, value_factory.rpcaddress(broadcast_rpc_address));
|
||||
@@ -183,7 +189,7 @@ future<> storage_service::prepare_to_join() {
|
||||
gossiper.register_(this->shared_from_this());
|
||||
// FIXME: SystemKeyspace.incrementAndGetGeneration()
|
||||
print("Start gossiper service ...\n");
|
||||
return gossiper.start(get_generation_number(), *app_states).then([this] {
|
||||
return gossiper.start_gossiping(get_generation_number(), *app_states).then([this] {
|
||||
#if SS_DEBUG
|
||||
gms::get_local_gossiper().debug_show();
|
||||
_token_metadata.debug_show();
|
||||
@@ -244,38 +250,53 @@ void storage_service::join_token_ring(int delay) {
|
||||
}
|
||||
sleep(std::chrono::seconds(1)).get();
|
||||
}
|
||||
#if 0
|
||||
// if our schema hasn't matched yet, keep sleeping until it does
|
||||
// (post CASSANDRA-1391 we don't expect this to be necessary very often, but it doesn't hurt to be careful)
|
||||
while (!MigrationManager.isReadyForBootstrap())
|
||||
{
|
||||
while (!get_local_migration_manager().is_ready_for_bootstrap()) {
|
||||
set_mode(mode::JOINING, "waiting for schema information to complete", true);
|
||||
Uninterruptibles.sleepUninterruptibly(1, TimeUnit.SECONDS);
|
||||
sleep(std::chrono::seconds(1)).get();
|
||||
}
|
||||
#endif
|
||||
set_mode(mode::JOINING, "schema complete, ready to bootstrap", true);
|
||||
set_mode(mode::JOINING, "waiting for pending range calculation", true);
|
||||
get_local_pending_range_calculator_service().block_until_finished().get();
|
||||
set_mode(mode::JOINING, "calculation complete, ready to bootstrap", true);
|
||||
logger.debug("... got ring + schema info");
|
||||
#if 0
|
||||
if (Boolean.parseBoolean(System.getProperty("cassandra.consistent.rangemovement", "true")) &&
|
||||
(
|
||||
_token_metadata.getBootstrapTokens().valueSet().size() > 0 ||
|
||||
_token_metadata.getLeavingEndpoints().size() > 0 ||
|
||||
_token_metadata.getMovingEndpoints().size() > 0
|
||||
))
|
||||
throw new UnsupportedOperationException("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while cassandra.consistent.rangemovement is true");
|
||||
#endif
|
||||
|
||||
if (!is_replacing()) {
|
||||
auto t = gms::gossiper::clk::now();
|
||||
while (get_property_rangemovement() &&
|
||||
(!_token_metadata.get_bootstrap_tokens().empty() ||
|
||||
!_token_metadata.get_leaving_endpoints().empty() ||
|
||||
!_token_metadata.get_moving_endpoints().empty())) {
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(gms::gossiper::clk::now() - t).count();
|
||||
logger.info("Checking bootstrapping/leaving/moving nodes: tokens {}, leaving {}, moving {}, sleep 1 second and check again ({} seconds elpased)",
|
||||
_token_metadata.get_bootstrap_tokens().size(),
|
||||
_token_metadata.get_leaving_endpoints().size(),
|
||||
_token_metadata.get_moving_endpoints().size(),
|
||||
elapsed);
|
||||
|
||||
sleep(std::chrono::seconds(1)).get();
|
||||
|
||||
if (gms::gossiper::clk::now() > t + std::chrono::seconds(60)) {
|
||||
throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while cassandra.consistent.rangemovement is true");
|
||||
}
|
||||
|
||||
// Check the schema and pending range again
|
||||
while (!get_local_migration_manager().is_ready_for_bootstrap()) {
|
||||
set_mode(mode::JOINING, "waiting for schema information to complete", true);
|
||||
sleep(std::chrono::seconds(1)).get();
|
||||
}
|
||||
get_local_pending_range_calculator_service().block_until_finished().get();
|
||||
}
|
||||
logger.info("Checking bootstrapping/leaving/moving nodes: ok");
|
||||
|
||||
if (!db().local().is_replacing()) {
|
||||
if (_token_metadata.is_member(get_broadcast_address())) {
|
||||
throw std::runtime_error("This node is already a member of the token ring; bootstrap aborted. (If replacing a dead node, remove the old one from the ring first.)");
|
||||
}
|
||||
set_mode(mode::JOINING, "getting bootstrap token", true);
|
||||
_bootstrap_tokens = boot_strapper::get_bootstrap_tokens(_token_metadata, _db.local());
|
||||
} else {
|
||||
auto replace_addr = get_replace_address();
|
||||
auto replace_addr = db().local().get_replace_address();
|
||||
if (replace_addr && *replace_addr != get_broadcast_address()) {
|
||||
// Sleep additionally to make sure that the server actually is not alive
|
||||
// and giving it more time to gossip if alive.
|
||||
@@ -376,7 +397,7 @@ future<> storage_service::join_ring() {
|
||||
|
||||
future<bool> storage_service::is_joined() {
|
||||
return run_with_read_api_lock([] (storage_service& ss) {
|
||||
return ss._joined;
|
||||
return ss._joined && !ss._is_survey_mode;
|
||||
});
|
||||
}
|
||||
|
||||
@@ -387,7 +408,7 @@ void storage_service::bootstrap(std::unordered_set<token> tokens) {
|
||||
// DON'T use set_token, that makes us part of the ring locally which is incorrect until we are done bootstrapping
|
||||
db::system_keyspace::update_tokens(tokens).get();
|
||||
auto& gossiper = gms::get_local_gossiper();
|
||||
if (!is_replacing()) {
|
||||
if (!db().local().is_replacing()) {
|
||||
// if not an existing token then bootstrap
|
||||
gossiper.add_local_application_state(gms::application_state::TOKENS, value_factory.tokens(tokens)).get();
|
||||
gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.bootstrapping(tokens)).get();
|
||||
@@ -396,7 +417,7 @@ void storage_service::bootstrap(std::unordered_set<token> tokens) {
|
||||
} else {
|
||||
// Dont set any state for the node which is bootstrapping the existing token...
|
||||
_token_metadata.update_normal_tokens(tokens, get_broadcast_address());
|
||||
auto replace_addr = get_replace_address();
|
||||
auto replace_addr = db().local().get_replace_address();
|
||||
if (replace_addr) {
|
||||
db::system_keyspace::remove_endpoint(*replace_addr).get();
|
||||
}
|
||||
@@ -462,10 +483,10 @@ void storage_service::handle_state_normal(inet_address endpoint) {
|
||||
if (gossiper.uses_host_id(endpoint)) {
|
||||
auto host_id = gossiper.get_host_id(endpoint);
|
||||
auto existing = _token_metadata.get_endpoint_for_host_id(host_id);
|
||||
if (is_replacing() &&
|
||||
get_replace_address() &&
|
||||
gossiper.get_endpoint_state_for_endpoint(get_replace_address().value()) &&
|
||||
(host_id == gossiper.get_host_id(get_replace_address().value()))) {
|
||||
if (db().local().is_replacing() &&
|
||||
db().local().get_replace_address() &&
|
||||
gossiper.get_endpoint_state_for_endpoint(db().local().get_replace_address().value()) &&
|
||||
(host_id == gossiper.get_host_id(db().local().get_replace_address().value()))) {
|
||||
logger.warn("Not updating token metadata for {} because I am replacing it", endpoint);
|
||||
} else {
|
||||
if (existing && *existing != endpoint) {
|
||||
@@ -522,11 +543,17 @@ void storage_service::handle_state_normal(inet_address endpoint) {
|
||||
}
|
||||
|
||||
bool is_moving = _token_metadata.is_moving(endpoint); // capture because updateNormalTokens clears moving status
|
||||
|
||||
// Update pending ranges after update of normal tokens immediately to avoid
|
||||
// a race where natural endpoint was updated to contain node A, but A was
|
||||
// not yet removed from pending endpoints
|
||||
_token_metadata.update_normal_tokens(tokens_to_update_in_metadata, endpoint);
|
||||
get_local_pending_range_calculator_service().do_update();
|
||||
|
||||
for (auto ep : endpoints_to_remove) {
|
||||
remove_endpoint(ep);
|
||||
auto replace_addr = get_replace_address();
|
||||
if (is_replacing() && replace_addr && *replace_addr == ep) {
|
||||
auto replace_addr = db().local().get_replace_address();
|
||||
if (db().local().is_replacing() && replace_addr && *replace_addr == ep) {
|
||||
gossiper.replacement_quarantine(ep); // quarantine locally longer than normally; see CASSANDRA-8260
|
||||
}
|
||||
}
|
||||
@@ -545,7 +572,7 @@ void storage_service::handle_state_normal(inet_address endpoint) {
|
||||
db::system_keyspace::update_local_tokens(std::unordered_set<dht::token>(), local_tokens_to_remove).discard_result().get();
|
||||
}
|
||||
|
||||
if (is_moving) {
|
||||
if (is_moving || _operation_mode == mode::MOVING) {
|
||||
_token_metadata.remove_from_moving(endpoint);
|
||||
get_storage_service().invoke_on_all([endpoint] (auto&& ss) {
|
||||
for (auto&& subscriber : ss._lifecycle_subscribers) {
|
||||
@@ -702,7 +729,8 @@ void storage_service::on_change(inet_address endpoint, application_state state,
|
||||
sstring move_name = pieces[0];
|
||||
if (move_name == sstring(versioned_value::STATUS_BOOTSTRAPPING)) {
|
||||
handle_state_bootstrap(endpoint);
|
||||
} else if (move_name == sstring(versioned_value::STATUS_NORMAL)) {
|
||||
} else if (move_name == sstring(versioned_value::STATUS_NORMAL) ||
|
||||
move_name == sstring(versioned_value::SHUTDOWN)) {
|
||||
handle_state_normal(endpoint);
|
||||
} else if (move_name == sstring(versioned_value::REMOVING_TOKEN) ||
|
||||
move_name == sstring(versioned_value::REMOVED_TOKEN)) {
|
||||
@@ -721,11 +749,13 @@ void storage_service::on_change(inet_address endpoint, application_state state,
|
||||
logger.debug("Ignoring state change for dead or unknown endpoint: {}", endpoint);
|
||||
return;
|
||||
}
|
||||
do_update_system_peers_table(endpoint, state, value);
|
||||
if (state == application_state::SCHEMA) {
|
||||
get_local_migration_manager().schedule_schema_pull(endpoint, *ep_state).handle_exception([endpoint] (auto ep) {
|
||||
logger.warn("Fail to pull schmea from {}: {}", endpoint, ep);
|
||||
});
|
||||
if (get_token_metadata().is_member(endpoint)) {
|
||||
do_update_system_peers_table(endpoint, state, value);
|
||||
if (state == application_state::SCHEMA) {
|
||||
get_local_migration_manager().schedule_schema_pull(endpoint, *ep_state).handle_exception([endpoint] (auto ep) {
|
||||
logger.warn("Fail to pull schmea from {}: {}", endpoint, ep);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
replicate_to_all_cores().get();
|
||||
@@ -740,9 +770,7 @@ void storage_service::on_remove(gms::inet_address endpoint) {
|
||||
|
||||
void storage_service::on_dead(gms::inet_address endpoint, gms::endpoint_state state) {
|
||||
logger.debug("on_dead endpoint={}", endpoint);
|
||||
#if 0
|
||||
MessagingService.instance().convict(endpoint);
|
||||
#endif
|
||||
net::get_local_messaging_service().remove_rpc_client(net::shard_id{endpoint, 0});
|
||||
get_storage_service().invoke_on_all([endpoint] (auto&& ss) {
|
||||
for (auto&& subscriber : ss._lifecycle_subscribers) {
|
||||
subscriber->on_down(endpoint);
|
||||
@@ -845,11 +873,15 @@ void storage_service::set_tokens(std::unordered_set<token> tokens) {
|
||||
db::system_keyspace::update_tokens(tokens).get();
|
||||
_token_metadata.update_normal_tokens(tokens, get_broadcast_address());
|
||||
auto local_tokens = get_local_tokens();
|
||||
set_gossip_tokens(local_tokens);
|
||||
set_mode(mode::NORMAL, "node is now in normal status", true);
|
||||
replicate_to_all_cores().get();
|
||||
}
|
||||
|
||||
void storage_service::set_gossip_tokens(const std::unordered_set<dht::token>& local_tokens) {
|
||||
auto& gossiper = gms::get_local_gossiper();
|
||||
gossiper.add_local_application_state(gms::application_state::TOKENS, value_factory.tokens(local_tokens)).get();
|
||||
gossiper.add_local_application_state(gms::application_state::STATUS, value_factory.normal(local_tokens)).get();
|
||||
set_mode(mode::NORMAL, false);
|
||||
replicate_to_all_cores().get();
|
||||
}
|
||||
|
||||
void storage_service::register_subscriber(endpoint_lifecycle_subscriber* subscriber)
|
||||
@@ -1037,33 +1069,50 @@ future<> storage_service::check_for_endpoint_collision() {
|
||||
if (!MessagingService.instance().isListening())
|
||||
MessagingService.instance().listen(FBUtilities.getLocalAddress());
|
||||
#endif
|
||||
auto& gossiper = gms::get_local_gossiper();
|
||||
return gossiper.do_shadow_round().then([this, &gossiper] {
|
||||
auto addr = get_broadcast_address();
|
||||
auto eps = gossiper.get_endpoint_state_for_endpoint(addr);
|
||||
if (eps && !gossiper.is_dead_state(*eps) && !gossiper.is_gossip_only_member(addr)) {
|
||||
throw std::runtime_error(sprint("A node with address %s already exists, cancelling join. "
|
||||
"Use cassandra.replace_address if you want to replace this node.", addr));
|
||||
}
|
||||
if (dht::range_streamer::use_strict_consistency()) {
|
||||
for (auto& x : gossiper.get_endpoint_states()) {
|
||||
auto status = x.second.get_application_state(application_state::STATUS);
|
||||
if (!status) {
|
||||
continue;
|
||||
}
|
||||
return seastar::async([this] {
|
||||
auto& gossiper = gms::get_local_gossiper();
|
||||
auto t = gms::gossiper::clk::now();
|
||||
bool found_bootstrapping_node = false;
|
||||
do {
|
||||
gossiper.do_shadow_round().get();
|
||||
auto addr = get_broadcast_address();
|
||||
auto eps = gossiper.get_endpoint_state_for_endpoint(addr);
|
||||
if (eps && !gossiper.is_dead_state(*eps) && !gossiper.is_gossip_only_member(addr)) {
|
||||
throw std::runtime_error(sprint("A node with address %s already exists, cancelling join. "
|
||||
"Use cassandra.replace_address if you want to replace this node.", addr));
|
||||
}
|
||||
if (dht::range_streamer::use_strict_consistency()) {
|
||||
found_bootstrapping_node = false;
|
||||
for (auto& x : gossiper.get_endpoint_states()) {
|
||||
auto status = x.second.get_application_state(application_state::STATUS);
|
||||
if (!status) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<sstring> pieces;
|
||||
boost::split(pieces, status.value().value, boost::is_any_of(sstring(versioned_value::DELIMITER_STR)));
|
||||
assert(pieces.size() > 0);
|
||||
auto state = pieces[0];
|
||||
logger.debug("Check node={}, state={}", x.first, state);
|
||||
if (state == sstring(versioned_value::STATUS_BOOTSTRAPPING) ||
|
||||
state == sstring(versioned_value::STATUS_LEAVING) ||
|
||||
state == sstring(versioned_value::STATUS_MOVING)) {
|
||||
throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while cassandra.consistent.rangemovement is true");
|
||||
std::vector<sstring> pieces;
|
||||
boost::split(pieces, status.value().value, boost::is_any_of(sstring(versioned_value::DELIMITER_STR)));
|
||||
assert(pieces.size() > 0);
|
||||
auto state = pieces[0];
|
||||
logger.debug("Checking node={}, status={} (check_for_endpoint_collision)", x.first, state);
|
||||
if (state == sstring(versioned_value::STATUS_BOOTSTRAPPING) ||
|
||||
state == sstring(versioned_value::STATUS_LEAVING) ||
|
||||
state == sstring(versioned_value::STATUS_MOVING)) {
|
||||
if (gms::gossiper::clk::now() > t + std::chrono::seconds(60)) {
|
||||
throw std::runtime_error("Other bootstrapping/leaving/moving nodes detected, cannot bootstrap while cassandra.consistent.rangemovement is true (check_for_endpoint_collision)");
|
||||
} else {
|
||||
gossiper.goto_shadow_round();
|
||||
gossiper.reset_endpoint_state_map();
|
||||
found_bootstrapping_node = true;
|
||||
auto elapsed = std::chrono::duration_cast<std::chrono::seconds>(gms::gossiper::clk::now() - t).count();
|
||||
logger.info("Checking node={}, status={}, sleep 1 second and check again ({} seconds elpased) (check_for_endpoint_collision)", x.first, state, elapsed);
|
||||
sleep(std::chrono::seconds(1)).get();
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
} while (found_bootstrapping_node);
|
||||
logger.info("Checking bootstrapping/leaving/moving nodes: ok (check_for_endpoint_collision)");
|
||||
gossiper.reset_endpoint_state_map();
|
||||
});
|
||||
}
|
||||
@@ -1083,10 +1132,10 @@ void storage_service::remove_endpoint(inet_address endpoint) {
|
||||
}
|
||||
|
||||
future<std::unordered_set<token>> storage_service::prepare_replacement_info() {
|
||||
if (!get_replace_address()) {
|
||||
if (!db().local().get_replace_address()) {
|
||||
throw std::runtime_error(sprint("replace_address is empty"));
|
||||
}
|
||||
auto replace_address = get_replace_address().value();
|
||||
auto replace_address = db().local().get_replace_address().value();
|
||||
logger.info("Gathering node replacement information for {}", replace_address);
|
||||
|
||||
// if (!MessagingService.instance().isListening())
|
||||
@@ -1242,13 +1291,16 @@ future<bool> storage_service::is_gossip_running() {
|
||||
|
||||
future<> storage_service::start_gossiping() {
|
||||
return run_with_write_api_lock([] (storage_service& ss) {
|
||||
if (!ss._initialized) {
|
||||
logger.warn("Starting gossip by operator request");
|
||||
return gms::get_local_gossiper().start(get_generation_number()).then([&ss] {
|
||||
ss._initialized = true;
|
||||
});
|
||||
}
|
||||
return make_ready_future<>();
|
||||
return seastar::async([&ss] {
|
||||
if (!ss._initialized) {
|
||||
logger.warn("Starting gossip by operator request");
|
||||
ss.set_gossip_tokens(ss.get_local_tokens());
|
||||
gms::get_local_gossiper().force_newer_generation();
|
||||
gms::get_local_gossiper().start_gossiping(get_generation_number()).then([&ss] {
|
||||
ss._initialized = true;
|
||||
}).get();
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -1256,7 +1308,7 @@ future<> storage_service::stop_gossiping() {
|
||||
return run_with_write_api_lock([] (storage_service& ss) {
|
||||
if (ss._initialized) {
|
||||
logger.warn("Stopping gossip by operator request");
|
||||
return gms::get_local_gossiper().stop().then([&ss] {
|
||||
return gms::get_local_gossiper().stop_gossiping().then([&ss] {
|
||||
ss._initialized = false;
|
||||
});
|
||||
}
|
||||
@@ -1559,6 +1611,10 @@ future<> storage_service::decommission() {
|
||||
throw std::runtime_error("no other normal nodes in the ring; decommission would be pointless");
|
||||
}
|
||||
|
||||
if (ss._operation_mode != mode::NORMAL) {
|
||||
throw std::runtime_error(sprint("Node in %s state; wait for status to become normal or restart", ss._operation_mode));
|
||||
}
|
||||
|
||||
get_local_pending_range_calculator_service().block_until_finished().get();
|
||||
|
||||
auto non_system_keyspaces = db.get_non_system_keyspaces();
|
||||
@@ -1579,7 +1635,7 @@ future<> storage_service::decommission() {
|
||||
|
||||
// FIXME: proper shutdown
|
||||
ss.shutdown_client_servers().get();
|
||||
gms::get_local_gossiper().stop().get();
|
||||
gms::get_local_gossiper().stop_gossiping().get();
|
||||
// MessagingService.instance().shutdown();
|
||||
// StageManager.shutdownNow();
|
||||
ss.set_mode(mode::DECOMMISSIONED, true);
|
||||
@@ -1630,6 +1686,7 @@ future<> storage_service::remove_node(sstring host_id_string) {
|
||||
auto& ks = ss.db().local().find_keyspace(keyspace_name);
|
||||
// if the replication factor is 1 the data is lost so we shouldn't wait for confirmation
|
||||
if (ks.get_replication_strategy().get_replication_factor() == 1) {
|
||||
logger.warn("keyspace={} has replication factor 1, the data is probably lost", keyspace_name);
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -1653,7 +1710,7 @@ future<> storage_service::remove_node(sstring host_id_string) {
|
||||
|
||||
// the gossiper will handle spoofing this node's state to REMOVING_TOKEN for us
|
||||
// we add our own token so other nodes to let us know when they're done
|
||||
gossiper.advertise_removing(endpoint, host_id, local_host_id);
|
||||
gossiper.advertise_removing(endpoint, host_id, local_host_id).get();
|
||||
|
||||
// kick off streaming commands
|
||||
ss.restore_replica_count(endpoint, my_address).get();
|
||||
@@ -1667,7 +1724,7 @@ future<> storage_service::remove_node(sstring host_id_string) {
|
||||
ss.excise(std::move(tmp), endpoint);
|
||||
|
||||
// gossiper will indicate the token has left
|
||||
gossiper.advertise_token_removed(endpoint, host_id);
|
||||
gossiper.advertise_token_removed(endpoint, host_id).get();
|
||||
|
||||
ss._replicating_nodes.clear();
|
||||
ss._removing_node = {};
|
||||
@@ -2034,15 +2091,17 @@ future<> storage_service::send_replication_notification(inet_address remote) {
|
||||
);
|
||||
}
|
||||
|
||||
void storage_service::confirm_replication(inet_address node) {
|
||||
// replicatingNodes can be empty in the case where this node used to be a removal coordinator,
|
||||
// but restarted before all 'replication finished' messages arrived. In that case, we'll
|
||||
// still go ahead and acknowledge it.
|
||||
if (!_replicating_nodes.empty()) {
|
||||
_replicating_nodes.erase(node);
|
||||
} else {
|
||||
logger.info("Received unexpected REPLICATION_FINISHED message from {}. Was this node recently a removal coordinator?", node);
|
||||
}
|
||||
future<> storage_service::confirm_replication(inet_address node) {
|
||||
return run_with_no_api_lock([node] (storage_service& ss) {
|
||||
// replicatingNodes can be empty in the case where this node used to be a removal coordinator,
|
||||
// but restarted before all 'replication finished' messages arrived. In that case, we'll
|
||||
// still go ahead and acknowledge it.
|
||||
if (!ss._replicating_nodes.empty()) {
|
||||
ss._replicating_nodes.erase(node);
|
||||
} else {
|
||||
logger.info("Received unexpected REPLICATION_FINISHED message from {}. Was this node recently a removal coordinator?", node);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
// Runs inside seastar::async context
|
||||
|
||||
@@ -248,6 +248,7 @@ public:
|
||||
|
||||
/** This method updates the local token on disk */
|
||||
void set_tokens(std::unordered_set<token> tokens);
|
||||
void set_gossip_tokens(const std::unordered_set<dht::token>& local_tokens);
|
||||
#if 0
|
||||
|
||||
public void registerDaemon(CassandraDaemon daemon)
|
||||
@@ -768,7 +769,7 @@ private:
|
||||
*/
|
||||
std::unordered_multimap<inet_address, range<token>> get_new_source_ranges(const sstring& keyspaceName, const std::vector<range<token>>& ranges);
|
||||
public:
|
||||
void confirm_replication(inet_address node);
|
||||
future<> confirm_replication(inet_address node);
|
||||
|
||||
private:
|
||||
|
||||
|
||||
@@ -45,10 +45,13 @@
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <vector>
|
||||
#include <chrono>
|
||||
|
||||
namespace sstables {
|
||||
|
||||
struct estimated_histogram {
|
||||
using clock = std::chrono::steady_clock;
|
||||
using duration = clock::duration;
|
||||
/**
|
||||
* The series of values to which the counts in `buckets` correspond:
|
||||
* 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 14, 17, 20, etc.
|
||||
@@ -126,7 +129,7 @@ public:
|
||||
if (low == bucket_offsets.end()) {
|
||||
low--;
|
||||
}
|
||||
auto pos = low - bucket_offsets.begin();
|
||||
auto pos = std::distance(bucket_offsets.begin(), low);
|
||||
buckets.at(pos)++;
|
||||
_count++;
|
||||
}
|
||||
@@ -138,7 +141,8 @@ public:
|
||||
* to the new count
|
||||
* @param n
|
||||
*/
|
||||
void add(int64_t n, int64_t new_count) {
|
||||
void add_nano(int64_t n, int64_t new_count) {
|
||||
n /= 1000;
|
||||
if (new_count <= _count) {
|
||||
return;
|
||||
}
|
||||
@@ -146,11 +150,15 @@ public:
|
||||
if (low == bucket_offsets.end()) {
|
||||
low--;
|
||||
}
|
||||
auto pos = low - bucket_offsets.begin();
|
||||
auto pos = std::distance(bucket_offsets.begin(), low);
|
||||
buckets.at(pos)+= new_count - _count;
|
||||
_count = new_count;
|
||||
}
|
||||
|
||||
void add(duration latency, int64_t new_count) {
|
||||
add_nano(std::chrono::duration_cast<std::chrono::nanoseconds>(latency).count(), new_count);
|
||||
}
|
||||
|
||||
/**
|
||||
* @return the smallest value that could have been added to this histogram
|
||||
*/
|
||||
|
||||
@@ -202,11 +202,14 @@ public:
|
||||
auto current_first = current->get_first_decorated_key(s);
|
||||
|
||||
if (previous != nullptr && current_first.tri_compare(s, previous->get_last_decorated_key(s)) <= 0) {
|
||||
#if 0
|
||||
logger.warn(String.format("At level %d, %s [%s, %s] overlaps %s [%s, %s]. This could be caused by a bug in Cassandra 1.1.0 .. 1.1.3 or due to the fact that you have dropped sstables from another node into the data directory. " +
|
||||
"Sending back to L0. If you didn't drop in sstables, and have not yet run scrub, you should do so since you may also have rows out-of-order within an sstable",
|
||||
level, previous, previous.first, previous.last, current, current.first, current.last));
|
||||
#endif
|
||||
|
||||
logger.warn("At level {}, {} [{}, {}] overlaps {} [{}, {}]. This could be caused by a bug in Cassandra 1.1.0 .. 1.1.3 " \
|
||||
"or due to the fact that you have dropped sstables from another node into the data directory. " \
|
||||
"Sending back to L0. If you didn't drop in sstables, and have not yet run scrub, you should do so since you may also " \
|
||||
"have rows out-of-order within an sstable",
|
||||
level, previous->get_filename(), previous->get_first_partition_key(s), previous->get_last_partition_key(s),
|
||||
current->get_filename(), current->get_first_partition_key(s), current->get_last_partition_key(s));
|
||||
|
||||
out_of_order_sstables.push_back(current);
|
||||
} else {
|
||||
previous = &*current;
|
||||
@@ -605,9 +608,9 @@ public:
|
||||
std::vector<sstables::shared_sstable> get_candidates_for(int level) {
|
||||
const schema& s = *_schema;
|
||||
assert(!get_level(level).empty());
|
||||
#if 0
|
||||
logger.debug("Choosing candidates for L{}", level);
|
||||
|
||||
logger.debug("Choosing candidates for L{}", level);
|
||||
#if 0
|
||||
final Set<SSTableReader> compacting = cfs.getDataTracker().getCompacting();
|
||||
#endif
|
||||
if (level == 0) {
|
||||
|
||||
@@ -347,8 +347,13 @@ public:
|
||||
};
|
||||
|
||||
data_consume_context::~data_consume_context() = default;
|
||||
data_consume_context::data_consume_context(data_consume_context&&) = default;
|
||||
data_consume_context& data_consume_context::operator=(data_consume_context&&) = default;
|
||||
data_consume_context::data_consume_context(data_consume_context&& o) noexcept
|
||||
: _pimpl(std::move(o._pimpl))
|
||||
{ }
|
||||
data_consume_context& data_consume_context::operator=(data_consume_context&& o) noexcept {
|
||||
_pimpl = std::move(o._pimpl);
|
||||
return *this;
|
||||
}
|
||||
data_consume_context::data_consume_context(std::unique_ptr<impl> p) : _pimpl(std::move(p)) { }
|
||||
future<> data_consume_context::read() {
|
||||
return _pimpl->read();
|
||||
|
||||
@@ -39,6 +39,7 @@
|
||||
#include "index_reader.hh"
|
||||
#include "remove.hh"
|
||||
#include "memtable.hh"
|
||||
#include "range.hh"
|
||||
#include "downsampling.hh"
|
||||
#include <boost/filesystem/operations.hpp>
|
||||
#include <boost/algorithm/string.hpp>
|
||||
@@ -821,7 +822,7 @@ future<> sstable::read_simple(T& component) {
|
||||
auto file_path = filename(Type);
|
||||
sstlog.debug(("Reading " + _component_map[Type] + " file {} ").c_str(), file_path);
|
||||
return engine().open_file_dma(file_path, open_flags::ro).then([this, &component] (file f) {
|
||||
auto r = make_lw_shared<file_random_access_reader>(std::move(f), 4096);
|
||||
auto r = make_lw_shared<file_random_access_reader>(std::move(f), sstable_buffer_size);
|
||||
auto fut = parse(*r, component);
|
||||
return fut.finally([r = std::move(r)] {
|
||||
return r->close();
|
||||
@@ -1701,23 +1702,6 @@ remove_by_toc_name(sstring sstable_toc_name) {
|
||||
});
|
||||
}
|
||||
|
||||
static future<bool>
|
||||
file_exists(sstring filename) {
|
||||
return engine().open_file_dma(filename, open_flags::ro).then([] (file f) {
|
||||
return f.close().finally([f] {});
|
||||
}).then_wrapped([] (future<> f) {
|
||||
bool exists = true;
|
||||
try {
|
||||
f.get();
|
||||
} catch (std::system_error& e) {
|
||||
if (e.code() == std::error_code(ENOENT, std::system_category())) {
|
||||
exists = false;
|
||||
}
|
||||
}
|
||||
return make_ready_future<bool>(exists);
|
||||
});
|
||||
}
|
||||
|
||||
future<>
|
||||
sstable::remove_sstable_with_temp_toc(sstring ks, sstring cf, sstring dir, int64_t generation, version_types v, format_types f) {
|
||||
return seastar::async([ks, cf, dir, generation, v, f] {
|
||||
@@ -1759,4 +1743,21 @@ sstable::remove_sstable_with_temp_toc(sstring ks, sstring cf, sstring dir, int64
|
||||
});
|
||||
}
|
||||
|
||||
future<range<partition_key>>
|
||||
sstable::get_sstable_key_range(const schema& s, sstring ks, sstring cf, sstring dir, int64_t generation, version_types v, format_types f) {
|
||||
auto sst = std::make_unique<sstable>(ks, cf, dir, generation, v, f);
|
||||
auto fut = sst->read_summary();
|
||||
return std::move(fut).then([sst = std::move(sst), &s] () mutable {
|
||||
auto first = sst->get_first_partition_key(s);
|
||||
auto last = sst->get_last_partition_key(s);
|
||||
return make_ready_future<range<partition_key>>(range<partition_key>::make(first, last));
|
||||
});
|
||||
}
|
||||
|
||||
void sstable::mark_sstable_for_deletion(sstring ks, sstring cf, sstring dir, int64_t generation, version_types v, format_types f) {
|
||||
auto sst = sstable(ks, cf, dir, generation, v, f);
|
||||
sstlog.info("sstable {} not relevant for this shard, ignoring", sst.get_filename());
|
||||
sst.mark_for_deletion();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -76,8 +76,8 @@ public:
|
||||
// Define (as defaults) the destructor and move operations in the source
|
||||
// file, so here we don't need to know the incomplete impl type.
|
||||
~data_consume_context();
|
||||
data_consume_context(data_consume_context&&);
|
||||
data_consume_context& operator=(data_consume_context&&);
|
||||
data_consume_context(data_consume_context&&) noexcept;
|
||||
data_consume_context& operator=(data_consume_context&&) noexcept;
|
||||
};
|
||||
|
||||
// mutation_reader is an object returned by sstable::read_rows() et al. which
|
||||
@@ -507,6 +507,15 @@ public:
|
||||
|
||||
future<> mutate_sstable_level(uint32_t);
|
||||
|
||||
// Return sstable key range as range<partition_key> reading only the summary component.
|
||||
static future<range<partition_key>>
|
||||
get_sstable_key_range(const schema& s, sstring ks, sstring cf, sstring dir, int64_t generation, version_types v, format_types f);
|
||||
|
||||
// Used to mark a sstable for deletion that is not relevant to the current shard.
|
||||
// It doesn't mean that the sstable will be deleted, but that the sstable is not
|
||||
// relevant to the current shard, thus can be deleted by the deletion manager.
|
||||
static void mark_sstable_for_deletion(sstring ks, sstring cf, sstring dir, int64_t generation, version_types v, format_types f);
|
||||
|
||||
// Allow the test cases from sstable_test.cc to test private methods. We use
|
||||
// a placeholder to avoid cluttering this class too much. The sstable_test class
|
||||
// will then re-export as public every method it needs.
|
||||
|
||||
@@ -93,14 +93,22 @@ void stream_transfer_task::start() {
|
||||
return stop_iteration::yes;
|
||||
}
|
||||
sslog.debug("[Stream #{}] SEND STREAM_MUTATION to {}, cf_id={}", plan_id, id, cf_id);
|
||||
session->ms().send_stream_mutation(id, session->plan_id(), *fm, session->dst_cpu_id).then_wrapped([&msg, this, plan_id, id, fm] (auto&& f) {
|
||||
session->ms().send_stream_mutation(id, session->plan_id(), *fm, session->dst_cpu_id).then_wrapped([&msg, this, cf_id, plan_id, id, fm] (auto&& f) {
|
||||
try {
|
||||
f.get();
|
||||
sslog.debug("[Stream #{}] GOT STREAM_MUTATION Reply", plan_id);
|
||||
msg.mutations_done.signal();
|
||||
} catch (...) {
|
||||
sslog.error("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION to {}: {}", plan_id, id, std::current_exception());
|
||||
msg.mutations_done.broken();
|
||||
} catch (std::exception& e) {
|
||||
auto err = std::string(e.what());
|
||||
// Seastar RPC does not provide exception type info, so we can not catch no_such_column_family here
|
||||
// Need to compare the exception error msg
|
||||
if (err.find("Can't find a column family with UUID") != std::string::npos) {
|
||||
sslog.info("[Stream #{}] remote node {} does not have the cf_id = {}", plan_id, id, cf_id);
|
||||
msg.mutations_done.signal();
|
||||
} else {
|
||||
sslog.error("[Stream #{}] stream_transfer_task: Fail to send STREAM_MUTATION to {}: {}", plan_id, id, err);
|
||||
msg.mutations_done.broken();
|
||||
}
|
||||
}
|
||||
}).finally([] {
|
||||
get_local_stream_manager().mutation_send_limiter().signal();
|
||||
|
||||
@@ -42,18 +42,24 @@
|
||||
|
||||
using namespace db;
|
||||
|
||||
typedef std::pair<tmpdir, commitlog> tmplog;
|
||||
typedef lw_shared_ptr<tmplog> tmplog_ptr;
|
||||
|
||||
// create tmp dir + commit log
|
||||
static future<tmplog_ptr> make_commitlog(commitlog::config cfg =
|
||||
commitlog::config()) {
|
||||
template<typename Func>
|
||||
static future<> cl_test(commitlog::config cfg, Func && f) {
|
||||
tmpdir tmp;
|
||||
cfg.commit_log_location = tmp.path;
|
||||
return commitlog::create_commitlog(cfg).then(
|
||||
[tmp = std::move(tmp)](commitlog log) mutable {
|
||||
return make_ready_future<tmplog_ptr>(make_lw_shared<tmplog>(std::move(tmp), std::move(log)));
|
||||
return commitlog::create_commitlog(cfg).then([f = std::forward<Func>(f)](commitlog log) mutable {
|
||||
return do_with(std::move(log), [f = std::forward<Func>(f)](commitlog& log) {
|
||||
return futurize<std::result_of_t<Func(commitlog&)>>::apply(f, log).finally([&log] {
|
||||
return log.clear();
|
||||
});
|
||||
});
|
||||
}).finally([tmp = std::move(tmp)] {
|
||||
});
|
||||
}
|
||||
|
||||
template<typename Func>
|
||||
static future<> cl_test(Func && f) {
|
||||
commitlog::config cfg;
|
||||
return cl_test(cfg, std::forward<Func>(f));
|
||||
}
|
||||
|
||||
#if 0
|
||||
@@ -63,102 +69,14 @@ static int loggo = [] {
|
||||
}();
|
||||
#endif
|
||||
|
||||
class file_lister {
|
||||
file _f;
|
||||
subscription<directory_entry> _listing;
|
||||
public:
|
||||
file_lister(file f)
|
||||
: _f(std::move(f)), _listing(
|
||||
_f.list_directory(
|
||||
[this] (directory_entry de) {return report(de);})) {
|
||||
}
|
||||
future<> done() {
|
||||
return _listing.done();
|
||||
}
|
||||
const std::vector<directory_entry> & contents() const {
|
||||
return _contents;
|
||||
}
|
||||
private:
|
||||
std::vector<directory_entry> _contents;
|
||||
|
||||
future<> report(directory_entry de) {
|
||||
_contents.emplace_back(de);
|
||||
return make_ready_future<>();
|
||||
}
|
||||
};
|
||||
|
||||
// Opens the directory at `path`, lists it through a file_lister and resolves
// to that lister once its done() future has resolved.
static future<lw_shared_ptr<file_lister>> list_files(sstring path) {
    using lister_ptr = lw_shared_ptr<file_lister>;
    return engine().open_directory(path).then([](auto dir) {
        lister_ptr lister = make_lw_shared<file_lister>(std::move(dir));
        // The continuation keeps its own reference so the lister stays alive
        // until the listing has finished.
        return lister->done().then([lister]() {
            return make_ready_future<lister_ptr>(lister);
        });
    });
}
|
||||
|
||||
// Resolves the type of directory entry `de` located in directory `path`.
// When the entry already carries a type, or has an empty name, the stored
// (possibly disengaged) type is returned as a ready future; otherwise the
// type is looked up via engine().file_type() on "<path>/<name>".
future<std::experimental::optional<directory_entry_type>> entry_type(const sstring & path, const directory_entry & de) {
    if (de.type || de.name.empty()) {
        return make_ready_future<std::experimental::optional<directory_entry_type>>(de.type);
    }
    return engine().file_type(path + "/" + de.name);
}
|
||||
|
||||
// Resolves to the number of entries listed for `path` whose type, as
// determined by entry_type(), is directory_entry_type::regular.
static future<size_t> count_files(sstring path) {
    return list_files(path).then([path](auto listing) {
        // Shared counter updated by the per-entry continuations.
        auto regular_files = make_lw_shared<size_t>(0);
        auto classify = [regular_files, path](auto de) {
            return entry_type(path, de).then([regular_files](auto type) {
                if (type == directory_entry_type::regular) {
                    *regular_files += 1;
                }
            });
        };
        return parallel_for_each(listing->contents(), std::move(classify)).then([regular_files] {
            return make_ready_future<size_t>(*regular_files);
        });
    });
}
|
||||
|
||||
// Resolves to the number of regular files listed for `path` whose stat()
// reports a size greater than zero. Entries of any other type are ignored.
static future<size_t> count_files_with_size(sstring path) {
    return list_files(path).then([path](auto listing) {
        // Shared counter updated by the per-entry continuations.
        auto non_empty = make_lw_shared<size_t>(0);
        const auto& entries = listing->contents();
        return parallel_for_each(entries.begin(), entries.end(), [non_empty, path](directory_entry de) {
            return entry_type(path, de).then([non_empty, path, de](auto type) {
                if (!(type == directory_entry_type::regular)) {
                    return make_ready_future();
                }
                return engine().open_file_dma(path + "/" + de.name, open_flags::ro).then([non_empty](file f) {
                    // do_with keeps the file handle alive until stat() has resolved.
                    return do_with(std::move(f), [non_empty] (auto& f) {
                        return f.stat().then([non_empty](struct stat s) {
                            if (s.st_size > 0) {
                                ++(*non_empty);
                            }
                        });
                    });
                });
            });
        }).then([non_empty]() {
            return make_ready_future<size_t>(*non_empty);
        });
    });
}
|
||||
|
||||
namespace db {
|
||||
template<typename... Args>
|
||||
inline std::basic_ostream<Args...> & operator<<(std::basic_ostream<Args...> & os, const db::replay_position & rp) {
|
||||
return os << "[" << rp.id << ", " << rp.pos << "]" << std::endl;
|
||||
|
||||
}
|
||||
}
|
||||
// just write in-memory...
|
||||
SEASTAR_TEST_CASE(test_create_commitlog){
|
||||
return make_commitlog().then([](tmplog_ptr log) {
|
||||
return cl_test([](commitlog& log) {
|
||||
sstring tmp = "hej bubba cow";
|
||||
return log->second.add_mutation(utils::UUID_gen::get_time_UUID(), tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
return log.add_mutation(utils::UUID_gen::get_time_UUID(), tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
dst.write(tmp.begin(), tmp.end());
|
||||
}).then([](db::replay_position rp) {
|
||||
BOOST_CHECK_NE(rp, db::replay_position());
|
||||
}).finally([log]() {
|
||||
return log->second.clear().then([log] {});
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -167,39 +85,33 @@ SEASTAR_TEST_CASE(test_create_commitlog){
|
||||
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_batch){
|
||||
commitlog::config cfg;
|
||||
cfg.mode = commitlog::sync_mode::BATCH;
|
||||
return make_commitlog(cfg).then([](tmplog_ptr log) {
|
||||
return cl_test(cfg, [](commitlog& log) {
|
||||
sstring tmp = "hej bubba cow";
|
||||
return log->second.add_mutation(utils::UUID_gen::get_time_UUID(), tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
return log.add_mutation(utils::UUID_gen::get_time_UUID(), tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
dst.write(tmp.begin(), tmp.end());
|
||||
}).then([log](replay_position rp) {
|
||||
}).then([&log](replay_position rp) {
|
||||
BOOST_CHECK_NE(rp, db::replay_position());
|
||||
return count_files_with_size(log->first.path).then([log](size_t n) {
|
||||
BOOST_REQUIRE(n > 0);
|
||||
});
|
||||
}).finally([log]() {
|
||||
return log->second.clear().then([log] {});
|
||||
auto n = log.get_flush_count();
|
||||
BOOST_REQUIRE(n > 0);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_commitlog_written_to_disk_periodic){
|
||||
return make_commitlog().then([](tmplog_ptr log) {
|
||||
return cl_test([](commitlog& log) {
|
||||
auto state = make_lw_shared(false);
|
||||
auto uuid = utils::UUID_gen::get_time_UUID();
|
||||
return do_until([state]() {return *state;},
|
||||
[log, state, uuid]() {
|
||||
[&log, state, uuid]() {
|
||||
sstring tmp = "hej bubba cow";
|
||||
return log->second.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
return log.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
dst.write(tmp.begin(), tmp.end());
|
||||
}).then([log, state](replay_position rp) {
|
||||
}).then([&log, state](replay_position rp) {
|
||||
BOOST_CHECK_NE(rp, db::replay_position());
|
||||
return count_files_with_size(log->first.path).then([state](size_t n) {
|
||||
*state = n > 0;
|
||||
});
|
||||
auto n = log.get_flush_count();
|
||||
*state = n > 0;
|
||||
});
|
||||
|
||||
}).finally([log]() {
|
||||
return log->second.clear().then([log] {});
|
||||
});
|
||||
});
|
||||
}
|
||||
@@ -207,34 +119,39 @@ SEASTAR_TEST_CASE(test_commitlog_written_to_disk_periodic){
|
||||
SEASTAR_TEST_CASE(test_commitlog_new_segment){
|
||||
commitlog::config cfg;
|
||||
cfg.commitlog_segment_size_in_mb = 1;
|
||||
return make_commitlog(cfg).then([](tmplog_ptr log) {
|
||||
return do_with(std::unordered_set<db::segment_id_type>(), [log](auto& set) {
|
||||
return cl_test(cfg, [](commitlog& log) {
|
||||
return do_with(std::unordered_set<db::segment_id_type>(), [&log](auto& set) {
|
||||
auto uuid = utils::UUID_gen::get_time_UUID();
|
||||
return do_until([&set]() { return set.size() > 1; }, [log, &set, uuid]() {
|
||||
return do_until([&set]() { return set.size() > 1; }, [&log, &set, uuid]() {
|
||||
sstring tmp = "hej bubba cow";
|
||||
return log->second.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
return log.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
dst.write(tmp.begin(), tmp.end());
|
||||
}).then([log, &set](replay_position rp) {
|
||||
}).then([&set](replay_position rp) {
|
||||
BOOST_CHECK_NE(rp, db::replay_position());
|
||||
set.insert(rp.id);
|
||||
});
|
||||
});
|
||||
}).then([log] {
|
||||
return count_files(log->first.path).then([](size_t n) {
|
||||
BOOST_REQUIRE(n > 1);
|
||||
});
|
||||
}).finally([log] {
|
||||
return log->second.clear().then([log] {});
|
||||
}).then([&log] {
|
||||
auto n = log.get_active_segment_names().size();
|
||||
BOOST_REQUIRE(n > 1);
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
typedef std::vector<sstring> segment_names;
|
||||
|
||||
// Returns the names present in `prev` that are no longer reported by
// log.get_active_segment_names(), i.e. the segments that went away since
// `prev` was captured. With the default (empty) `prev` the result is empty.
static segment_names segment_diff(commitlog& log, segment_names prev = {}) {
    segment_names now = log.get_active_segment_names();
    // std::set_difference requires both input ranges to be sorted with the
    // same ordering; nothing visible here guarantees that for the name lists,
    // so sort our private copies first.
    std::sort(prev.begin(), prev.end());
    std::sort(now.begin(), now.end());
    segment_names diff;
    std::set_difference(prev.begin(), prev.end(), now.begin(), now.end(), std::back_inserter(diff));
    return diff;
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_commitlog_discard_completed_segments){
|
||||
//logging::logger_registry().set_logger_level("commitlog", logging::log_level::trace);
|
||||
commitlog::config cfg;
|
||||
cfg.commitlog_segment_size_in_mb = 1;
|
||||
return make_commitlog(cfg).then([](tmplog_ptr log) {
|
||||
return cl_test(cfg, [](commitlog& log) {
|
||||
struct state_type {
|
||||
std::vector<utils::UUID> uuids;
|
||||
std::unordered_map<utils::UUID, replay_position> rps;
|
||||
@@ -254,57 +171,54 @@ SEASTAR_TEST_CASE(test_commitlog_discard_completed_segments){
|
||||
|
||||
auto state = make_lw_shared<state_type>();
|
||||
return do_until([state]() { return state->ids.size() > 1; },
|
||||
[log, state]() {
|
||||
[&log, state]() {
|
||||
sstring tmp = "hej bubba cow";
|
||||
auto uuid = state->next_uuid();
|
||||
return log->second.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
return log.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
dst.write(tmp.begin(), tmp.end());
|
||||
}).then([log, state, uuid](replay_position pos) {
|
||||
}).then([state, uuid](replay_position pos) {
|
||||
state->ids.insert(pos.id);
|
||||
state->rps[uuid] = pos;
|
||||
});
|
||||
}).then([log, state]() {
|
||||
return count_files(log->first.path).then([log, state](size_t n) {
|
||||
BOOST_REQUIRE(n > 1);
|
||||
// sync all so we have no outstanding async sync ops that
|
||||
// might prevent discard_completed_segments to actually dispose
|
||||
// of clean segments (shared_ptr in task)
|
||||
return log->second.sync_all_segments().then([log, state, n] {
|
||||
for (auto & p : state->rps) {
|
||||
log->second.discard_completed_segments(p.first, p.second);
|
||||
}
|
||||
size_t nn = log->second.get_num_segments_destroyed();
|
||||
BOOST_REQUIRE(nn > 0);
|
||||
BOOST_REQUIRE(nn <= n);
|
||||
});
|
||||
});
|
||||
}).finally([log]() {
|
||||
return log->second.clear().then([log] {});
|
||||
}).then([&log, state]() {
|
||||
auto names = log.get_active_segment_names();
|
||||
BOOST_REQUIRE(names.size() > 1);
|
||||
// sync all so we have no outstanding async sync ops that
|
||||
// might prevent discard_completed_segments to actually dispose
|
||||
// of clean segments (shared_ptr in task)
|
||||
return log.sync_all_segments().then([&log, state, names] {
|
||||
for (auto & p : state->rps) {
|
||||
log.discard_completed_segments(p.first, p.second);
|
||||
}
|
||||
auto diff = segment_diff(log, names);
|
||||
auto nn = diff.size();
|
||||
auto dn = log.get_num_segments_destroyed();
|
||||
|
||||
BOOST_REQUIRE(nn > 0);
|
||||
BOOST_REQUIRE(nn <= names.size());
|
||||
BOOST_REQUIRE(dn <= nn);
|
||||
});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_equal_record_limit){
|
||||
return make_commitlog().then([](tmplog_ptr log) {
|
||||
auto size = log->second.max_record_size();
|
||||
return log->second.add_mutation(utils::UUID_gen::get_time_UUID(), size, [size](db::commitlog::output& dst) {
|
||||
return cl_test([](commitlog& log) {
|
||||
auto size = log.max_record_size();
|
||||
return log.add_mutation(utils::UUID_gen::get_time_UUID(), size, [size](db::commitlog::output& dst) {
|
||||
dst.write(char(1), size);
|
||||
}).then([](db::replay_position rp) {
|
||||
BOOST_CHECK_NE(rp, db::replay_position());
|
||||
}).finally([log]() {
|
||||
return log->second.clear().then([log] {});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_exceed_record_limit){
|
||||
return make_commitlog().then([](tmplog_ptr log) {
|
||||
auto size = log->second.max_record_size() + 1;
|
||||
return log->second.add_mutation(utils::UUID_gen::get_time_UUID(), size, [size](db::commitlog::output& dst) {
|
||||
return cl_test([](commitlog& log) {
|
||||
auto size = log.max_record_size() + 1;
|
||||
return log.add_mutation(utils::UUID_gen::get_time_UUID(), size, [size](db::commitlog::output& dst) {
|
||||
dst.write(char(1), size);
|
||||
}).then([](db::replay_position rp) {
|
||||
// should not reach.
|
||||
}).then_wrapped([](future<> f) {
|
||||
}).then_wrapped([](future<db::replay_position> f) {
|
||||
try {
|
||||
f.get();
|
||||
} catch (...) {
|
||||
@@ -312,59 +226,65 @@ SEASTAR_TEST_CASE(test_exceed_record_limit){
|
||||
return make_ready_future();
|
||||
}
|
||||
throw std::runtime_error("Did not get expected exception from writing too large record");
|
||||
}).finally([log]() {
|
||||
return log->second.clear().then([log] {});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit){
|
||||
SEASTAR_TEST_CASE(test_commitlog_delete_when_over_disk_limit) {
|
||||
commitlog::config cfg;
|
||||
cfg.commitlog_segment_size_in_mb = 2;
|
||||
cfg.commitlog_total_space_in_mb = 1;
|
||||
return make_commitlog(cfg).then([](tmplog_ptr log) {
|
||||
cfg.commitlog_sync_period_in_ms = 1;
|
||||
return cl_test(cfg, [](commitlog& log) {
|
||||
auto sem = make_lw_shared<semaphore>(0);
|
||||
auto segments = make_lw_shared<segment_names>();
|
||||
|
||||
// add a flush handler that simply says we're done with the range.
|
||||
auto r = log->second.add_flush_handler([log, sem](cf_id_type id, replay_position pos) {
|
||||
log->second.discard_completed_segments(id, pos);
|
||||
auto r = log.add_flush_handler([&log, sem, segments](cf_id_type id, replay_position pos) {
|
||||
*segments = log.get_active_segment_names();
|
||||
log.discard_completed_segments(id, pos);
|
||||
sem->signal();
|
||||
});
|
||||
|
||||
auto set = make_lw_shared<std::set<segment_id_type>>();
|
||||
auto uuid = utils::UUID_gen::get_time_UUID();
|
||||
return do_until([set, sem]() {return set->size() > 1 && sem->try_wait();},
|
||||
[log, set, uuid]() {
|
||||
return do_until([set]() {return set->size() > 2;},
|
||||
[&log, set, uuid]() {
|
||||
sstring tmp = "hej bubba cow";
|
||||
return log->second.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
return log.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
dst.write(tmp.begin(), tmp.end());
|
||||
}).then([log, set](replay_position rp) {
|
||||
}).then([set](replay_position rp) {
|
||||
BOOST_CHECK_NE(rp, db::replay_position());
|
||||
set->insert(rp.id);
|
||||
});
|
||||
}).then([log]() {
|
||||
auto n = log->second.get_active_segment_names().size();
|
||||
auto d = log->second.get_num_segments_destroyed();
|
||||
BOOST_REQUIRE(n > 0);
|
||||
BOOST_REQUIRE(d > 0);
|
||||
}).finally([log, r = std::move(r)]() {
|
||||
return log->second.clear().then([log] {});
|
||||
}).then([&log, sem, segments]() {
|
||||
auto names = log.get_active_segment_names();
|
||||
auto diff = segment_diff(log, *segments);
|
||||
auto nn = diff.size();
|
||||
auto dn = log.get_num_segments_destroyed();
|
||||
|
||||
BOOST_REQUIRE(nn > 0);
|
||||
BOOST_REQUIRE(nn <= names.size());
|
||||
BOOST_REQUIRE(dn <= nn);
|
||||
}).finally([r = std::move(r)] {
|
||||
});
|
||||
}).then([]{});
|
||||
});
|
||||
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_commitlog_reader){
|
||||
commitlog::config cfg;
|
||||
cfg.commitlog_segment_size_in_mb = 1;
|
||||
return make_commitlog(cfg).then([](tmplog_ptr log) {
|
||||
return cl_test(cfg, [](commitlog& log) {
|
||||
auto set = make_lw_shared<std::set<segment_id_type>>();
|
||||
auto count = make_lw_shared<size_t>(0);
|
||||
auto count2 = make_lw_shared<size_t>(0);
|
||||
auto uuid = utils::UUID_gen::get_time_UUID();
|
||||
return do_until([count, set]() {return set->size() > 1;},
|
||||
[log, uuid, count, set]() {
|
||||
[&log, uuid, count, set]() {
|
||||
sstring tmp = "hej bubba cow";
|
||||
return log->second.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
return log.add_mutation(uuid, tmp.size(), [tmp](db::commitlog::output& dst) {
|
||||
dst.write(tmp.begin(), tmp.end());
|
||||
}).then([log, set, count](replay_position rp) {
|
||||
}).then([&log, set, count](replay_position rp) {
|
||||
BOOST_CHECK_NE(rp, db::replay_position());
|
||||
set->insert(rp.id);
|
||||
if (set->size() == 1) {
|
||||
@@ -372,34 +292,134 @@ SEASTAR_TEST_CASE(test_commitlog_reader){
|
||||
}
|
||||
});
|
||||
|
||||
}).then([log]() {
|
||||
return count_files(log->first.path).then([](size_t n) {
|
||||
BOOST_REQUIRE(n > 1);
|
||||
}).then([&log, set, count2]() {
|
||||
auto segments = log.get_active_segment_names();
|
||||
BOOST_REQUIRE(segments.size() > 1);
|
||||
|
||||
auto id = *set->begin();
|
||||
auto i = std::find_if(segments.begin(), segments.end(), [id](sstring filename) {
|
||||
commitlog::descriptor desc(filename);
|
||||
return desc.id == id;
|
||||
});
|
||||
if (i == segments.end()) {
|
||||
throw std::runtime_error("Did not find expected log file");
|
||||
}
|
||||
return db::commitlog::read_log_file(*i, [count2](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
sstring str(buf.get(), buf.size());
|
||||
BOOST_CHECK_EQUAL(str, "hej bubba cow");
|
||||
(*count2)++;
|
||||
return make_ready_future<>();
|
||||
}).then([](auto s) {
|
||||
return do_with(std::move(s), [](auto& s) {
|
||||
return s->done();
|
||||
});
|
||||
});
|
||||
}).then([log, set, count2] {
|
||||
// TODO, meh, hard coded name...
|
||||
auto findme = sstring("CommitLog-1-") + std::to_string(*set->begin()) + ".log";
|
||||
return list_files(log->first.path).then([log, findme, count2](auto l) {
|
||||
for (auto & de : l->contents()) {
|
||||
if (de.name == findme) {
|
||||
auto path = log->first.path + "/" + de.name;
|
||||
return db::commitlog::read_log_file(path, [count2](temporary_buffer<char> buf, db::replay_position rp) {
|
||||
sstring str(buf.get(), buf.size());
|
||||
BOOST_CHECK_EQUAL(str, "hej bubba cow");
|
||||
(*count2)++;
|
||||
return make_ready_future<>();
|
||||
}).then([log](auto s) {
|
||||
auto ss = make_lw_shared(std::move(s));
|
||||
return ss->done().then([ss] {});
|
||||
});
|
||||
}
|
||||
}
|
||||
throw std::runtime_error("Did not find expected log file");
|
||||
});
|
||||
}).then([count, count2] {
|
||||
BOOST_CHECK_EQUAL(*count, *count2);
|
||||
}).finally([log]() {
|
||||
return log->second.clear().then([log] {});
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
// Test helper: overwrites the 32-bit word at byte offset `off` of the file
// `seg` with `value`. The file prefix up to the patched word (rounded up to a
// 4096-byte multiple) is read, patched in memory and written back at offset 0.
static future<> corrupt_segment(sstring seg, uint64_t off, uint32_t value) {
    return engine().open_file_dma(seg, open_flags::rw).then([off, value](file f) {
        // The buffer must cover the whole patched word, not just its first
        // byte: rounding up `off` alone yields a buffer that ends before
        // off + sizeof(value) whenever `off` is 4096-aligned or lies within
        // the last three bytes of a block.
        size_t size = align_up<size_t>(off + sizeof(value), 4096);
        return do_with(std::move(f), [size, off, value](file& f) {
            return f.dma_read_exactly<char>(0, size).then([&f, off, value](auto buf) {
                *reinterpret_cast<uint32_t *>(buf.get_write() + off) = value;
                // Take pointer and length before `buf` is moved into the
                // continuation that keeps it alive for the write.
                auto dst = buf.get();
                auto size = buf.size();
                return f.dma_write(0, dst, size).then([buf = std::move(buf)](size_t) {});
            });
        });
    });
}
|
||||
|
||||
// Writes two entries, syncs, then overwrites a word 4 bytes past the start of
// the second entry in the first active segment. Reading the segment back must
// deliver only the first entry to the consumer and then fail with
// segment_data_corruption_error reporting a non-zero byte count.
SEASTAR_TEST_CASE(test_commitlog_entry_corruption){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    return cl_test(cfg, [](commitlog& log) {
        auto written = make_lw_shared<size_t>(0);
        auto positions = make_lw_shared<std::vector<db::replay_position>>();

        auto have_two_entries = [written]() { return *written > 1; };
        auto append_entry = [&log, written, positions]() {
            auto uuid = utils::UUID_gen::get_time_UUID();
            sstring payload = "hej bubba cow";
            return log.add_mutation(uuid, payload.size(), [payload](db::commitlog::output& dst) {
                dst.write(payload.begin(), payload.end());
            }).then([positions, written](replay_position rp) {
                BOOST_CHECK_NE(rp, db::replay_position());
                positions->push_back(rp);
                ++(*written);
            });
        };

        return do_until(std::move(have_two_entries), std::move(append_entry)).then([&log]() {
            return log.sync_all_segments();
        }).then([&log, positions] {
            auto segments = log.get_active_segment_names();
            BOOST_REQUIRE(!segments.empty());
            auto target = segments[0];
            const auto corrupt_at = positions->at(1).pos + 4;
            return corrupt_segment(target, corrupt_at, 0x451234ab).then([target, positions] {
                auto consumer = [positions](temporary_buffer<char> buf, db::replay_position rp) {
                    // Only the entry preceding the corrupted one may show up.
                    BOOST_CHECK_EQUAL(rp, positions->at(0));
                    return make_ready_future<>();
                };
                return db::commitlog::read_log_file(target, std::move(consumer)).then([](auto sub) {
                    return do_with(std::move(sub), [](auto& sub) {
                        return sub->done();
                    });
                }).then_wrapped([](auto&& result) {
                    try {
                        result.get();
                        BOOST_FAIL("Expected exception");
                    } catch (commitlog::segment_data_corruption_error& e) {
                        // ok.
                        BOOST_REQUIRE(e.bytes() > 0);
                    }
                });
            });
        });
    });
}
|
||||
|
||||
// Writes two entries, syncs, then overwrites the word located 4 bytes before
// the first entry in the first active segment. Reading the segment back must
// deliver no entry at all to the consumer and fail with
// segment_data_corruption_error reporting a non-zero byte count.
SEASTAR_TEST_CASE(test_commitlog_chunk_corruption){
    commitlog::config cfg;
    cfg.commitlog_segment_size_in_mb = 1;
    return cl_test(cfg, [](commitlog& log) {
        auto written = make_lw_shared<size_t>(0);
        auto positions = make_lw_shared<std::vector<db::replay_position>>();

        auto have_two_entries = [written]() { return *written > 1; };
        auto append_entry = [&log, written, positions]() {
            auto uuid = utils::UUID_gen::get_time_UUID();
            sstring payload = "hej bubba cow";
            return log.add_mutation(uuid, payload.size(), [payload](db::commitlog::output& dst) {
                dst.write(payload.begin(), payload.end());
            }).then([positions, written](replay_position rp) {
                BOOST_CHECK_NE(rp, db::replay_position());
                positions->push_back(rp);
                ++(*written);
            });
        };

        return do_until(std::move(have_two_entries), std::move(append_entry)).then([&log]() {
            return log.sync_all_segments();
        }).then([&log, positions] {
            auto segments = log.get_active_segment_names();
            BOOST_REQUIRE(!segments.empty());
            auto target = segments[0];
            const auto corrupt_at = positions->at(0).pos - 4;
            return corrupt_segment(target, corrupt_at, 0x451234ab).then([target] {
                auto consumer = [](temporary_buffer<char> buf, db::replay_position rp) {
                    BOOST_FAIL("Should not reach");
                    return make_ready_future<>();
                };
                return db::commitlog::read_log_file(target, std::move(consumer)).then([](auto sub) {
                    return do_with(std::move(sub), [](auto& sub) {
                        return sub->done();
                    });
                }).then_wrapped([](auto&& result) {
                    try {
                        result.get();
                        BOOST_FAIL("Expected exception");
                    } catch (commitlog::segment_data_corruption_error& e) {
                        // ok.
                        BOOST_REQUIRE(e.bytes() > 0);
                    }
                });
            });
        });
    });
}
|
||||
@@ -412,9 +432,9 @@ SEASTAR_TEST_CASE(test_commitlog_counters) {
|
||||
});
|
||||
};
|
||||
BOOST_CHECK_EQUAL(count_cl_counters(), 0);
|
||||
return make_commitlog().then([&](tmplog_ptr log) {
|
||||
return cl_test([count_cl_counters](commitlog& log) {
|
||||
BOOST_CHECK_GT(count_cl_counters(), 0);
|
||||
}).finally([&]() {
|
||||
}).finally([count_cl_counters] {
|
||||
BOOST_CHECK_EQUAL(count_cl_counters(), 0);
|
||||
});
|
||||
}
|
||||
@@ -422,9 +442,8 @@ SEASTAR_TEST_CASE(test_commitlog_counters) {
|
||||
#ifndef DEFAULT_ALLOCATOR
|
||||
|
||||
SEASTAR_TEST_CASE(test_allocation_failure){
|
||||
commitlog::config cfg;
|
||||
return make_commitlog(cfg).then([](tmplog_ptr log) {
|
||||
auto size = log->second.max_record_size() - 1;
|
||||
return cl_test([](commitlog& log) {
|
||||
auto size = log.max_record_size() - 1;
|
||||
|
||||
auto junk = make_lw_shared<std::list<std::unique_ptr<char[]>>>();
|
||||
|
||||
@@ -435,11 +454,9 @@ SEASTAR_TEST_CASE(test_allocation_failure){
|
||||
}
|
||||
} catch (std::bad_alloc&) {
|
||||
}
|
||||
return log->second.add_mutation(utils::UUID_gen::get_time_UUID(), size, [size](db::commitlog::output& dst) {
|
||||
return log.add_mutation(utils::UUID_gen::get_time_UUID(), size, [size](db::commitlog::output& dst) {
|
||||
dst.write(char(1), size);
|
||||
}).then([](db::replay_position rp) {
|
||||
// should not reach.
|
||||
}).then_wrapped([junk](future<> f) {
|
||||
}).then_wrapped([junk](future<db::replay_position> f) {
|
||||
try {
|
||||
f.get();
|
||||
} catch (std::bad_alloc&) {
|
||||
@@ -447,11 +464,8 @@ SEASTAR_TEST_CASE(test_allocation_failure){
|
||||
junk->clear();
|
||||
return make_ready_future();
|
||||
} catch (...) {
|
||||
throw std::runtime_error("Did not get expected exception from writing too large record");
|
||||
}
|
||||
return make_ready_future<>();
|
||||
}).finally([log]() {
|
||||
return log->second.clear().then([log] {});
|
||||
throw std::runtime_error("Did not get expected exception from writing too large record");
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
@@ -283,6 +283,7 @@ public:
|
||||
}
|
||||
return seastar::async([this] {
|
||||
utils::fb_utilities::set_broadcast_address(gms::inet_address("localhost"));
|
||||
utils::fb_utilities::set_broadcast_rpc_address(gms::inet_address("localhost"));
|
||||
locator::i_endpoint_snitch::create_snitch("SimpleSnitch").get();
|
||||
auto db = ::make_shared<distributed<database>>();
|
||||
init_once(db).get();
|
||||
|
||||
@@ -43,6 +43,7 @@ future<> one_test(const std::string& property_fname, bool exp_result) {
|
||||
fname /= path(property_fname);
|
||||
|
||||
utils::fb_utilities::set_broadcast_address(gms::inet_address("localhost"));
|
||||
utils::fb_utilities::set_broadcast_rpc_address(gms::inet_address("localhost"));
|
||||
|
||||
return i_endpoint_snitch::create_snitch<const sstring&>(
|
||||
"EC2Snitch",
|
||||
|
||||
@@ -65,6 +65,7 @@ int main(int ac, char ** av) {
|
||||
logging::logger_registry().set_logger_level("gossip", logging::log_level::trace);
|
||||
const gms::inet_address listen = gms::inet_address(config["listen-address"].as<std::string>());
|
||||
utils::fb_utilities::set_broadcast_address(listen);
|
||||
utils::fb_utilities::set_broadcast_rpc_address(listen);
|
||||
auto vv = std::make_shared<gms::versioned_value::factory>();
|
||||
locator::i_endpoint_snitch::create_snitch("SimpleSnitch").then([&db] {
|
||||
return service::init_storage_service(db);
|
||||
@@ -96,7 +97,7 @@ int main(int ac, char ** av) {
|
||||
using namespace std::chrono;
|
||||
auto now = high_resolution_clock::now().time_since_epoch();
|
||||
int generation_number = duration_cast<seconds>(now).count();
|
||||
return gossiper.start(generation_number, app_states);
|
||||
return gossiper.start_gossiping(generation_number, app_states);
|
||||
}).then([vv] {
|
||||
return seastar::async([vv] {
|
||||
static double load = 0.5;
|
||||
|
||||
@@ -43,6 +43,7 @@ future<> one_test(const std::string& property_fname, bool exp_result) {
|
||||
fname /= path(property_fname);
|
||||
|
||||
utils::fb_utilities::set_broadcast_address(gms::inet_address("localhost"));
|
||||
utils::fb_utilities::set_broadcast_rpc_address(gms::inet_address("localhost"));
|
||||
|
||||
return i_endpoint_snitch::create_snitch<const sstring&>(
|
||||
"org.apache.cassandra.locator.GossipingPropertyFileSnitch",
|
||||
|
||||
@@ -41,3 +41,27 @@ static inline
|
||||
mutation_assertion assert_that(const mutation& m) {
|
||||
return { m };
|
||||
}
|
||||
|
||||
class mutation_opt_assertions {
|
||||
mutation_opt _mo;
|
||||
public:
|
||||
mutation_opt_assertions(mutation_opt mo) : _mo(std::move(mo)) {}
|
||||
|
||||
mutation_assertion has_mutation() {
|
||||
if (!_mo) {
|
||||
BOOST_FAIL("Expected engaged mutation_opt, but found not");
|
||||
}
|
||||
return { *_mo };
|
||||
}
|
||||
|
||||
void has_no_mutation() {
|
||||
if (_mo) {
|
||||
BOOST_FAIL("Expected disengaged mutation_opt");
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
static inline
|
||||
mutation_opt_assertions assert_that(mutation_opt mo) {
|
||||
return { std::move(mo) };
|
||||
}
|
||||
|
||||
@@ -157,6 +157,7 @@ void full_ring_check(const std::vector<ring_point>& ring_points,
|
||||
|
||||
future<> simple_test() {
|
||||
utils::fb_utilities::set_broadcast_address(gms::inet_address("localhost"));
|
||||
utils::fb_utilities::set_broadcast_rpc_address(gms::inet_address("localhost"));
|
||||
|
||||
// Create the RackInferringSnitch
|
||||
return i_endpoint_snitch::create_snitch("RackInferringSnitch").then(
|
||||
@@ -230,6 +231,7 @@ future<> simple_test() {
|
||||
|
||||
future<> heavy_origin_test() {
|
||||
utils::fb_utilities::set_broadcast_address(gms::inet_address("localhost"));
|
||||
utils::fb_utilities::set_broadcast_rpc_address(gms::inet_address("localhost"));
|
||||
|
||||
// Create the RackInferringSnitch
|
||||
return i_endpoint_snitch::create_snitch("RackInferringSnitch").then(
|
||||
|
||||
@@ -22,6 +22,7 @@
|
||||
#define BOOST_TEST_DYN_LINK
|
||||
|
||||
#include <boost/test/unit_test.hpp>
|
||||
#include <seastar/core/sleep.hh>
|
||||
|
||||
#include "tests/test-utils.hh"
|
||||
#include "tests/mutation_assertions.hh"
|
||||
@@ -33,6 +34,8 @@
|
||||
#include "core/thread.hh"
|
||||
#include "memtable.hh"
|
||||
|
||||
using namespace std::chrono_literals;
|
||||
|
||||
static schema_ptr make_schema() {
|
||||
return schema_builder("ks", "cf")
|
||||
.with_column("pk", bytes_type, column_kind::partition_key)
|
||||
@@ -49,11 +52,6 @@ mutation make_new_mutation(schema_ptr s, partition_key key) {
|
||||
return m;
|
||||
}
|
||||
|
||||
static
|
||||
mutation make_key_mutation(schema_ptr s, bytes key) {
|
||||
return make_new_mutation(s, partition_key::from_single_value(*s, key));
|
||||
}
|
||||
|
||||
static
|
||||
partition_key new_key(schema_ptr s) {
|
||||
static thread_local int next = 0;
|
||||
@@ -116,20 +114,22 @@ struct decorated_key_order {
|
||||
}
|
||||
};
|
||||
|
||||
// Produces `n_mutations` mutations via make_new_mutation(s) and returns them
// sorted with mutation_decorated_key_less_comparator. A non-positive count
// yields an empty vector.
static std::vector<mutation> make_ring(schema_ptr s, int n_mutations) {
    std::vector<mutation> ring;
    for (int remaining = n_mutations; remaining > 0; --remaining) {
        ring.push_back(make_new_mutation(s));
    }
    mutation_decorated_key_less_comparator by_decorated_key;
    std::sort(ring.begin(), ring.end(), by_decorated_key);
    return ring;
}
|
||||
|
||||
SEASTAR_TEST_CASE(test_query_of_incomplete_range_goes_to_underlying) {
|
||||
return seastar::async([] {
|
||||
auto s = make_schema();
|
||||
|
||||
std::vector<mutation> mutations = {
|
||||
make_key_mutation(s, "key1"),
|
||||
make_key_mutation(s, "key2"),
|
||||
make_key_mutation(s, "key3")
|
||||
};
|
||||
|
||||
std::sort(mutations.begin(), mutations.end(), mutation_decorated_key_less_comparator());
|
||||
std::vector<mutation> mutations = make_ring(s, 3);
|
||||
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
|
||||
for (auto&& m : mutations) {
|
||||
mt->apply(m);
|
||||
}
|
||||
@@ -173,16 +173,10 @@ SEASTAR_TEST_CASE(test_single_key_queries_after_population_in_reverse_order) {
|
||||
return seastar::async([] {
|
||||
auto s = make_schema();
|
||||
|
||||
std::vector<mutation> mutations = {
|
||||
make_key_mutation(s, "key1"),
|
||||
make_key_mutation(s, "key2"),
|
||||
make_key_mutation(s, "key3")
|
||||
};
|
||||
|
||||
std::sort(mutations.begin(), mutations.end(), mutation_decorated_key_less_comparator());
|
||||
|
||||
auto mt = make_lw_shared<memtable>(s);
|
||||
|
||||
std::vector<mutation> mutations = make_ring(s, 3);
|
||||
|
||||
for (auto&& m : mutations) {
|
||||
mt->apply(m);
|
||||
}
|
||||
@@ -257,7 +251,8 @@ SEASTAR_TEST_CASE(test_eviction) {
|
||||
}
|
||||
|
||||
bool has_key(row_cache& cache, const dht::decorated_key& key) {
|
||||
auto reader = cache.make_reader(query::partition_range::make_singular(key));
|
||||
auto range = query::partition_range::make_singular(key);
|
||||
auto reader = cache.make_reader(range);
|
||||
auto mo = reader().get0();
|
||||
return bool(mo);
|
||||
}
|
||||
@@ -271,7 +266,8 @@ void verify_does_not_have(row_cache& cache, const dht::decorated_key& key) {
|
||||
}
|
||||
|
||||
void verify_has(row_cache& cache, const mutation& m) {
|
||||
auto reader = cache.make_reader(query::partition_range::make_singular(m.decorated_key()));
|
||||
auto range = query::partition_range::make_singular(m.decorated_key());
|
||||
auto reader = cache.make_reader(range);
|
||||
auto mo = reader().get0();
|
||||
BOOST_REQUIRE(bool(mo));
|
||||
assert_that(*mo).is_equal_to(m);
|
||||
@@ -359,3 +355,182 @@ SEASTAR_TEST_CASE(test_update) {
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
class throttle {
|
||||
unsigned _block_counter = 0;
|
||||
promise<> _p; // valid when _block_counter != 0, resolves when goes down to 0
|
||||
public:
|
||||
future<> enter() {
|
||||
if (_block_counter) {
|
||||
promise<> p1;
|
||||
promise<> p2;
|
||||
|
||||
auto f1 = p1.get_future();
|
||||
|
||||
p2.get_future().then([p1 = std::move(p1), p3 = std::move(_p)] () mutable {
|
||||
p1.set_value();
|
||||
p3.set_value();
|
||||
});
|
||||
_p = std::move(p2);
|
||||
|
||||
return f1;
|
||||
} else {
|
||||
return make_ready_future<>();
|
||||
}
|
||||
}
|
||||
|
||||
void block() {
|
||||
++_block_counter;
|
||||
_p = promise<>();
|
||||
}
|
||||
|
||||
void unblock() {
|
||||
assert(_block_counter);
|
||||
if (--_block_counter == 0) {
|
||||
_p.set_value();
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
class throttled_mutation_source {
|
||||
private:
|
||||
class impl : public enable_lw_shared_from_this<impl> {
|
||||
mutation_source _underlying;
|
||||
::throttle _throttle;
|
||||
private:
|
||||
class reader : public mutation_reader::impl {
|
||||
throttle& _throttle;
|
||||
mutation_reader _reader;
|
||||
public:
|
||||
reader(throttle& t, mutation_reader r)
|
||||
: _throttle(t)
|
||||
, _reader(std::move(r))
|
||||
{}
|
||||
|
||||
virtual future<mutation_opt> operator()() override {
|
||||
return _reader().finally([this] () {
|
||||
return _throttle.enter();
|
||||
});
|
||||
}
|
||||
};
|
||||
public:
|
||||
impl(mutation_source underlying)
|
||||
: _underlying(std::move(underlying))
|
||||
{ }
|
||||
|
||||
mutation_reader make_reader(const query::partition_range& pr) {
|
||||
return make_mutation_reader<reader>(_throttle, _underlying(pr));
|
||||
}
|
||||
|
||||
::throttle& throttle() { return _throttle; }
|
||||
};
|
||||
lw_shared_ptr<impl> _impl;
|
||||
public:
|
||||
throttled_mutation_source(mutation_source underlying)
|
||||
: _impl(make_lw_shared<impl>(std::move(underlying)))
|
||||
{ }
|
||||
|
||||
void block() {
|
||||
_impl->throttle().block();
|
||||
}
|
||||
|
||||
void unblock() {
|
||||
_impl->throttle().unblock();
|
||||
}
|
||||
|
||||
mutation_reader operator()(const query::partition_range& pr) {
|
||||
return _impl->make_reader(pr);
|
||||
}
|
||||
};
|
||||
|
||||
static std::vector<mutation> updated_ring(std::vector<mutation>& mutations) {
|
||||
std::vector<mutation> result;
|
||||
for (auto&& m : mutations) {
|
||||
result.push_back(make_new_mutation(m.schema(), m.key()));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
// Starts cache reads while the underlying source is throttled, then runs
// cache.update() with newer data, and checks which version each reader sees.
SEASTAR_TEST_CASE(test_cache_population_and_update_race) {
    return seastar::async([] {
        auto s = make_schema();

        // The set of memtables acting as the cache's underlying storage.
        std::vector<lw_shared_ptr<memtable>> underlying_memtables;

        auto data_source = [&] (const query::partition_range& pr) {
            std::vector<mutation_reader> mutation_readers;
            for (auto&& mt : underlying_memtables) {
                mutation_readers.emplace_back(mt->make_reader(pr));
            }
            return make_combined_reader(std::move(mutation_readers));
        };

        auto key_source = [&] (const query::partition_range& pr) {
            std::vector<key_reader> key_readers;
            for (auto&& mt : underlying_memtables) {
                key_readers.emplace_back(mt->as_key_source()(pr));
            }
            return make_combined_reader(s, std::move(key_readers));
        };

        throttled_mutation_source cache_source(data_source);
        cache_tracker tracker;
        row_cache cache(s, cache_source, key_source, tracker);

        // Initial data set, visible to the cache's underlying source.
        auto initial_mt = make_lw_shared<memtable>(s);
        underlying_memtables.push_back(initial_mt);
        auto initial_ring = make_ring(s, 3);
        for (auto&& m : initial_ring) {
            initial_mt->apply(m);
        }

        // Newer version of the same ring, not yet visible to the underlying source.
        auto updated_mt = make_lw_shared<memtable>(s);
        auto new_ring = updated_ring(initial_ring);
        for (auto&& m : new_ring) {
            updated_mt->apply(m);
        }

        cache_source.block();

        auto first_key_range = query::partition_range::make_singular(initial_ring[0].ring_position());
        auto single_key_reader = cache.make_reader(first_key_range);
        auto single_key_result = single_key_reader();

        auto early_full_reader = cache.make_reader();
        auto early_full_result = early_full_reader();

        sleep(10ms).get();

        auto flushed_mt = make_lw_shared<memtable>(s);
        flushed_mt->apply(*updated_mt).get();
        underlying_memtables.push_back(flushed_mt);

        // This update should miss on all partitions
        auto update_done = cache.update(*updated_mt, make_default_partition_presence_checker());

        auto reader_during_update = cache.make_reader();

        // early_full_reader, which is in progress, should not prevent forward progress of update()
        cache_source.unblock();
        update_done.get();

        // Reads started before memtable flush should return previous value, otherwise this test
        // doesn't trigger the conditions it is supposed to protect against.
        assert_that(single_key_result.get0()).has_mutation().is_equal_to(initial_ring[0]);

        assert_that(early_full_result.get0()).has_mutation().is_equal_to(initial_ring[0]);
        assert_that(early_full_reader().get0()).has_mutation().is_equal_to(new_ring[1]);
        assert_that(early_full_reader().get0()).has_mutation().is_equal_to(new_ring[2]);
        assert_that(early_full_reader().get0()).has_no_mutation();

        // Reads started after update was started but before previous populations completed
        // should already see the new data
        assert_that(std::move(reader_during_update))
            .produces(new_ring[0])
            .produces(new_ring[1])
            .produces(new_ring[2])
            .produces_end_of_stream();

        // Reads started after flush should see new data
        assert_that(cache.make_reader())
            .produces(new_ring[0])
            .produces(new_ring[1])
            .produces(new_ring[2])
            .produces_end_of_stream();
    });
}
|
||||
|
||||
@@ -38,6 +38,7 @@ future<> one_test(const std::string& property_fname1,
|
||||
using namespace boost::filesystem;
|
||||
|
||||
utils::fb_utilities::set_broadcast_address(gms::inet_address("localhost"));
|
||||
utils::fb_utilities::set_broadcast_rpc_address(gms::inet_address("localhost"));
|
||||
|
||||
printf("Testing %s and %s property files. Expected result is %s\n",
|
||||
property_fname1.c_str(), property_fname2.c_str(),
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user