Remove xfail from all tests for #5361, as the issue is fixed. Remove xfail from test_group_by_clustering_prefix_with_limit It references #5362, but is fixed by #17237. Refs #17237
99 lines
4.7 KiB
Python
99 lines
4.7 KiB
Python
# Copyright 2023-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: AGPL-3.0-or-later
|
|
|
|
#############################################################################
|
|
# Tests for the SELECT DISTINCT feature
|
|
#############################################################################
|
|
|
|
import pytest
|
|
from util import new_test_table, unique_key_int, random_string
|
|
from cassandra.protocol import InvalidRequest
|
|
|
|
@pytest.fixture(scope="module")
def table1(cql, test_keyspace):
    """Provide one table, shared by every test in this module.

    The table has an int partition key ``p``, an int clustering key ``c``
    and an int regular column ``v``.  The fixture yields whatever
    ``new_test_table`` yields; the tests interpolate that value into their
    CQL statements as the table to query.
    """
    schema = "p int, c int, v int, PRIMARY KEY (p, c)"
    with new_test_table(cql, test_keyspace, schema) as table:
        yield table
|
|
|
|
# Simple test for SELECT DISTINCT inside a single partition (returning just
|
|
# that single partition, or nothing)
|
|
def test_distinct_in_partition(cql, table1):
    """SELECT DISTINCT restricted to one partition yields that partition
    once, or nothing at all when the partition has no rows."""
    p = unique_key_int()
    insert = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
    # Three rows in the same partition: (p, 1, 1), (p, 2, 2), (p, 3, 3).
    for c in (1, 2, 3):
        cql.execute(insert, [p, c, c])
    # Without DISTINCT, three matches, with DISTINCT just one:
    every_row = list(cql.execute(f"select p from {table1} where p = {p}"))
    assert [(p,), (p,), (p,)] == every_row
    distinct_rows = list(cql.execute(f"select distinct p from {table1} where p = {p}"))
    assert [(p,)] == distinct_rows
    # A key that was never written to must produce an empty result.
    p2 = unique_key_int()
    missing = list(cql.execute(f"select distinct p from {table1} where p = {p2}"))
    assert [] == missing
|
|
|
|
# When we have "select distinct p", adding a "group by p" without any
|
|
# aggregation function is allowed, but doesn't change anything:
|
|
def test_distinct_in_partition_group_by(cql, table1):
    """Adding "group by p" to "select distinct p" (with no aggregation)
    must return the same rows as the query without it."""
    p = unique_key_int()
    insert = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
    # Three rows in the same partition: (p, 1, 1), (p, 2, 2), (p, 3, 3).
    for c in (1, 2, 3):
        cql.execute(insert, [p, c, c])
    grouped = list(cql.execute(f"select distinct p from {table1} where p = {p} group by p"))
    assert [(p,)] == grouped
    # A key that was never written to must produce an empty result.
    p2 = unique_key_int()
    missing = list(cql.execute(f"select distinct p from {table1} where p = {p2} group by p"))
    assert [] == missing
|
|
|
|
# "select distinct p" with "group by p,c" doesn't make sense (we always
|
|
# have one value for p, we can't split it per c), and should not be allowed.
|
|
def test_distinct_in_partition_group_by_c(cql, table1):
    """Grouping a SELECT DISTINCT query by a clustering column must be
    rejected with InvalidRequest."""
    p = unique_key_int()
    insert = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
    # Three rows in the same partition: (p, 1, 1), (p, 2, 2), (p, 3, 3).
    for c in (1, 2, 3):
        cql.execute(insert, [p, c, c])
    query = f"select distinct p from {table1} where p = {p} group by p,c"
    # Cassandra reports the error message: "Grouping on clustering columns
    # is not allowed for SELECT DISTINCT queries".
    # Only the 'SELECT DISTINCT' part of the message is required to match.
    with pytest.raises(InvalidRequest, match='SELECT DISTINCT'):
        cql.execute(query)
|
|
|
|
# Test combination of SELECT DISTINCT with LIMIT
|
|
# This test involves a whole-table scan, so we need a fresh table.
|
|
def test_distinct_limit(cql, test_keyspace):
    """SELECT DISTINCT combined with LIMIT over a whole-table scan.

    Writes two rows into each of N partitions of a fresh table, then checks
    that "select distinct p" returns every partition exactly once and that
    "limit n" (for 0 < n <= N) returns exactly n different partitions, all
    of which were really written.
    """
    with new_test_table(cql, test_keyspace, "p int, c int, v int, PRIMARY KEY (p, c)") as table:
        stmt = cql.prepare(f"insert into {table} (p, c, v) values (?, ?, ?)")
        N = 10
        ps = [unique_key_int() for _ in range(N)]
        # Two clustering rows per partition, so DISTINCT has something
        # to collapse.
        for p in ps:
            cql.execute(stmt, [p, 1, 7])
            cql.execute(stmt, [p, 2, 7])
        # SELECT DISTINCT should produce all N partitions, once each:
        expected = [(p,) for p in ps]
        assert sorted(expected) == sorted(cql.execute(f"select distinct p from {table}"))
        # WITH LIMIT 0<n<=N we should get only n results
        expected_set = set(expected)
        for n in range(1, N + 1):
            results = list(cql.execute(f"select distinct p from {table} limit {n}"))
            assert n == len(results)
            # The n results must be n *different* partitions; a length
            # check plus a subset check alone would accept duplicates.
            assert n == len(set(results))
            assert set(results).issubset(expected_set)
|
|
|
|
# Test combination of SELECT DISTINCT, COUNT, GROUP BY and LIMIT.
|
|
# COUNT + GROUP BY means generate one count per partition, and the
|
|
# SELECT DISTINCT simply hands the counter just one row per partition
|
|
# to count, so all the counts come up 1. Adding a LIMIT to all of
|
|
# this exposes the same bug of limiting COUNT + GROUP BY, without the
|
|
# SELECT DISTINCT, reported in issue #5361:
|
|
def test_distinct_count_group_by_limit(cql, test_keyspace):
    """SELECT DISTINCT with COUNT, GROUP BY and LIMIT over a fresh table.

    Writes two rows into each of N partitions.  With DISTINCT, the
    per-partition count(p) is expected to be 1 for every partition, and
    adding "limit n" (for 0 < n <= N) must return exactly n different
    (p, 1) rows, all belonging to partitions that were really written.
    """
    with new_test_table(cql, test_keyspace, "p int, c int, v int, PRIMARY KEY (p, c)") as table:
        stmt = cql.prepare(f"insert into {table} (p, c, v) values (?, ?, ?)")
        N = 10
        ps = [unique_key_int() for _ in range(N)]
        # Two clustering rows per partition; DISTINCT should still hand
        # the counter a single row per partition.
        for p in ps:
            cql.execute(stmt, [p, 1, 7])
            cql.execute(stmt, [p, 2, 7])
        # SELECT DISTINCT should produce one (p, 1) result per partition:
        expected = [(p, 1) for p in ps]
        assert sorted(expected) == sorted(cql.execute(f"select distinct p, count(p) from {table} group by p"))
        expected_set = set(expected)
        for n in range(1, N + 1):
            results = list(cql.execute(f"select distinct p, count(p) from {table} group by p limit {n}"))
            assert n == len(results)
            # The n results must be n *different* groups; a length check
            # plus a subset check alone would accept duplicates.
            assert n == len(set(results))
            assert set(results).issubset(expected_set)
|