Remove many unused "import" statements or parts of import statement. All of them were detected by Copilot, but I verified each one manually and prepared this patch. Signed-off-by: Nadav Har'El <nyh@scylladb.com> Closes scylladb/scylladb#27675
105 lines
5.1 KiB
Python
105 lines
5.1 KiB
Python
# Copyright 2023-present ScyllaDB
|
|
#
|
|
# SPDX-License-Identifier: LicenseRef-ScyllaDB-Source-Available-1.0
|
|
|
|
#############################################################################
|
|
# Tests for the SELECT DISTINCT feature
|
|
#############################################################################
|
|
|
|
import pytest
|
|
from .util import new_test_table, unique_key_int
|
|
from cassandra.protocol import InvalidRequest
|
|
|
|
@pytest.fixture(scope="module")
|
|
def table1(cql, test_keyspace):
|
|
with new_test_table(cql, test_keyspace, "p int, c int, v int, PRIMARY KEY (p, c)") as table:
|
|
yield table
|
|
|
|
# Simple test for SELECT DISTINCT inside a single partition (returning just
|
|
# that single partition, or nothing)
|
|
def test_distinct_in_partition(cql, table1):
|
|
p = unique_key_int()
|
|
stmt = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
|
|
cql.execute(stmt, [p, 1, 1])
|
|
cql.execute(stmt, [p, 2, 2])
|
|
cql.execute(stmt, [p, 3, 3])
|
|
# Without DISTINCT, three matches, with DISTINCT just one:
|
|
assert [(p,),(p,),(p,)] == list(cql.execute(f"select p from {table1} where p = {p}"))
|
|
assert [(p,)] == list(cql.execute(f"select distinct p from {table1} where p = {p}"))
|
|
p2 = unique_key_int()
|
|
assert [] == list(cql.execute(f"select distinct p from {table1} where p = {p2}"))
|
|
|
|
# When we have "select distinct p", adding a "group by p" without any
|
|
# aggregation function is allowed, but doesn't change anything:
|
|
def test_distinct_in_partition_group_by(cql, table1):
|
|
p = unique_key_int()
|
|
stmt = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
|
|
cql.execute(stmt, [p, 1, 1])
|
|
cql.execute(stmt, [p, 2, 2])
|
|
cql.execute(stmt, [p, 3, 3])
|
|
assert [(p,)] == list(cql.execute(f"select distinct p from {table1} where p = {p} group by p"))
|
|
p2 = unique_key_int()
|
|
assert [] == list(cql.execute(f"select distinct p from {table1} where p = {p2} group by p"))
|
|
|
|
# "select distinct p" with "group by p,c" doesn't makes sense (we always
|
|
# have one value for p, we can't split it per c), and should not be allowed.
|
|
def test_distinct_in_partition_group_by_c(cql, table1):
|
|
p = unique_key_int()
|
|
stmt = cql.prepare(f"insert into {table1} (p, c, v) values (?, ?, ?)")
|
|
cql.execute(stmt, [p, 1, 1])
|
|
cql.execute(stmt, [p, 2, 2])
|
|
cql.execute(stmt, [p, 3, 3])
|
|
# Cassandra reports the error message: "Grouping on clustering columns
|
|
# is not allowed for SELECT DISTINCT queries".
|
|
with pytest.raises(InvalidRequest, match='SELECT DISTINCT'):
|
|
cql.execute(f"select distinct p from {table1} where p = {p} group by p,c")
|
|
|
|
# Test combination of SELECT DISTINCT with LIMIT
|
|
# This test involves a whole-table scan, so we need a fresh table.
|
|
def test_distinct_limit(cql, test_keyspace):
|
|
with new_test_table(cql, test_keyspace, "p int, c int, v int, PRIMARY KEY (p, c)") as table:
|
|
stmt = cql.prepare(f"insert into {table} (p, c, v) values (?, ?, ?)")
|
|
N = 10
|
|
ps = [unique_key_int() for i in range(N)]
|
|
for i in range(N):
|
|
cql.execute(stmt, [ps[i], 1, 7])
|
|
cql.execute(stmt, [ps[i], 2, 7])
|
|
# SELECT DISTINCT should produce the N results all:
|
|
all = [(p,) for p in ps]
|
|
assert sorted(all) == sorted(list(cql.execute(f"select distinct p from {table}")))
|
|
# WITH LIMIT 0<n<=N we should get only n results
|
|
for i in range(N):
|
|
n = i + 1
|
|
results = list(cql.execute(f"select distinct p from {table} limit {n}"))
|
|
assert n == len(results)
|
|
assert set(results).issubset(set(all))
|
|
|
|
# Test that SELECT DISTINCT and PER PARTITION LIMIT is not allowed in the same query
|
|
# reproduces #15109
|
|
def test_distinct_and_per_partition_limit(cql, table1):
|
|
with pytest.raises(InvalidRequest, match='PER PARTITION LIMIT is not allowed with SELECT DISTINCT queries'):
|
|
cql.execute(f"select distinct p from {table1} per partition limit 1")
|
|
|
|
# Test combination of SELECT DISTINCT, COUNT, GROUP BY and LIMIT.
|
|
# COUNT + GROUP BY means generate one count per partition, and the
|
|
# SELECT DISTINCT simply hands the counter just one row per partition
|
|
# to count, so all the counts come up 1. Adding a LIMIT to all of
|
|
# this exposes the same bug of limiting COUNT + GROUP BY, without the
|
|
# SELECT DISTINCT, reported in issue #5361:
|
|
def test_distinct_count_group_by_limit(cql, test_keyspace):
|
|
with new_test_table(cql, test_keyspace, "p int, c int, v int, PRIMARY KEY (p, c)") as table:
|
|
stmt = cql.prepare(f"insert into {table} (p, c, v) values (?, ?, ?)")
|
|
N = 10
|
|
ps = [unique_key_int() for i in range(N)]
|
|
for i in range(N):
|
|
cql.execute(stmt, [ps[i], 1, 7])
|
|
cql.execute(stmt, [ps[i], 2, 7])
|
|
# SELECT DISTINCT should produce the N results ps:
|
|
all = [(p,1) for p in ps]
|
|
assert sorted(all) == sorted(list(cql.execute(f"select distinct p, count(p) from {table} group by p")))
|
|
for i in range(N):
|
|
n = i + 1
|
|
results = list(cql.execute(f"select distinct p, count(p) from {table} group by p limit {n}"))
|
|
assert n == len(results)
|
|
assert set(results).issubset(set(all))
|