Merge 'scylla_cluster.py: fix read_last_line' from Gusev Petr

This is a follow-up to #13399. The patch
addresses the issues mentioned there:
* linesep can be split between blocks;
* linesep can be part of a UTF-8 sequence;
* excessively long lines are avoided by limiting the output to 256 chars;
* the logic of the function is made simpler and more maintainable.

Closes #13427

* github.com:scylladb/scylladb:
  pylib_test: add tests for read_last_line
  pytest: add pylib_test directory
  scylla_cluster.py: fix read_last_line
  scylla_cluster.py: move read_last_line to util.py

(cherry picked from commit 70f2b09397)
This commit is contained in:
Kamil Braun
2023-05-05 13:29:15 +02:00
parent bcf99a37cd
commit e49a531aaa
7 changed files with 68 additions and 24 deletions

View File

@@ -25,7 +25,7 @@ from io import BufferedWriter
from test.pylib.host_registry import Host, HostRegistry
from test.pylib.pool import Pool
from test.pylib.rest_client import ScyllaRESTAPIClient, HTTPError
from test.pylib.util import LogPrefixAdapter
from test.pylib.util import LogPrefixAdapter, read_last_line
from test.pylib.internal_types import ServerNum, IPAddress, HostID, ServerInfo
import aiohttp
import aiohttp.web
@@ -385,29 +385,6 @@ class ScyllaServer:
sleep_interval = 0.1
cql_up_state = CqlUpState.NOT_CONNECTED
def read_last_line(file_path: pathlib.Path) -> str:
    """Return the last line of the file at ``file_path``.

    The file is scanned backwards in 4 KiB blocks until a line separator
    (``os.linesep``) is found or the beginning of the file is reached.
    A single trailing line separator at the very end of the file is ignored,
    so a file ending in a newline still yields its last non-empty line.

    Bytes that are not valid UTF-8 are replaced with U+FFFD instead of
    raising ``UnicodeDecodeError``.
    """
    block_size = 4 * 1024
    linesep = os.linesep.encode()
    tail = b''
    with file_path.open('rb') as f:
        # Measure the size through the handle we read from, so the size and
        # the data always refer to the same file.
        file_size = pos = f.seek(0, os.SEEK_END)
        while pos > 0:
            next_pos = max(pos - block_size, 0)
            f.seek(next_pos, os.SEEK_SET)
            block = f.read(pos - next_pos)
            tail = block + tail
            # ignore the last empty line if any
            if pos == file_size and tail.endswith(linesep):
                tail = tail[:-len(linesep)]
            pos = next_pos
            # Search the new block plus a seam of len(linesep) - 1 bytes, so
            # a separator split between two blocks is still found. Anything
            # further right was already searched in earlier iterations.
            linesep_index = tail.rfind(linesep, 0, len(block) + len(linesep) - 1)
            if linesep_index != -1:
                tail = tail[linesep_index + len(linesep):]
                break
    return tail.decode(errors='replace')
def report_error(message: str):
message += f", server_id {self.server_id}, IP {self.ip_addr}, workdir {self.workdir.name}"
message += f", host_id {self.host_id if hasattr(self, 'host_id') else '<missing>'}"

View File

@@ -6,6 +6,8 @@
import time
import asyncio
import logging
import pathlib
import os
from typing import Callable, Awaitable, Optional, TypeVar, Generic
@@ -76,5 +78,21 @@ async def wait_for_cql_and_get_hosts(cql: Session, servers: list[ServerInfo], de
return hosts
def read_last_line(file_path: pathlib.Path, max_line_bytes: int = 512) -> str:
    """Return the last line of ``file_path``, looking only at its tail.

    At most ``max_line_bytes`` bytes are read from the end of the file and
    decoded as UTF-8; undecodable bytes (for example a multi-byte sequence
    cut in half by the window) are dropped. One trailing ``os.linesep`` is
    ignored. If the window contains a line separator, the text after the
    last one is returned. Otherwise the whole window is returned, prefixed
    with ``'...'`` when the file is larger than the window to signal that
    the line may be truncated.
    """
    linesep = os.linesep
    with file_path.open('rb') as f:
        # Take the size from the open handle rather than a separate stat()
        # call, so size and data come from the same file even if it is being
        # appended to or replaced.
        file_size = f.seek(0, os.SEEK_END)
        f.seek(max(0, file_size - max_line_bytes), os.SEEK_SET)
        # Bound the read so a concurrently growing file cannot push the
        # result past the requested window.
        line_bytes = f.read(max_line_bytes)
    line_str = line_bytes.decode('utf-8', errors='ignore')
    if line_str.endswith(linesep):
        line_str = line_str[:-len(linesep)]
    linesep_index = line_str.rfind(linesep)
    if linesep_index != -1:
        return line_str[linesep_index + len(linesep):]
    if file_size > max_line_bytes:
        # No separator inside the window: the line may start before it.
        return '...' + line_str
    return line_str
unique_name.last_ms = 0

View File

View File

@@ -0,0 +1,9 @@
# Pytest configuration file. If we don't have one in this directory,
# pytest will look for one in our ancestor directories, and may find
# something irrelevant. So we should have one here, even if empty.
[pytest]
asyncio_mode = auto
log_cli = true
log_format = %(asctime)s.%(msecs)03d %(levelname)s> %(message)s
log_date_format = %H:%M:%S

11
test/pylib_test/run Executable file
View File

@@ -0,0 +1,11 @@
#!/usr/bin/env python3
"""Run the tests in this directory with pytest via the shared run.py helper."""
# Use the run.py library from ../cql-pytest:
import sys
sys.path.insert(1, sys.path[0] + '/../cql-pytest')
import run

# sys.path[0] is this script's directory; extra command-line arguments are
# forwarded to run_pytest unchanged.
success = run.run_pytest(sys.path[0], sys.argv[1:])

# NOTE(review): run.summary is presumably printed by run.py at exit - confirm.
run.summary = 'Pylib tests pass' if success else 'Pylib tests failure'

# Use sys.exit() rather than the exit() helper injected by the site module,
# which is intended for interactive use and is absent under `python -S`.
sys.exit(0 if success else 1)

View File

@@ -0,0 +1 @@
type: Run

View File

@@ -0,0 +1,28 @@
import os
import tempfile
import pathlib
from test.pylib.util import read_last_line
def test_read_last_line():
    """Check read_last_line against a table of file contents.

    Each case is ``(file bytes, expected last line)`` with an optional third
    element that is passed to read_last_line as ``max_line_bytes``.
    """
    cases = [
        (b"This is the first line.\nThis is the second line.\nThis is the third line.", 'This is the third line.'),
        (b"This is another file.\nIt has a few lines.\nThe last line is what we're interested in.", 'The last line is what we\'re interested in.'),
        (b"This file has only one line.", 'This file has only one line.'),
        (b"\n", ""),
        (b"\n\n\n", ""),
        (b"", ""),
        (b"abc\n", 'abc'),
        (b"abc", '...bc', 2),
        (b"lalala\nbububu", "bububu"),
        (b"line1\nline2\nline3\n", "...line3", 6),
        (b"line1\nline2\nline3", "line3", 6),
        (b"line1\nline2\nline3\n", "line3", 7),
        (b"\xbe\xbe\xbe\xbebububu\n", "bububu")
    ]
    for content, expected, *limit in cases:
        with tempfile.NamedTemporaryFile(dir=os.getenv('TMPDIR', '/tmp')) as tmp:
            tmp.write(content)
            # Flush so the bytes are visible when the file is reopened by path.
            tmp.flush()
            # `limit` is empty for two-element cases, so the default applies.
            actual = read_last_line(pathlib.Path(tmp.name), *limit)
            assert actual == expected