From e10b29921ada71111447597d0a9ddb70b75d2c15 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 28 Jun 2015 10:24:48 +0300 Subject: [PATCH 1/7] README: fix typos and paramter syntax Noticed by Gleb. --- README-DPDK.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README-DPDK.md b/README-DPDK.md index 39b1c5d10c..79d8862bc6 100644 --- a/README-DPDK.md +++ b/README-DPDK.md @@ -4,11 +4,11 @@ Seastar and DPDK Seastar uses the Data Plane Development Kit to drive NIC hardware directly. This provides an enormous performance boost. -To enable DPDK, specify `--enable-dpdk` to `./configure.py`, and `--dpdk-pmd 1` as a +To enable DPDK, specify `--enable-dpdk` to `./configure.py`, and `--dpdk-pmd` as a run-time parameter. This will use the DPDK package provided as a git submodule with the seastar sources. -To use your own seld-compiled DPDK package, follow this procedure: +To use your own self-compiled DPDK package, follow this procedure: 1. Setup host to compile DPDK: - Ubuntu From 7327e246c1ce0b8664585e2c48ddf13196df9944 Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 28 Jun 2015 12:23:13 +0300 Subject: [PATCH 2/7] tests: convert fileiotest.cc to Boost test case convert fileiotest.cc to Boost test case, making it easier to see what is being tested, and to add more file io tests. Signed-off-by: Nadav Har'El --- configure.py | 2 +- tests/fileiotest.cc | 67 ++++++++++++++++++++++----------------------- 2 files changed, 33 insertions(+), 36 deletions(-) diff --git a/configure.py b/configure.py index b4e5a2d772..66ccf53b74 100755 --- a/configure.py +++ b/configure.py @@ -285,7 +285,7 @@ deps = { 'apps/httpd/httpd': ['apps/httpd/demo.json', 'apps/httpd/main.cc'] + http + libnet + core, 'apps/memcached/memcached': ['apps/memcached/memcache.cc'] + memcache_base, 'tests/memcached/test_ascii_parser': ['tests/memcached/test_ascii_parser.cc'] + memcache_base + boost_test_lib, - 'tests/fileiotest': ['tests/fileiotest.cc'] + core, + 'tests/fileiotest': ['tests/fileiotest.cc'] + core + boost_test_lib, 'tests/directory_test': ['tests/directory_test.cc'] + core, 'tests/linecount': ['tests/linecount.cc'] + core, 'tests/echotest': ['tests/echotest.cc'] + core + libnet, diff --git a/tests/fileiotest.cc b/tests/fileiotest.cc index 19b289a56f..08b2ecfd2a 100644 --- a/tests/fileiotest.cc +++ b/tests/fileiotest.cc @@ -16,12 +16,14 @@ * under the License. */ /* - * Copyright (C) 2014 Cloudius Systems, Ltd. + * Copyright (C) 2014-2015 Cloudius Systems, Ltd. */ -#include +#include "tests/test-utils.hh" + +#include "core/semaphore.hh" +#include "core/file.hh" #include "core/reactor.hh" -#include "core/app-template.hh" struct file_test { file_test(file&& f) : f(std::move(f)) {} @@ -30,41 +32,36 @@ struct file_test { semaphore par = { 1000 }; }; -int main(int ac, char** av) { - app_template app; - - return app.run(ac, av, [&app] { - static constexpr auto max = 10000; - engine().open_file_dma("testfile.tmp", open_flags::rw | open_flags::create).then([] (file f) { - auto ft = new file_test{std::move(f)}; - for (size_t i = 0; i < max; ++i) { - ft->par.wait().then([ft, i] { - auto wbuf = allocate_aligned_buffer(4096, 4096); - std::fill(wbuf.get(), wbuf.get() + 4096, i); - auto wb = wbuf.get(); - ft->f.dma_write(i * 4096, wb, 4096).then( - [ft, i, wbuf = std::move(wbuf)] (size_t ret) mutable { - assert(ret == 4096); - auto rbuf = allocate_aligned_buffer(4096, 4096); - auto rb = rbuf.get(); - ft->f.dma_read(i * 4096, rb, 4096).then( - [ft, i, rbuf = std::move(rbuf), wbuf = std::move(wbuf)] (size_t ret) mutable { - assert(ret == 4096); - bool eq = std::equal(rbuf.get(), rbuf.get() + 4096, wbuf.get()); - assert(eq); - ft->sem.signal(1); - ft->par.signal(); - }); +SEASTAR_TEST_CASE(test1) { + // Note: this tests generates a file "testfile.tmp" with size 4096 * max (= 40 MB). + static constexpr auto max = 10000; + return open_file_dma("testfile.tmp", open_flags::rw | open_flags::create).then([] (file f) { + auto ft = new file_test{std::move(f)}; + for (size_t i = 0; i < max; ++i) { + ft->par.wait().then([ft, i] { + auto wbuf = allocate_aligned_buffer(4096, 4096); + std::fill(wbuf.get(), wbuf.get() + 4096, i); + auto wb = wbuf.get(); + ft->f.dma_write(i * 4096, wb, 4096).then( + [ft, i, wbuf = std::move(wbuf)] (size_t ret) mutable { + BOOST_REQUIRE(ret == 4096); + auto rbuf = allocate_aligned_buffer(4096, 4096); + auto rb = rbuf.get(); + ft->f.dma_read(i * 4096, rb, 4096).then( + [ft, i, rbuf = std::move(rbuf), wbuf = std::move(wbuf)] (size_t ret) mutable { + BOOST_REQUIRE(ret == 4096); + BOOST_REQUIRE(std::equal(rbuf.get(), rbuf.get() + 4096, wbuf.get())); + ft->sem.signal(1); + ft->par.signal(); }); }); - } - ft->sem.wait(max).then([ft] () mutable { - return ft->f.flush(); - }).then([ft] () mutable { - std::cout << "done\n"; - delete ft; - engine().exit(0); }); + } + return ft->sem.wait(max).then([ft] () mutable { + return ft->f.flush(); + }).then([ft] () mutable { + std::cout << "done\n"; + delete ft; }); }); } From ab63138134b8047913d3558faefd81439251e201 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 28 Jun 2015 14:39:17 +0300 Subject: [PATCH 3/7] tests: add fileiotest to regression test suite --- test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/test.py b/test.py index b92034d6ed..05083590ce 100755 --- a/test.py +++ b/test.py @@ -34,6 +34,7 @@ boost_tests = [ 'foreign_ptr_test', 'semaphore_test', 'shared_ptr_test', + 'fileiotest', ] other_tests = [ From 20d2154c4d9bdad605e27739213f695c5695a7bb Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 28 Jun 2015 15:09:44 +0300 Subject: [PATCH 4/7] shared_ptr: add use_count() Add a use_count() method for shared_ptr, like std::shared_ptr has. We already had such a method for lw_shared_ptr, but not for shared_ptr. Signed-off-by: Nadav Har'El --- core/shared_ptr.hh | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/core/shared_ptr.hh b/core/shared_ptr.hh index 9e47c1c0d4..56219a07ea 100644 --- a/core/shared_ptr.hh +++ b/core/shared_ptr.hh @@ -432,6 +432,13 @@ public: T* get() const noexcept { return _p; } + long use_count() const noexcept { + if (_b) { + return _b->count; + } else { + return 0; + } + } template struct make_helper; From 6ac01770df12869cca6edbf44e2d844ad84805c1 Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 28 Jun 2015 17:14:43 +0300 Subject: [PATCH 5/7] core: avoid fstat() call in reactor thread When creating a file object, we call fstat() to determine whether it's a block device or a regular file. While unlikely, the fstat() call can block. Use an ioctl() that we expect to fail on regular files instead. --- core/file.hh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/core/file.hh b/core/file.hh index e1ab3649ad..991b47ef4c 100644 --- a/core/file.hh +++ b/core/file.hh @@ -104,9 +104,8 @@ public: inline std::unique_ptr make_file_impl(int fd) { - struct stat st; - ::fstat(fd, &st); - if (S_ISBLK(st.st_mode)) { + auto r = ::ioctl(fd, BLKGETSIZE); + if (r != -1) { return std::unique_ptr(new blockdev_file_impl(fd)); } else { return std::unique_ptr(new posix_file_impl(fd)); From b306cee41e2abbf00704627ba59de636ae690e8c Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 28 Jun 2015 19:06:40 +0300 Subject: [PATCH 6/7] fstream: fix behavior of large writes to output stream Our file-output code contains various layers making sometimes contradictory assumptions, and it is a real art-form to make it all work together. They usually do work together well, but there was one undetected bug for large writes to a file output stream: The problem is what happens when we try to write a large buffer (larger than the output stream's buffer) in one output_stream::write() call. By default, output_stream uses the efficient, zero-copy, implementation which calls the underlying data sink's put function on the entire written buffer, without copying it to the stream's buffer first. Unfortunately, this solution does NOT work on *file* output streams. Because of our use of AIO and O_DIRECT, we can only write from aligned buffers, and at aligned (multiple of dma_alignment) sizes. Even a large size cannot be fully written if not a multiple of dma_alignment, and the need to align the buffers, and data already on the output_stream, complicate things further. Amazingly, we already had an option "_trim_to_size" in output_stream to do the right thing, and we just need to enable it for file output stream. In special cases (aligned position, aligned input buffer) it might be possible to do something even more efficient - zero copy and just one write request - but in the general case, _trim_to_size is exactly what we needed. Signed-off-by: Nadav Har'El --- core/fstream.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/fstream.cc b/core/fstream.cc index c375a75c19..c3aefc8761 100644 --- a/core/fstream.cc +++ b/core/fstream.cc @@ -141,6 +141,6 @@ output_stream make_file_output_stream(lw_shared_ptr f, size_t buffer } output_stream make_file_output_stream(lw_shared_ptr f, file_output_stream_options options) { - return output_stream(file_data_sink(std::move(f), options), options.buffer_size); + return output_stream(file_data_sink(std::move(f), options), options.buffer_size, true); } From f687ec40a37273cf1fa20fb002502f36308a05ef Mon Sep 17 00:00:00 2001 From: Nadav Har'El Date: Sun, 28 Jun 2015 18:34:37 +0300 Subject: [PATCH 7/7] fstream: catch bug early the file_data_sink_impl::put() code assumes it is always called on buffers with size multiple of dma alignment (4096), except the *last* one. After writing one unaligned-size buffer, further writes cannot continue because the offset into the file no longer has the right alignment! If a caller does try to do that, there is a bug in the caller (it's not a run-time error, it's a design bug), and better discover it quickly with an assert, as I do in this patch. I had such a caller in an example application, and it took me a whole day of debugging just to figure out that this is where the caller actually had a bug. Signed-off-by: Nadav Har'El Reviewed-by: Raphael S. Carvalho --- core/fstream.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/core/fstream.cc b/core/fstream.cc index c3aefc8761..729dfa77f9 100644 --- a/core/fstream.cc +++ b/core/fstream.cc @@ -92,6 +92,10 @@ public: return temporary_buffer::aligned(file::dma_alignment, size); } virtual future<> put(temporary_buffer buf) override { + // put() must usually be of chunks multiple of file::dma_alignment. + // Only the last part can have an unaligned length. If put() was + // called again with an unaligned pos, we have a bug in the caller. + assert(!(_pos & (file::dma_alignment - 1))); bool truncate = false; auto pos = _pos; _pos += buf.size();