Benchmark & speed up SampleIndexIterator

I'm seeing what is possible performance-wise in the current C++ before
trying out Go and Rust implementations.

* use the google benchmark framework and some real data.

* use release builds - I hadn't done this in a while, and there were a
  few compile errors that manifested only in release mode. Update the
  readme to suggest using a release build.

* optimize the varint decoder and SampleIndexIterator to branch less.

* enable link-time optimization for release builds.

* add some support for feedback-directed optimization. Ideally "make"
  would automatically produce the "generate" build outputs with a
  different object/library/executable suffix, run the generate
  benchmark, and then produce the "use" builds. This is not that fancy;
  you have to run an arcane command:

  alias cmake='cmake -DCMAKE_BUILD_TYPE=Release'
  cmake -DPROFILE_GENERATE=true -DPROFILE_USE=false .. && \
  make recording-bench && \
  src/recording-bench && \
  cmake -DPROFILE_GENERATE=false -DPROFILE_USE=true .. && \
  make recording-bench && \
  perf stat -e cycles,instructions,branches,branch-misses \
      src/recording-bench --benchmark_repetitions=5

  That said, the results are dramatic - at least 50% improvement. (The
  results weren't stable before as small tweaks to the code caused a
  huge shift in performance, presumably something something branch
  alignment something something.)
This commit is contained in:
Scott Lamb 2016-05-19 22:53:23 -07:00
parent d083797e42
commit 0aadf227c1
20 changed files with 273 additions and 91 deletions

1
.gitignore vendored
View File

@ -1,6 +1,7 @@
*.swp
build
debug
release
obj-*
cameras.sql
debian/files

View File

@ -39,8 +39,25 @@ else()
set(CMAKE_CXX_STANDARD 11)
endif()
set(CMAKE_CXX_FLAGS "-Wall -Werror -pedantic-errors ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -ggdb")
set(CMAKE_CXX_FLAGS "-Wall -Werror -pedantic-errors -ggdb ${CMAKE_CXX_FLAGS}")
option(LTO "Use link-time optimization" ON)
option(FPROFILE_GENERATE "Compile executable to generate usage data" OFF)
option(FPROFILE_USE "Compile executable using generated usage data" OFF)
if(LTO)
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -flto")
set(CMAKE_AR "gcc-ar")
set(CMAKE_RANLIB "gcc-ranlib")
set(CMAKE_LD "gcc-ld")
endif()
if(PROFILE_GENERATE)
set(CMAKE_CXX_FLAGS "-fprofile-generate ${CMAKE_CXX_FLAGS}")
endif()
if(PROFILE_USE)
set(CMAKE_CXX_FLAGS "-fprofile-use -fprofile-correction ${CMAKE_CXX_FLAGS}")
endif()
#
# Dependencies.
@ -115,6 +132,21 @@ set_target_properties(GMock PROPERTIES
IMPORTED_LOCATION "${binary_dir}/${CMAKE_STATIC_LIBRARY_PREFIX}gmock${CMAKE_STATIC_LIBRARY_SUFFIX}"
IMPORTED_LINK_INTERFACE_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
ExternalProject_Add(
GBenchmarkProject
URL "https://github.com/google/benchmark/archive/v1.0.0.tar.gz"
URL_HASH "SHA1=4f778985dce02d2e63262e6f388a24b595254a93"
CMAKE_ARGS
-DCMAKE_BUILD_TYPE:STRING=${CMAKE_BUILD_TYPE}
INSTALL_COMMAND "")
ExternalProject_Get_Property(GBenchmarkProject source_dir binary_dir)
set(GBenchmark_INCLUDE_DIR ${source_dir}/include)
add_library(GBenchmark STATIC IMPORTED)
add_dependencies(GBenchmark GBenchmarkProject)
set_target_properties(GBenchmark PROPERTIES
IMPORTED_LOCATION "${binary_dir}/src/${CMAKE_STATIC_LIBRARY_PREFIX}benchmark${CMAKE_STATIC_LIBRARY_SUFFIX}"
IMPORTED_LINK_INTERFACE_LIBRARIES "${CMAKE_THREAD_LIBS_INIT}")
#
# Subdirectories.
#

View File

@ -142,9 +142,9 @@ For instructions, you can skip to "[Camera configuration and hard disk mounting]
Once prerequisites are installed, Moonfire NVR can be built as follows:
$ mkdir build
$ cd build
$ cmake ..
$ mkdir release
$ cd release
$ cmake -DCMAKE_BUILD_TYPE=Release ..
$ make
$ sudo make install

View File

@ -169,9 +169,9 @@ fi
#
if [ "${FORCE_BUILD:-0}" -eq 1 ]; then
# Remove previous build, if any
[ -d build ] && rm -fr build 2>/dev/null
mkdir build; cd build
cmake .. && make && sudo make install
[ -d release ] && rm -fr release 2>/dev/null
mkdir release; cd release
cmake -DCMAKE_BUILD_TYPE=Release .. && make && sudo make install
if [ -x "${SERVICE_BIN}" ]; then
echo "Binary installed..."; echo
else

View File

@ -73,6 +73,7 @@ install_programs(/bin FILES moonfire-nvr)
# Tests.
include_directories(${GTest_INCLUDE_DIR})
include_directories(${GMock_INCLUDE_DIR})
include_directories(${GBenchmark_INCLUDE_DIR})
set(MOONFIRE_NVR_TESTS
coding
@ -88,8 +89,13 @@ set(MOONFIRE_NVR_TESTS
foreach(test ${MOONFIRE_NVR_TESTS})
add_executable(${test}-test ${test}-test.cc testutil.cc)
target_link_libraries(${test}-test GTest GMock moonfire-nvr-lib )
target_link_libraries(${test}-test GTest GMock moonfire-nvr-lib)
add_test(NAME ${test}-test
COMMAND ${test}-test
WORKING_DIRECTORY ${CMAKE_BINARY_DIR})
endforeach(test)
foreach(bench recording)
add_executable(${bench}-bench ${bench}-bench.cc testutil.cc)
target_link_libraries(${bench}-bench GTest GMock GBenchmark moonfire-nvr-lib)
endforeach(bench)

View File

@ -58,18 +58,53 @@ TEST(VarintTest, Simple) {
EXPECT_EQ(UINT32_C(1), out);
EXPECT_TRUE(DecodeVar32(&p, &out, &error_message));
EXPECT_EQ(UINT32_C(300), out);
EXPECT_EQ(0, p.size());
}
TEST(VarintTest, AllDecodeSizes) {
std::string error_message;
const uint32_t kToDecode[]{
1,
1 | (2 << 7),
1 | (2 << 7) | (3 << 14),
1 | (2 << 7) | (3 << 14) | (4 << 21),
1 | (2 << 7) | (3 << 14) | (4 << 21) | (5 << 28),
};
for (size_t i = 0; i < sizeof(kToDecode) / sizeof(kToDecode[0]); ++i) {
auto in = kToDecode[i];
std::string foo;
AppendVar32(in, &foo);
ASSERT_EQ(i + 1, foo.size());
re2::StringPiece p(foo);
uint32_t out;
// Slow path: last bytes of the buffer.
DecodeVar32(&p, &out, &error_message);
EXPECT_EQ(in, out) << "i: " << i;
EXPECT_EQ(0, p.size()) << "i: " << i;
// Fast path: plenty of bytes in the buffer.
foo.append(4, 0);
p = foo;
DecodeVar32(&p, &out, &error_message);
EXPECT_EQ(in, out);
EXPECT_EQ(4, p.size());
}
}
TEST(VarintTest, DecodeErrors) {
re2::StringPiece empty;
uint32_t out;
std::string error_message;
EXPECT_FALSE(DecodeVar32(&empty, &out, &error_message));
EXPECT_EQ("buffer underrun", error_message);
re2::StringPiece partial("\x80", 1);
EXPECT_FALSE(DecodeVar32(&partial, &out, &error_message));
for (auto input :
{re2::StringPiece("", 0), re2::StringPiece("\x80", 1),
re2::StringPiece("\x80\x80", 2), re2::StringPiece("\x80\x80\x80", 3),
re2::StringPiece("\x80\x80\x80\x80", 4)}) {
EXPECT_FALSE(DecodeVar32(&input, &out, &error_message)) << "input: "
<< input;
EXPECT_EQ("buffer underrun", error_message);
}
re2::StringPiece too_big("\x80\x80\x80\x80\x10", 5);
EXPECT_FALSE(DecodeVar32(&too_big, &out, &error_message));

View File

@ -31,6 +31,7 @@
// coding.cc: see coding.h.
#include "coding.h"
#include "common.h"
namespace moonfire_nvr {
@ -50,24 +51,56 @@ void AppendVar32Slow(uint32_t in, std::string *out) {
bool DecodeVar32Slow(re2::StringPiece *in, uint32_t *out_p,
std::string *error_message) {
// The fast path is inlined; this function is called only when
// byte 0 is present and >= 0x80.
size_t left = in->size() - 1;
auto p = reinterpret_cast<uint8_t const *>(in->data());
auto end = p + in->size();
uint32_t out = 0;
int shift = 0;
do {
if (p == end) {
*error_message = "buffer underrun";
return false;
}
if (shift == 28 && *p & 0xf0) {
uint32_t v = uint32_t(p[0] & 0x7f);
size_t size = 1;
// Aid branch prediction in two ways:
// * have a faster path which doesn't check for buffer underrun on every
// byte if there's plenty of bytes left or the last byte is not continued.
// * fully unroll the loop
if (left >= 4 || (p[left] & 0x80) == 0) {
v |= uint32_t(p[size] & 0x7f) << 7;
if (p[size++] & 0x80) {
v |= uint32_t(p[size] & 0x7f) << 14;
if (p[size++] & 0x80) {
v |= uint32_t(p[size] & 0x7f) << 21;
if (p[size++] & 0x80) {
if (UNLIKELY(p[size] & 0xf0)) {
*error_message = "integer overflow";
return false;
}
out |= uint32_t(*p & 0x7f) << shift;
shift += 7;
} while ((*p++ & 0x80) != 0);
*out_p = out;
in->remove_prefix(reinterpret_cast<char const *>(p) - in->data());
v |= uint32_t(p[size++] & 0x7f) << 28;
}
}
}
*out_p = v;
in->remove_prefix(size);
return true;
}
// Slowest path.
if (LIKELY(left)) {
v |= uint32_t(p[size] & 0x7f) << 7;
if (p[size++] & 0x80 && --left > 0) {
v |= uint32_t(p[size] & 0x7f) << 14;
if (p[size++] & 0x80 && --left > 0) {
v |= uint32_t(p[size] & 0x7f) << 21;
if (p[size++] & 0x80) {
--left;
}
}
}
}
if (UNLIKELY(left == 0 && p[size - 1] & 0x80)) {
*error_message = "buffer underrun";
return false;
}
*out_p = v;
in->remove_prefix(size);
return true;
}

View File

@ -33,6 +33,9 @@
#ifndef MOONFIRE_NVR_COMMON_H
#define MOONFIRE_NVR_COMMON_H
#define LIKELY(x) __builtin_expect((x), 1)
#define UNLIKELY(x) __builtin_expect((x), 0)
namespace moonfire_nvr {
// Return value for *ForEach callbacks.

View File

@ -45,7 +45,6 @@
#include <event2/buffer.h>
#include <event2/http.h>
#include <glog/logging.h>
#include <gmock/gmock.h>
#include <re2/stringpiece.h>
#include "common.h"
@ -96,42 +95,6 @@ class File {
virtual int Write(re2::StringPiece data, size_t *bytes_written) = 0;
};
class MockFile : public File {
public:
MOCK_CONST_METHOD0(name, const std::string &());
MOCK_METHOD3(Access, int(const char *, int, int));
MOCK_METHOD0(Close, int());
// The std::unique_ptr<File> variants of Open are wrapped here because gmock's
// SetArgPointee doesn't work well with std::unique_ptr.
int Open(const char *path, int flags, std::unique_ptr<File> *f) final {
File *f_tmp = nullptr;
int ret = OpenRaw(path, flags, &f_tmp);
f->reset(f_tmp);
return ret;
}
int Open(const char *path, int flags, mode_t mode,
std::unique_ptr<File> *f) final {
File *f_tmp = nullptr;
int ret = OpenRaw(path, flags, mode, &f_tmp);
f->reset(f_tmp);
return ret;
}
MOCK_METHOD3(Open, int(const char *, int, int *));
MOCK_METHOD4(Open, int(const char *, int, mode_t, int *));
MOCK_METHOD3(OpenRaw, int(const char *, int, File **));
MOCK_METHOD4(OpenRaw, int(const char *, int, mode_t, File **));
MOCK_METHOD3(Read, int(void *, size_t, size_t *));
MOCK_METHOD1(Stat, int(struct stat *));
MOCK_METHOD0(Sync, int());
MOCK_METHOD1(Truncate, int(off_t));
MOCK_METHOD1(Unlink, int(const char *));
MOCK_METHOD2(Write, int(re2::StringPiece, size_t *));
};
// Interface to the local filesystem. There's typically one per program,
// but it's an abstract class for testability. Thread-safe.
class Filesystem {

View File

@ -387,7 +387,11 @@ void HttpServe(const std::shared_ptr<VirtualFile> &file, evhttp_request *req) {
<< ": Client requested whole file of size " << file->size();
http_status = HTTP_OK;
http_status_str = "OK";
break;
}
default:
LOG(FATAL) << "unexpected range_type: " << static_cast<int>(range_type);
}
// Successful reply started; add common headers and send.

View File

@ -61,6 +61,7 @@
#define MOONFIRE_NVR_MOONFIRE_DB_H
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <vector>

View File

@ -36,6 +36,7 @@
#ifndef MOONFIRE_NVR_MP4_H
#define MOONFIRE_NVR_MP4_H
#include <limits>
#include <memory>
#include <vector>

66
src/recording-bench.cc Normal file
View File

@ -0,0 +1,66 @@
// This file is part of Moonfire NVR, a security camera network video recorder.
// Copyright (C) 2016 Scott Lamb <slamb@slamb.org>
//
// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// In addition, as a special exception, the copyright holders give
// permission to link the code of portions of this program with the
// OpenSSL library under certain conditions as described in each
// individual source file, and distribute linked combinations including
// the two.
//
// You must obey the GNU General Public License in all respects for all
// of the code used other than OpenSSL. If you modify file(s) with this
// exception, you may extend this exception to your version of the
// file(s), but you are not obligated to do so. If you do not wish to do
// so, delete this exception statement from your version. If you delete
// this exception statement from all source files in the program, then
// also delete it here.
//
// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with this program. If not, see <http://www.gnu.org/licenses/>.
//
// recording-bench.cc: benchmarks of the recording.h interface.
#include <benchmark/benchmark.h>
#include <gflags/gflags.h>
#include "recording.h"
#include "testutil.h"
DECLARE_bool(alsologtostderr);
static void BM_Iterator(benchmark::State &state) {
using moonfire_nvr::ReadFileOrDie;
using moonfire_nvr::SampleIndexIterator;
// state.PauseTiming();
std::string index = ReadFileOrDie("../src/testdata/video_sample_index.bin");
// state.ResumeTiming();
while (state.KeepRunning()) {
SampleIndexIterator it(index);
while (!it.done()) it.Next();
CHECK(!it.has_error()) << it.error();
}
state.SetBytesProcessed(int64_t(state.iterations()) * int64_t(index.size()));
}
BENCHMARK(BM_Iterator);
int main(int argc, char **argv) {
FLAGS_alsologtostderr = true;
// Sadly, these two flag-parsing libraries don't appear to get along.
// google::ParseCommandLineFlags(&argc, &argv, true);
benchmark::Initialize(&argc, argv);
google::InitGoogleLogging(argv[0]);
benchmark::RunSpecifiedBenchmarks();
return 0;
}

View File

@ -40,6 +40,7 @@
#include "recording.h"
#include "string.h"
#include "testutil.h"
DECLARE_bool(alsologtostderr);

View File

@ -36,6 +36,7 @@
#include <sys/stat.h>
#include <sys/types.h>
#include "common.h"
#include "coding.h"
#include "string.h"
@ -81,22 +82,23 @@ void SampleIndexEncoder::AddSample(int32_t duration_90k, int32_t bytes,
void SampleIndexIterator::Next() {
uint32_t raw1;
uint32_t raw2;
pos_ += bytes_internal();
if (data_.empty() || !DecodeVar32(&data_, &raw1, &error_) ||
!DecodeVar32(&data_, &raw2, &error_)) {
pos_ += bytes_;
if (UNLIKELY(data_.empty()) ||
UNLIKELY(!DecodeVar32(&data_, &raw1, &error_)) ||
UNLIKELY(!DecodeVar32(&data_, &raw2, &error_))) {
done_ = true;
return;
}
start_90k_ += duration_90k_;
int32_t duration_90k_delta = Unzigzag32(raw1 >> 1);
duration_90k_ += duration_90k_delta;
if (duration_90k_ < 0) {
if (UNLIKELY(duration_90k_ < 0)) {
error_ = StrCat("negative duration ", duration_90k_,
" after applying delta ", duration_90k_delta);
done_ = true;
return;
}
if (duration_90k_ == 0 && !data_.empty()) {
if (UNLIKELY(duration_90k_ == 0 && !data_.empty())) {
error_ = StrCat("zero duration only allowed at end; have ", data_.size(),
"bytes left.");
done_ = true;
@ -104,15 +106,15 @@ void SampleIndexIterator::Next() {
}
is_key_ = raw1 & 0x01;
int32_t bytes_delta = Unzigzag32(raw2);
if (is_key_) {
bytes_key_ += bytes_delta;
if (UNLIKELY(is_key_)) {
bytes_ = bytes_key_ += bytes_delta;
} else {
bytes_nonkey_ += bytes_delta;
bytes_ = bytes_nonkey_ += bytes_delta;
}
if (bytes_internal() <= 0) {
error_ = StrCat("non-positive bytes ", bytes_internal(),
" after applying delta ", bytes_delta, " to ",
(is_key_ ? "key" : "non-key"), " frame at ts ", start_90k_);
if (UNLIKELY(bytes_ <= 0)) {
error_ = StrCat("non-positive bytes ", bytes_, " after applying delta ",
bytes_delta, " to ", (is_key_ ? "key" : "non-key"),
" frame at ts ", start_90k_);
done_ = true;
return;
}
@ -128,6 +130,7 @@ void SampleIndexIterator::Clear() {
duration_90k_ = 0;
bytes_key_ = 0;
bytes_nonkey_ = 0;
bytes_ = 0;
is_key_ = false;
done_ = true;
}

View File

@ -134,7 +134,7 @@ class SampleIndexIterator {
int32_t end_90k() const { return start_90k_ + duration_90k(); }
int32_t bytes() const {
DCHECK(!done_);
return bytes_internal();
return bytes_;
}
bool is_key() const {
DCHECK(!done_);
@ -144,16 +144,12 @@ class SampleIndexIterator {
private:
void Clear();
// Return the bytes taken by the current sample, or 0 after Clear().
int64_t bytes_internal() const {
return is_key_ ? bytes_key_ : bytes_nonkey_;
}
re2::StringPiece data_;
std::string error_;
int64_t pos_;
int32_t start_90k_;
int32_t duration_90k_;
int32_t bytes_; // bytes taken by the current sample, or 0 after Clear().
int32_t bytes_key_;
int32_t bytes_nonkey_;
bool is_key_;

View File

@ -376,7 +376,7 @@ Statement Database::Prepare(re2::StringPiece sql, size_t *used,
bool RunStatements(DatabaseContext *ctx, re2::StringPiece stmts,
std::string *error_message) {
while (true) {
size_t used;
size_t used = 0;
auto stmt = ctx->db()->Prepare(stmts, &used, error_message);
if (!stmt.valid()) {
// Statement didn't parse. If |error_message| is empty, there are just no

BIN
src/testdata/video_sample_index.bin vendored Normal file

Binary file not shown.

View File

@ -37,7 +37,9 @@
#include <gmock/gmock.h>
#include <re2/stringpiece.h>
#include "filesystem.h"
#include "http.h"
#include "uuid.h"
namespace moonfire_nvr {
@ -106,6 +108,47 @@ class ScopedMockLog : public google::LogSink {
LogEntry pending_;
};
class MockUuidGenerator : public UuidGenerator {
public:
MOCK_METHOD0(Generate, Uuid());
};
class MockFile : public File {
public:
MOCK_CONST_METHOD0(name, const std::string &());
MOCK_METHOD3(Access, int(const char *, int, int));
MOCK_METHOD0(Close, int());
// The std::unique_ptr<File> variants of Open are wrapped here because gmock's
// SetArgPointee doesn't work well with std::unique_ptr.
int Open(const char *path, int flags, std::unique_ptr<File> *f) final {
File *f_tmp = nullptr;
int ret = OpenRaw(path, flags, &f_tmp);
f->reset(f_tmp);
return ret;
}
int Open(const char *path, int flags, mode_t mode,
std::unique_ptr<File> *f) final {
File *f_tmp = nullptr;
int ret = OpenRaw(path, flags, mode, &f_tmp);
f->reset(f_tmp);
return ret;
}
MOCK_METHOD3(Open, int(const char *, int, int *));
MOCK_METHOD4(Open, int(const char *, int, mode_t, int *));
MOCK_METHOD3(OpenRaw, int(const char *, int, File **));
MOCK_METHOD4(OpenRaw, int(const char *, int, mode_t, File **));
MOCK_METHOD3(Read, int(void *, size_t, size_t *));
MOCK_METHOD1(Stat, int(struct stat *));
MOCK_METHOD0(Sync, int());
MOCK_METHOD1(Truncate, int(off_t));
MOCK_METHOD1(Unlink, int(const char *));
MOCK_METHOD2(Write, int(re2::StringPiece, size_t *));
};
} // namespace moonfire_nvr
#endif // MOONFIRE_NVR_TESTUTIL_H

View File

@ -34,7 +34,6 @@
#ifndef MOONFIRE_NVR_UUID_H
#define MOONFIRE_NVR_UUID_H
#include <gmock/gmock.h>
#include <re2/stringpiece.h>
#include <uuid/uuid.h>
@ -75,11 +74,6 @@ class UuidGenerator {
virtual Uuid Generate() = 0;
};
class MockUuidGenerator : public UuidGenerator {
public:
MOCK_METHOD0(Generate, Uuid());
};
UuidGenerator *GetRealUuidGenerator();
} // namespace moonfire_nvr