From f5120af4b736e7a5ff106c8dc60d9e8dc0d4b748 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 15:53:33 -0700 Subject: [PATCH 01/21] Update Beast subtree to v41 --- CHANGELOG.md | 6 ++++++ extras/beast | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 124698c..a5e3e59 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,9 @@ +1.0.3 + +* Update Beast subtree to v41 + +-------------------------------------------------------------------------------- + 1.0.2 * Better CMakeLists.txt for finding Boost diff --git a/extras/beast b/extras/beast index 2f9a844..823aee2 160000 --- a/extras/beast +++ b/extras/beast @@ -1 +1 @@ -Subproject commit 2f9a8440c2432d8a196571d6300404cb76314125 +Subproject commit 823aee222a6d923299cf680403d8adcd0c53b1e3 From 42b5fa9b0a72779f8453ba7dd6b82f249d79b084 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 14:42:46 -0700 Subject: [PATCH 02/21] Update Travis targets and build scripts --- .travis.yml | 106 ++++++++++++++++++++++++++++++++----------------- CHANGELOG.md | 21 +++++----- CMakeLists.txt | 29 +++++++------- Jamroot | 22 ++-------- 4 files changed, 98 insertions(+), 80 deletions(-) diff --git a/.travis.yml b/.travis.yml index afc790f..65fa179 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,7 @@ +notifications: + email: + false + language: cpp env: @@ -13,27 +17,28 @@ env: - BOOST_ROOT=$HOME/boost_1_61_0 - BOOST_URL='http://sourceforge.net/projects/boost/files/boost/1.61.0/boost_1_61_0.tar.gz' -packages: &gcc5_pkgs - - gcc-5 - - g++-5 - - python-software-properties - - libssl-dev - - libffi-dev - - libstdc++6 - - binutils-gold - # Provides a backtrace if the unittests crash - - gdb - # Needed for installing valgrind - - subversion - - automake - - autotools-dev - - libc6-dbg - # Needed to build rocksdb - - libsnappy-dev +addons: + apt: + sources: &base_sources + - ubuntu-toolchain-r-test + packages: &base_packages + - python-software-properties 
+ - libffi-dev + - libstdc++6 + - binutils-gold + # Provides a backtrace if the unittests crash + - gdb + # Needed for installing valgrind + - subversion + - automake + - autotools-dev + - libc6-dbg + # Needed to build rocksdb + - libsnappy-dev matrix: include: - # GCC/Coverage/Autobahn + # gcc coverage (+valgrind if master or develop) - compiler: gcc env: - GCC_VER=5 @@ -41,33 +46,66 @@ matrix: - ADDRESS_MODEL=64 - BUILD_SYSTEM=cmake - PATH=$PWD/cmake/bin:$PATH - addons: &ao_gcc5 + addons: apt: - sources: ['ubuntu-toolchain-r-test'] - packages: *gcc5_pkgs + packages: + - gcc-5 + - g++-5 + - *base_packages + sources: + - *base_sources - # Clang/UndefinedBehaviourSanitizer - - compiler: clang + # older GCC, release + - compiler: gcc env: - - GCC_VER=5 - - VARIANT=usan - - CLANG_VER=3.8 + - GCC_VER=4.9 + - VARIANT=release + - ADDRESS_MODEL=64 + addons: + apt: + packages: + - gcc-4.9 + - g++-4.9 + - *base_packages + sources: + - *base_sources + + # later GCC + - compiler: gcc + env: + - GCC_VER=6 + - VARIANT=release - ADDRESS_MODEL=64 - - UBSAN_OPTIONS='print_stacktrace=1' - BUILD_SYSTEM=cmake - PATH=$PWD/cmake/bin:$PATH - - PATH=$PWD/llvm-$LLVM_VERSION/bin:$PATH - addons: *ao_gcc5 + addons: + apt: + packages: + - gcc-6 + - g++-6 + - *base_packages + sources: + - *base_sources - # Clang/AddressSanitizer + # clang ubsan+asan - compiler: clang env: - GCC_VER=5 - - VARIANT=asan + - VARIANT=ubasan - CLANG_VER=3.8 - ADDRESS_MODEL=64 + - UBSAN_OPTIONS='print_stacktrace=1' + - BUILD_SYSTEM=cmake + - PATH=$PWD/cmake/bin:$PATH - PATH=$PWD/llvm-$LLVM_VERSION/bin:$PATH - addons: *ao_gcc5 + addons: + apt: + packages: + - gcc-5 + - g++-5 + - *base_packages + sources: + - *base_sources cache: directories: @@ -84,7 +122,3 @@ script: after_script: - cat nohup.out || echo "nohup.out already deleted" - -notifications: - email: - false diff --git a/CHANGELOG.md b/CHANGELOG.md index a5e3e59..1b2ce74 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,7 @@ 1.0.3 * Update Beast subtree 
to v41 +* Update Travis targets and build scripts -------------------------------------------------------------------------------- @@ -11,33 +12,33 @@ * Remove error::success (API Change) * Update Travis to Boost 1.61.0 ---- +-------------------------------------------------------------------------------- 1.0.1 * Travis: Limit the number of jobs ---- +-------------------------------------------------------------------------------- 1.0.0 * First Official Release! * Fix badge markdown in README.md ---- +-------------------------------------------------------------------------------- 1.0.0-b7 * Fix doc typos * Improve file creation on POSIX ---- +-------------------------------------------------------------------------------- 1.0.0-b6 * Fix incorrect file deletion in create() ---- +-------------------------------------------------------------------------------- 1.0.0-b5 @@ -52,7 +53,7 @@ * Improved commit process * Dynamic block size in custom allocator ---- +-------------------------------------------------------------------------------- 1.0.0-b4 @@ -61,12 +62,12 @@ * Throw on API calls when no database open * Benchmarks vs. 
RocksDB -### API Changes: +API Changes: * `insert` sets `error::key_exists` instead of returning `false` * `fetch` sets `error::key_not_found` instead of returning `false` ---- +-------------------------------------------------------------------------------- 1.0.0-b3 @@ -77,14 +78,14 @@ * Add visit test * Improved coverage ---- +-------------------------------------------------------------------------------- 1.0.0-b2 * Minor documentation and tidying * Add CHANGELOG ---- +-------------------------------------------------------------------------------- 1.0.0-b1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ea7a04..6ded137 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,25 +82,24 @@ endfunction () #------------------------------------------------------------------------------- if ("${VARIANT}" STREQUAL "coverage") - set (CMAKE_CXX_FLAGS + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") - set (CMAKE_BUILD_TYPE RELWITHDEBINFO) + set(CMAKE_BUILD_TYPE RELWITHDEBINFO) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov") -elseif ("${VARIANT}" STREQUAL "asan") - set (CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address") - set (CMAKE_BUILD_TYPE RELWITHDEBINFO) -elseif ("${VARIANT}" STREQUAL "usan") - set (CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") - set (CMAKE_BUILD_TYPE RELWITHDEBINFO) + +elseif ("${VARIANT}" STREQUAL "ubasan") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -funsigned-char -fno-omit-frame-pointer -fsanitize=address,undefined") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address,undefined") + set(CMAKE_BUILD_TYPE RELWITHDEBINFO) + elseif ("${VARIANT}" STREQUAL "debug") - set (CMAKE_BUILD_TYPE DEBUG) + set(CMAKE_BUILD_TYPE DEBUG) + elseif ("${VARIANT}" STREQUAL 
"release") - set (CMAKE_BUILD_TYPE RELEASE) -endif () + set(CMAKE_BUILD_TYPE RELEASE) + +endif() include_directories ( include diff --git a/Jamroot b/Jamroot index 7c140b8..8f13a5c 100644 --- a/Jamroot +++ b/Jamroot @@ -19,28 +19,12 @@ variant coverage "-lgcov" ; -variant asan +variant ubasan : release : - "-fsanitize=address -fno-omit-frame-pointer" - "-fsanitize=address" - ; - -variant msan - : - debug - : - "-fsanitize=memory -fno-omit-frame-pointer -fsanitize-memory-track-origins=2 -fsanitize-memory-use-after-dtor" - "-fsanitize=memory" - ; - -variant usan - : - debug - : - "-fsanitize=undefined -fno-omit-frame-pointer" - "-fsanitize=undefined" + "-funsigned-char -fno-omit-frame-pointer -fsanitize=address,undefined" + "-fsanitize=address,undefined" ; project nudb From 7b8c8566c114c5446dc9f41cb0696d275863f9bb Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 14:35:27 -0700 Subject: [PATCH 03/21] Update example programs --- CHANGELOG.md | 1 + examples/CMakeLists.txt | 24 +++++++ examples/Jamfile | 8 +++ examples/crash.cpp | 45 ++++++++++++++ examples/example.cpp | 135 +++++++++++++++++++++++++++++----------- examples/recover.cpp | 68 ++++++++++++++++++++ 6 files changed, 245 insertions(+), 36 deletions(-) create mode 100644 examples/crash.cpp create mode 100644 examples/recover.cpp diff --git a/CHANGELOG.md b/CHANGELOG.md index 1b2ce74..9423135 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ * Update Beast subtree to v41 * Update Travis targets and build scripts +* Update example programs -------------------------------------------------------------------------------- diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ab7e100..93cd446 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,6 +4,18 @@ GroupSources (include/nudb nudb) GroupSources (extras/nudb extras) GroupSources (examples/ "/") +add_executable (crash + ${NUDB_INCLUDES} + ${EXTRAS_INCLUDES} + crash.cpp +) + +if (WIN32) + 
target_link_libraries (crash ${Boost_LIBRARIES}) +else () + target_link_libraries (crash ${Boost_LIBRARIES} rt Threads::Threads) +endif () + add_executable (example ${NUDB_INCLUDES} ${EXTRAS_INCLUDES} @@ -15,3 +27,15 @@ if (WIN32) else () target_link_libraries (example ${Boost_LIBRARIES} rt Threads::Threads) endif () + +add_executable (recover + ${NUDB_INCLUDES} + ${EXTRAS_INCLUDES} + recover.cpp +) + +if (WIN32) + target_link_libraries (recover ${Boost_LIBRARIES}) +else () + target_link_libraries (recover ${Boost_LIBRARIES} rt Threads::Threads) +endif () diff --git a/examples/Jamfile b/examples/Jamfile index d165cc9..977b4c3 100644 --- a/examples/Jamfile +++ b/examples/Jamfile @@ -7,6 +7,14 @@ import os ; +exe crash : + crash.cpp + ; + exe example : example.cpp ; + +exe recover : + recover.cpp + ; diff --git a/examples/crash.cpp b/examples/crash.cpp new file mode 100644 index 0000000..647a526 --- /dev/null +++ b/examples/crash.cpp @@ -0,0 +1,45 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +//[ crash + +// simulates system crash while in use. 
+ +#include + +#include // store + +#include +#include // std::uint32_t, std::uint64_t +#include // std::abort + +int main(){ + boost::system::error_code ec; + + // key type for this example + using ssn = std::uint64_t; + + // Open an existing database + nudb::store db; + db.open("db.dat", "db.key", "db.log", ec); + if(ec){ + std::cerr << "open failed: " << ec.message() << std::endl; + return 1; + } + std::cerr << "open successful" << '\n'; + + ssn key = 777777777L; + const char * name = "george"; + + // insert a ssn/name pair + db.insert(& key, name, std::strlen(name), ec); + + // simulate a crash + std::abort(); + + return 1; +} +//] diff --git a/examples/example.cpp b/examples/example.cpp index c9a9696..85a3446 100644 --- a/examples/example.cpp +++ b/examples/example.cpp @@ -4,43 +4,106 @@ // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // +//[ simple_example +#include -#include -#include -#include - -int main() -{ - using namespace nudb; - std::size_t constexpr N = 1000; - using key_type = std::uint32_t; - error_code ec; - auto const dat_path = "db.dat"; - auto const key_path = "db.key"; - auto const log_path = "db.log"; - create( - dat_path, key_path, log_path, - 1, - make_salt(), - sizeof(key_type), - block_size("."), - 0.5f, - ec); - store db; +#include // xxhasher +#include // create +#include // store +#include // path_type + +#include +#include // uint32_t, uint64_t +#include // pair +#include // string +#include // fill, copy_n, min +#include + +int main(){ + // error code returned by NuDb operations + boost::system::error_code ec; + + // key type for this example - a social security number + using ssn = std::uint64_t; + + // (1) File Names + const nudb::path_type dat_path = "db.dat"; + const nudb::path_type key_path = "db.key"; + const nudb::path_type log_path = "db.log"; + + // (2) Create a new database + // given names of data, key and log files + 
nudb::create( + dat_path, // path name of data file + key_path, // path name of key file + log_path, // path name of log file + 1, // application number + nudb::make_salt(), // random seed + sizeof(ssn), + nudb::block_size("."), // block size of current directory + 0.5f, // load factor + ec // reference to return code + ); + if(ec){ + std::cerr << "creation failed:" << ec.message() << std::endl; + return 1; + } + std::cerr << "creation successful" << '\n'; + + // (3) Open an existing database + nudb::store db; db.open(dat_path, key_path, log_path, ec); - char data = 0; - // Insert - for(key_type i = 0; i < N; ++i) - db.insert(&i, &data, sizeof(data), ec); - // Fetch - for(key_type i = 0; i < N; ++i) - db.fetch(&i, - [&](void const* buffer, std::size_t size) - { - // do something with buffer, size - }, ec); + if(ec){ + std::cerr << "open failed:" << ec.message() << std::endl; + return 1; + } + std::cerr << "open successful" << '\n'; + + const std::pair input_data[] = { + {123456789L, "bob"}, + {999999999L, "carol"}, + {987654321L, "ted"}, + {666666666L, "alice"} + }; + + // (4) Insert key/value pairs + // insert ssn/name pairs + for(const auto & p : input_data){ + db.insert(& p.first, p.second, std::strlen(p.second), ec); + if(ec){ + std::cerr << "insertion failed:" << ec.message() << std::endl; + return 1; + } + } + std::cerr << "inserted 4 records" << '\n'; + + // (5) Fetch a value given its key + // get carol's address + ssn key = 999999999L; + std::string address; + db.fetch( + & key, + [&](void const * buffer, std::size_t size){ + address = std::string(static_cast(buffer), size); + }, + ec + ); + if(ec){ + std::cerr << "fetch failed:" << ec.message() << std::endl; + return 1; + } + std::cerr + << "given ssn=" << key << ", " + << "retrieved " << address << '\n'; + + // (6) Terminate access to the database db.close(ec); - erase_file(dat_path); - erase_file(key_path); - erase_file(log_path); + if(ec){ + std::cerr << "close failed:" << ec.message() << std::endl; + 
return 1; + } + std::cerr << "close successful" << '\n'; + + return 0; } +//] diff --git a/examples/recover.cpp b/examples/recover.cpp new file mode 100644 index 0000000..1bbecf0 --- /dev/null +++ b/examples/recover.cpp @@ -0,0 +1,68 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +//[ recover +#include + +#include // xxhasher +#include // create +#include // store +#include +#include + +#include +#include // std::uint32_t, std::uint64_t + +int main(){ + boost::system::error_code ec; + + // key type for this example + using ssn = std::uint64_t; + + // Open an existing database + nudb::store db; + db.open("db.dat", "db.key", "db.log", ec); + if(ec){ + std::cerr << "open failed: " << ec.message() << std::endl; + return 1; + } + std::cerr << "open successful" << '\n'; + + nudb::visit( + "db.dat", + [&](// called with each item found in the data file + void const* key, // A pointer to the item key + std::size_t key_size, // The size of the key (always the same) + void const* data, // A pointer to the item data + std::size_t data_size, // The size of the item data + boost::system::error_code& ec // Indicates an error (out parameter) + ){ + if(ec){ + std::cerr << "visit failed: " << ec.message() << std::endl; + return std::terminate(); + } + std::cerr + << "key: " << * static_cast(key) << '\n' + << "name: " << + std::string(static_cast(data), data_size) << '\n' + ; + }, + [&](// called to indicate progress of visitation + std::uint64_t amount, // Amount of work done so far + std::uint64_t total // Total amount of work to do + ){ + // we ignore this information in this example + }, + ec // result of visit operation + ); + if(ec){ + std::cerr << "visit failed: " << ec.message() << std::endl; + return 1; + } + return 0; +} + +//] From 0339cbbbdf3a00cd7c39805f6b58f0cd0cb6212a Mon 
Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 16:48:21 -0700 Subject: [PATCH 04/21] Split up doc files --- CHANGELOG.md | 1 + doc/example.qbk | 58 ++++++++++ doc/main.qbk | 292 +++-------------------------------------------- doc/overview.qbk | 100 ++++++++++++++++ doc/tool.qbk | 19 +++ doc/usage.qbk | 122 ++++++++++++++++++++ 6 files changed, 314 insertions(+), 278 deletions(-) create mode 100644 doc/example.qbk create mode 100644 doc/overview.qbk create mode 100644 doc/tool.qbk create mode 100644 doc/usage.qbk diff --git a/CHANGELOG.md b/CHANGELOG.md index 9423135..f1e21b7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,7 @@ * Update Beast subtree to v41 * Update Travis targets and build scripts * Update example programs +* Split up doc files -------------------------------------------------------------------------------- diff --git a/doc/example.qbk b/doc/example.qbk new file mode 100644 index 0000000..8f78826 --- /dev/null +++ b/doc/example.qbk @@ -0,0 +1,58 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:example Example] + +This complete program creates a database, opens the database, inserts several +key/value pairs, fetches the key/value pairs, closes the database, then erases +the database files. Source code for this program is located in the examples +directory. 
+ +``` +#include +#include +#include + +int main() +{ + using namespace nudb; + std::size_t constexpr N = 1000; + using key_type = std::uint32_t; + error_code ec; + auto const dat_path = "db.dat"; + auto const key_path = "db.key"; + auto const log_path = "db.log"; + create( + dat_path, key_path, log_path, + 1, + make_salt(), + sizeof(key_type), + block_size("."), + 0.5f, + ec); + store db; + db.open(dat_path, key_path, log_path, + 16 * 1024 * 1024, ec); + char data = 0; + // Insert + for(key_type i = 0; i < N; ++i) + db.insert(&i, &data, sizeof(data), ec); + // Fetch + for(key_type i = 0; i < N; ++i) + db.fetch(&i, + [&](void const* buffer, std::size_t size) + { + // do something with buffer, size + }, ec); + db.close(ec); + erase_file(dat_path); + erase_file(key_path); + erase_file(log_path); +} +``` + +[endsect] diff --git a/doc/main.qbk b/doc/main.qbk index a3a2493..ae0be40 100644 --- a/doc/main.qbk +++ b/doc/main.qbk @@ -30,21 +30,31 @@ An overview of features, requirements, and credits, plus rationale and design information. ]] + [[ [link nudb.example Example] ][ An example that illustrates the use of NuDB. ]] + [[ [link nudb.usage Usage] ][ An explanation of operations on the database. ]] + + [[ + [link nudb.tool Command Line Tool] + ][ + Instructions on using the nudb command line tool. + ]] + [[ [link nudb.ref Reference] ][ Detailed class and function reference. ]] + [[ [link nudb.index Index] ][ @@ -52,284 +62,10 @@ ]] ] -[section:overview Overview] - -NuDB is an append only, key/value store specifically optimized for random -read performance on modern SSDs or equivalent high-IOPS devices. The most -common application for NuDB is content addressible storage where a -cryptographic digest of the data is used as the key. The read performance -and memory usage are independent of the size of the database. 
These are -some other features: - -[heading History] - -The first versions of rippled, the application behind the Ripple consensus -network, used SQLite as their back end for unstructured data. The -performance quickly became a limiting factor. - -Then rippled then went through a series of back ends including LMDB, LevelDB, and -RocksDB. Each of these databases performed well at first, but as the data -size increased, memory usage increased and performance dropped off drastically. - -The problem is caching. Each of these databases relies on some O(n) data -structure, such as a Bloom filter, to improve their performance. These work -well until the structures no longer fit in memory. In addition, many virtual -machines are memory constrained. - -To address this issue, the developers performed a thought experiment -- if -you assume the data size is so large that no O(n) caching is effective, what -is the best read performance you could expect? They reached the following -conclusions: - -1) Writes should not block reads. -2) Reads should be limited only by the SSD's IOPS limit. -3) A read for a non-present key should require one IOP. -4) A read for a present key whose data can be read in a single IOP should -only require two IOPs, one to figure out where it is and one to read it in. - -NuDB is designed to come as close to this ideal as possible. - -[heading Design] - -NuDB uses three files to hold the data and indexes. The data file is append -only and contains sufficient information to rebuild the index. The index -file is random access and contains hash buckets. When an update is in -progress, a temporary journal file is used to roll the update back if -needed. - -NuDB uses linear hashing to dynamically increase the number of buckets in -the index file as the data size grows. Bucket overflows are handled by -adding "overflow" records to the data file. Bucket overflows can be -minimized by increasing the number of buckets, leading to a size/speed -tradeoff. 
Typical databases keep the average bucket half full (or half -empty, depending on your point of view) resulting in spill records -accounting for less than 1% of reads. - -Inserts are buffered in memory and appended to the data file immediately. -Updates to the index file are performed as an atomic operation. Fetch -operations retrieve records in the process of being modified from memory -during the update operation so that writes do not block fetches. - -Before the index file is modified, a journal file is created to recover -consistency in the event of a crash during the update. The recovery process -will index all records written to the data file, so the aggregation of index -updates does not increase the time which a crash would result in loss of -data. - -Iteration can be performed on the data file directly. Since it is append -only, there is no risk of other operations corrupting an iteration in -progress. - -[heading Performance] - -Writes do not block reads. Read rates are typically around 90% of the SSD's -IOPS limit. An average fetch for a non-present key typically requires fewer -than 1.01 IOPs. An average fetch for a present key requires fewer than 1.01 -IOPs plus however many IOPs it takes to read the data. - -[heading Applications] - -Content addressable storage associates data with its cryptographic digest. -This type of storage is commonly used in decentralized blockchain applications. - -Often these applications require following hash chains -- where one object -contains the hash of another object that ultimately leads to the object -desired. NuDB's low latency and high speed are particularly advantageous -in these kinds of applications. - -NuDB is append only and does not support a delete operation. To support -retaining limited historical information, NuDB is often used in a dual -database configuration. One database is older and is read only, the other -is newer and is read/write. 
Periodically, the older database is discarded and -the newer database becomes the new read only database and a new read/write -database is created. - -[endsect] - - - -[section:example Example] - -This complete program creates a database, opens the database, inserts several -key/value pairs, fetches the key/value pairs, closes the database, then erases -the database files. Source code for this program is located in the examples -directory. - -``` -#include -#include -#include - -int main() -{ - using namespace nudb; - std::size_t constexpr N = 1000; - using key_type = std::uint32_t; - error_code ec; - auto const dat_path = "db.dat"; - auto const key_path = "db.key"; - auto const log_path = "db.log"; - create( - dat_path, key_path, log_path, - 1, - make_salt(), - sizeof(key_type), - block_size("."), - 0.5f, - ec); - store db; - db.open(dat_path, key_path, log_path, - 16 * 1024 * 1024, ec); - char data = 0; - // Insert - for(key_type i = 0; i < N; ++i) - db.insert(&i, &data, sizeof(data), ec); - // Fetch - for(key_type i = 0; i < N; ++i) - db.fetch(&i, - [&](void const* buffer, std::size_t size) - { - // do something with buffer, size - }, ec); - db.close(ec); - erase_file(dat_path); - erase_file(key_path); - erase_file(log_path); -} -``` - -[endsect] - - - -[section:usage Usage] - -[heading Files] - -A database is represented by three files: the data file, the key file, -and the log file. Each file has a distinct header in a well known format. -The data file holds all of the key/value pairs and is serially iterable. The -key file holds a hash table indexing all of the contents in the data file. -The log file holds information used to roll the database back in the event -of a failure. - -[heading Create/Open] - -The [link nudb.ref.nudb__create create] function creates a new data file and key -file for a database with the specified parameters. 
The caller specifies -the hash function to use as a template argument, the file paths, -and the database constants: - -[note - Sample code and identifiers mentioned in this section are written - as if the following declarations are in effect: - ``` - #include - using namespace nudb; - error_code ec; - ``` -] - -``` -create( - "nudb.dat", // Path to data file - "nudb.key", // Path to key file - "nudb.log", // Path to log file - 1, // Application-defined constant - make_salt(), // A random integer - 4, // The size of keys - block_size(".") // Block size in key file - 0.5f // The load factor - ec); -``` - -The application-defined constant is a 64-bit unsigned integer which the -caller may set to any value. This value can be retrieved from an open -database, where it will be equal to the value used at creation time. This -constant can be used for any purpose. For example, to inform the application -of what application-specific version was used to create the database. - -The salt is a 64-bit unsigned integer used to prevent algorithmic complexity -attacks. Hash functions used during database operations are constructed with -the salt, providing an opportunity to permute the hash function. This feature -is useful when inserted database keys come from untrusted sources, such as the -network. - -The key size is specified when the database is created, and cannot be changed. -All key files indexing the same data file will use the key size of the data -file. - -The block size indicates the size of buckets in the key file. The best choice -for the block size is the natural sector size of the device. For most SSDs -in production today this is 4096, or less often 8192 or 16384. The function -[link nudb.ref.nudb__block_size block_size] returns the best guess of the block -size used by the device mounted at the specified path. - -The load factor determines the target bucket occupancy fraction. 
There is -almost never a need to specify anything other than the recommended value of -0.5, which strikes the perfect balance of space-efficiency and fast lookup. - -An open database is represented by objects of type -[link nudb.ref.nudb__basic_store basic_store], templated on the hasher. The type -alias [link nudb.ref.nudb__store store] represents a database using -[link nudb.ref.nudb__xxhasher xxhasher], the default hash function. To open -a database, declare a database object and then call the -[link nudb.ref.nudb__basic_store.open open] member function: - -``` -store db; -db.open("nudb.dat", "nudb.key", "nudb.log", ec); -``` - -When opening a database that was previously opened by a program that was -terminated abnormally, the implementation automatically invokes the -recovery process. This process restores the integrity of the database by -replaying the log file if it is present. - -[heading Insert/Fetch] - -Once a database is open, it becomes possible to insert new key/value pairs -and look them up. Insertions are straightforward: - -``` -db.insert(key, data, bytes, ec); -``` - -If the key already exists, the error is set to -[link nudb.ref.nudb__error.key_exists error::key_exists]. All keys in a NuDB -database must be unique. Multiple threads can call insert at the same time. -Internally however, insertions are serialized to present a consistent view -of the database to callers. - -Retrieving a key/value pair if it exists is similarly straightforward: - -``` -db.fetch(key, - [&](void const* buffer, std::size_t size) - { - ... - }, ec); -``` - -To give callers control over memory allocation strategies, the fetch -function takes a callback object as a parameter. The callback is invoked -with a pointer to the data and size, if the item exists in the database. -The callback can decide how to store this information, if at all. 
- -[endsect] - -[section Command Line Tool] - -To allow administration, NuDB comes with the "nudb" command line tool, -which may be built using b2 or CMake. Files for the tool are located in -the "tools" directory. Once the tool is built, and located in your path, -execute this command for additional instructions: - -``` -nudb help -``` - -[endsect] +[include overview.qbk] +[include example.qbk] +[include usage.qbk] +[include tool.qbk] [section:ref Reference] [xinclude quickref.xml] diff --git a/doc/overview.qbk b/doc/overview.qbk new file mode 100644 index 0000000..6ea81b8 --- /dev/null +++ b/doc/overview.qbk @@ -0,0 +1,100 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:overview Overview] + +NuDB is an append only, key/value store specifically optimized for random +read performance on modern SSDs or equivalent high-IOPS devices. The most +common application for NuDB is content addressable storage where a +cryptographic digest of the data is used as the key. The read performance +and memory usage are independent of the size of the database. These are +some other features: + +[heading History] + +The first versions of rippled, the application behind the Ripple consensus +network, used SQLite as their back end for unstructured data. The +performance quickly became a limiting factor. + +Then rippled went through a series of back ends including LMDB, LevelDB, and +RocksDB. Each of these databases performed well at first, but as the data +size increased, memory usage increased and performance dropped off drastically. + +The problem is caching. Each of these databases relies on some O(n) data +structure, such as a Bloom filter, to improve their performance. These work +well until the structures no longer fit in memory. 
In addition, many virtual +machines are memory constrained. + +To address this issue, the developers performed a thought experiment -- if +you assume the data size is so large that no O(n) caching is effective, what +is the best read performance you could expect? They reached the following +conclusions: + +1) Writes should not block reads. +2) Reads should be limited only by the SSD's IOPS limit. +3) A read for a non-present key should require one IOP. +4) A read for a present key whose data can be read in a single IOP should +only require two IOPs, one to figure out where it is and one to read it in. + +NuDB is designed to come as close to this ideal as possible. + +[heading Design] + +NuDB uses three files to hold the data and indexes. The data file is append +only and contains sufficient information to rebuild the index. The index +file is random access and contains hash buckets. When an update is in +progress, a temporary journal file is used to roll the update back if +needed. + +NuDB uses linear hashing to dynamically increase the number of buckets in +the index file as the data size grows. Bucket overflows are handled by +adding "overflow" records to the data file. Bucket overflows can be +minimized by increasing the number of buckets, leading to a size/speed +tradeoff. Typical databases keep the average bucket half full (or half +empty, depending on your point of view) resulting in spill records +accounting for less than 1% of reads. + +Inserts are buffered in memory and appended to the data file immediately. +Updates to the index file are performed as an atomic operation. Fetch +operations retrieve records in the process of being modified from memory +during the update operation so that writes do not block fetches. + +Before the index file is modified, a journal file is created to recover +consistency in the event of a crash during the update. 
The recovery process +will index all records written to the data file, so the aggregation of index +updates does not increase the time which a crash would result in loss of +data. + +Iteration can be performed on the data file directly. Since it is append +only, there is no risk of other operations corrupting an iteration in +progress. + +[heading Performance] + +Writes do not block reads. Read rates are typically around 90% of the SSD's +IOPS limit. An average fetch for a non-present key typically requires fewer +than 1.01 IOPs. An average fetch for a present key requires fewer than 1.01 +IOPs plus however many IOPs it takes to read the data. + +[heading Applications] + +Content addressable storage associates data with its cryptographic digest. +This type of storage is commonly used in decentralized blockchain applications. + +Often these applications require following hash chains -- where one object +contains the hash of another object that ultimately leads to the object +desired. NuDB's low latency and high speed are particularly advantageous +in these kinds of applications. + +NuDB is append only and does not support a delete operation. To support +retaining limited historical information, NuDB is often used in a dual +database configuration. One database is older and is read only, the other +is newer and is read/write. Periodically, the older database is discarded and +the newer database becomes the new read only database and a new read/write +database is created. + +[endsect] diff --git a/doc/tool.qbk b/doc/tool.qbk new file mode 100644 index 0000000..5afde00 --- /dev/null +++ b/doc/tool.qbk @@ -0,0 +1,19 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:tool Command Line Tool] + +To allow administration, NuDB comes with the "nudb" command line tool, +which may be built using b2 or CMake. Files for the tool are located in +the "tools" directory. Once the tool is built, and located in your path, +execute this command for additional instructions: + +``` +nudb help +``` + +[endsect] diff --git a/doc/usage.qbk b/doc/usage.qbk new file mode 100644 index 0000000..be6fe9c --- /dev/null +++ b/doc/usage.qbk @@ -0,0 +1,122 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:usage Usage] + +[heading Files] + +A database is represented by three files: the data file, the key file, +and the log file. Each file has a distinct header in a well known format. +The data file holds all of the key/value pairs and is serially iterable. The +key file holds a hash table indexing all of the contents in the data file. +The log file holds information used to roll the database back in the event +of a failure. + +[heading Create/Open] + +The [link nudb.ref.nudb__create create] function creates a new data file and key +file for a database with the specified parameters. 
The caller specifies +the hash function to use as a template argument, the file paths, +and the database constants: + +[note + Sample code and identifiers mentioned in this section are written + as if the following declarations are in effect: + ``` + #include <nudb/nudb.hpp> + using namespace nudb; + error_code ec; + ``` +] + +``` +create<xxhasher>( + "nudb.dat", // Path to data file + "nudb.key", // Path to key file + "nudb.log", // Path to log file + 1, // Application-defined constant + make_salt(), // A random integer + 4, // The size of keys + block_size("."), // Block size in key file + 0.5f, // The load factor + ec); +``` + +The application-defined constant is a 64-bit unsigned integer which the +caller may set to any value. This value can be retrieved from an open +database, where it will be equal to the value used at creation time. This +constant can be used for any purpose. For example, to inform the application +of what application-specific version was used to create the database. + +The salt is a 64-bit unsigned integer used to prevent algorithmic complexity +attacks. Hash functions used during database operations are constructed with +the salt, providing an opportunity to permute the hash function. This feature +is useful when inserted database keys come from untrusted sources, such as the +network. + +The key size is specified when the database is created, and cannot be changed. +All key files indexing the same data file will use the key size of the data +file. + +The block size indicates the size of buckets in the key file. The best choice +for the block size is the natural sector size of the device. For most SSDs +in production today this is 4096, or less often 8192 or 16384. The function +[link nudb.ref.nudb__block_size block_size] returns the best guess of the block +size used by the device mounted at the specified path. + +The load factor determines the target bucket occupancy fraction. 
There is +almost never a need to specify anything other than the recommended value of +0.5, which strikes the perfect balance of space-efficiency and fast lookup. + +An open database is represented by objects of type +[link nudb.ref.nudb__basic_store basic_store], templated on the hasher. The type +alias [link nudb.ref.nudb__store store] represents a database using +[link nudb.ref.nudb__xxhasher xxhasher], the default hash function. To open +a database, declare a database object and then call the +[link nudb.ref.nudb__basic_store.open open] member function: + +``` +store db; +db.open("nudb.dat", "nudb.key", "nudb.log", ec); +``` + +When opening a database that was previously opened by a program that was +terminated abnormally, the implementation automatically invokes the +recovery process. This process restores the integrity of the database by +replaying the log file if it is present. + +[heading Insert/Fetch] + +Once a database is open, it becomes possible to insert new key/value pairs +and look them up. Insertions are straightforward: + +``` +db.insert(key, data, bytes, ec); +``` + +If the key already exists, the error is set to +[link nudb.ref.nudb__error.key_exists error::key_exists]. All keys in a NuDB +database must be unique. Multiple threads can call insert at the same time. +Internally however, insertions are serialized to present a consistent view +of the database to callers. + +Retrieving a key/value pair if it exists is similarly straightforward: + +``` +db.fetch(key, + [&](void const* buffer, std::size_t size) + { + ... + }, ec); +``` + +To give callers control over memory allocation strategies, the fetch +function takes a callback object as a parameter. The callback is invoked +with a pointer to the data and size, if the item exists in the database. +The callback can decide how to store this information, if at all. 
+ +[endsect] From 6026dfcc46b0d61f7474f9e39608f002239cf55d Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 16:50:27 -0700 Subject: [PATCH 05/21] Add introduction to doc --- CHANGELOG.md | 1 + doc/intro.qbk | 159 ++++++++++++++++++++++++++++++++++++++++++++++++++ doc/main.qbk | 7 +++ 3 files changed, 167 insertions(+) create mode 100644 doc/intro.qbk diff --git a/CHANGELOG.md b/CHANGELOG.md index f1e21b7..ce29356 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ * Update Travis targets and build scripts * Update example programs * Split up doc files +* Add introduction to doc -------------------------------------------------------------------------------- diff --git a/doc/intro.qbk b/doc/intro.qbk new file mode 100644 index 0000000..4fc3f90 --- /dev/null +++ b/doc/intro.qbk @@ -0,0 +1,159 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:intro Introduction] + +NuDB is an append only, key/value store specifically optimized for sequential +write and random read performance on modern SSDs or equivalent high-IOPS devices. +It does not support deletion of data. Write/append performance, read performance, +and memory usage are independent of the size of the database. +A common application for NuDB is storage where write/append performance is +important but where a key may be used to retrieve previously stored records. + +The design emphasizes two goals: simplicity and robustness/recoverability. + +[heading Simplicity] + +This complete program demonstrates how easy the library is to use. It creates +a database, opens the database, inserts several key/value pairs, reads them back +and displays them. Source code for this program is located in the examples +directory. 
+ +[import ../examples/example.cpp][simple_example] + +[variablelist Notes +[ + [(1) File names] + [ + A database is represented by three files: the data file, the key file, + and the log file. Each file has a distinct header in a well known format. + The data file holds all of the key/value pairs and is serially iterable. The + key file holds a hash table indexing all of the contents in the data file. + The log file holds information used to roll the database back in the event + of a failure. + ] +] +[ + [(2) Create a new database] + [ + The [link nudb.ref.nudb__create `create`] function creates a new data file and key + file for a database with the specified parameters. The caller specifies + the hash function to use as a template argument, the file paths, + and the database constants. + ] +] +[ + [(3) Open an existing database] + [ + The [link nudb.ref.nudb__basic_store.open `open`] function + prepares a database for insertion and/or reading. + ] +] +[ + [(4) Insert key/value pairs] + [ + Once a database is open, call the [link nudb.ref.nudb__basic_store.insert `insert`] + function to insert new key/value pairs. + If the key already exists, the error is set to + [link nudb.ref.nudb__error `error::key_exists`]. + All keys in a NuDB database must be unique. Multiple threads can call + insert at the same time. Internally however, insertions are serialized to + present a consistent view of the database to callers. + ] +] +[ + [(5) Fetch a value given its key] + [ + The function [link nudb.ref.nudb__basic_store.fetch `fetch`] takes a callback object + as a parameter which is invoked when the value is retrieved. If there is + no record in the database which corresponds to the key, the error code + returns a value of [link nudb.ref.nudb__error `error::key_not_found`]. + ] +] +[ + [(6) Terminate access to the database] + [ + The function [link nudb.ref.nudb__basic_store.close `close`] frees all the resources + used by the database. 
+ ] +] +] + +[heading Robustness] + +A second goal of the library is to create a database as resistant to corruption +as possible. Ideally, this would guarantee that any time the insert function +successfully returns, the database is still readable and that there is one +and only one more record appended to the database. + +The enforcement of such a guarantee would depend on the implementation of +the interface to the underlying file system. In practice this means judicious +usage of a "sync" function which most file systems provide in one form or +another. The common understanding of this "sync" function is that it guarantees +that all file operations on the files have been completed and that they are +all reflected in the files themselves. + +Unfortunately, it seems that most current file systems make no such +guarantee. See +[@ https://www.usenix.org/system/files/conference/osdi14/osdi14-paper-pillai.pdf +On the Complexity of Crafting Crash-Consistent Applications +]. + +In fact, the whole question regarding robustness turns out to be quite +complicated as evidenced by the discussion on the +[@ http://boost.2283326.n4.nabble.com/NuDB-A-fast-key-value-insert-only-database-for-SSD-drives-in-C-11-tp4692735.html Boost Developers mailing list]. + +One simple way to investigate how this works on one's own system is illustrated +by the example here. The first is a program named crash.cpp which opens an +existing database, appends one record and invokes `std::abort()`. This program +is meant to simulate a system failure such as a power outage at the most +inconvenient time. + +[import ../examples/crash.cpp][crash] + +After running this program, the database would be in some unknown state. +Specifically, does the database contain the record for the name "george"? +In fact, it's not even clear that the database is readable at all. + +This last program opens up the database and lists all its members. 
The library +includes a function named [link nudb.ref.nudb__recover `recover`] for +fixing errors in a corrupted database such as one which might result +from a crashed environment. But it is not necessary to invoke it +explicitly as the normal [link nudb.ref.nudb__basic_store.open `open`] +includes that functionality. So we can just open the database and list +all the records counting on the library implementation to detect any +corruption resulting from previous operations. + +Listing all the contents of the database invokes a user specified +function for each pair of key and value. Here is a simple program +which lists all the data in our sample. + +[import ../examples/recover.cpp][recover] + +Invoking this program on our test database produces the following +output. +[pre +open successful +key: 123456789 +name: bob +key: 666666666 +name: alice +key: 987654321 +name: ted +key: 999999999 +name: carol +] + +which shows that on this particular platform (Mac OSX), return +from an insert operation does not guarantee that a record is +actually appended to the database. + +The library includes a test suite with more examples similar to the +above which can be used to get a better understanding of the utility +and robustness in one's own environment. + +[endsect] diff --git a/doc/main.qbk b/doc/main.qbk index ae0be40..3d4e4e5 100644 --- a/doc/main.qbk +++ b/doc/main.qbk @@ -24,6 +24,12 @@ [template indexterm2[term1 term2] ''''''[term1]''''''[term2]''''''] [variablelist + [[ + [link nudb.intro Introduction] + ][ + A description of what the library does, and its target audience. 
+ ]] + [[ [link nudb.overview Overview] ][ @@ -62,6 +68,7 @@ ]] ] +[include intro.qbk] [include overview.qbk] [include example.qbk] [include usage.qbk] From 676177b69cbdc048184abc9f0a602a5169401898 Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 16:51:54 -0700 Subject: [PATCH 06/21] Tidy up doc build script --- CHANGELOG.md | 1 + doc/makeqbk.sh | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) mode change 100644 => 100755 doc/makeqbk.sh diff --git a/CHANGELOG.md b/CHANGELOG.md index ce29356..ef7b1f0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,7 @@ * Update example programs * Split up doc files * Add introduction to doc +* Tidy up doc build script -------------------------------------------------------------------------------- diff --git a/doc/makeqbk.sh b/doc/makeqbk.sh old mode 100644 new mode 100755 index 3da8c07..9cda696 --- a/doc/makeqbk.sh +++ b/doc/makeqbk.sh @@ -1,4 +1,4 @@ -#!/usr/bin/bash +#!/bin/bash # Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) # From 7449d72e027edef85224b324082778669e6ef638 Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 17:00:28 -0700 Subject: [PATCH 07/21] Update File concept doc --- CHANGELOG.md | 1 + doc/types/File.qbk | 180 +++++++++++++++++++++++++-------------------- 2 files changed, 103 insertions(+), 78 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ef7b1f0..802a0bc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,7 @@ * Split up doc files * Add introduction to doc * Tidy up doc build script +* Update File concept doc -------------------------------------------------------------------------------- diff --git a/doc/types/File.qbk b/doc/types/File.qbk index f36259c..40980f3 100644 --- a/doc/types/File.qbk +++ b/doc/types/File.qbk @@ -8,130 +8,124 @@ [section:File File] The [*File] concept abstracts access to files in the underlying file system. 
-Two implementations are provided, one for the Win32 API and the other for -POSIX compliant systems. The [link nudb.ref.nudb__native_file native_file] type -alias is automatically set to either [link nudb.ref.nudb__win32_file win32_file] -or [link nudb.ref.nudb__posix_file posix_file] as appropriate. To support interfaces other than Win32 or POSIX, callers may provide their -own [*File] type that meets these requirements. The unit test code also provides -its own [*File] type which causes simulated operating system file failures -to exercise all failure paths in the implementation. +own [*File] type that meets these requirements. -In the table below: +[heading Associated Types] +* [link nudb.ref.nudb__path_type path_type] +* [link nudb.ref.nudb__file_mode file_mode] +* [link nudb.ref.nudb__error_code error_code] -* `X` denotes a [*File] type -* `a` and `b` denote values of type `X` -* `c` denotes a (possibly const) value of type `X` -* `m` denotes a value of type [link nudb.ref.nudb__file_mode file_mode] -* `f` denotes a value of type [link nudb.ref.nudb__path_type path_type] -* `q` denotes a value of type `void*` -* `p` denotes a value of type `void const*` -* `ec` denotes a value of type [link nudb.ref.nudb__error_code error_code] +[heading Notation] +* `F` a [*File] type +* `f` an instance of type F +* `p` an instance of type [link nudb.ref.nudb__path_type path_type] +* `m` an instance of the type [link nudb.ref.nudb__file_mode file_mode] +* `ec` an instance of a reference to the type + [link nudb.ref.nudb__error_code error_code] +* `n` a number of bytes. Type is `std::size_t` +* 'o' an offset from the beginning of a file. 
Type is `std::size_t` +* 'a' a memory address of type void * -* `o` denotes a value of type `std::uint64_t` -* `n` denotes a value of type `std::size_t` - -[table File requirements -[[operation] [type] [semantics, pre/post-conditions]] +[heading Valid Expressions] +[table Valid Expressions +[[Operation] [Return Type] [Semantics, Pre/Post-conditions]] +[ + [`F()`] + [ ] + [ + Default constructable + ] +] [ - [`X a{std::move(b)}`] + [`F(f)`] [ ] [ - `X` is `MoveConstructible` + Move constructable. Note NOT copy constructable. ] ] + [ - [`c.is_open()`] + [`f.is_open()`] [`bool`] [ - Returns `true` if `c` refers to an open file. + Returns `true` if `f` refers to an open file, false otherwise. ] ] [ - [`a.close()`] - [ ] + [`f.close()`] + [`void`] [ - If `a` refers to an open file, closes the file. Does nothing if - `a` does not refer to an open file. After this call, `a.open()` - will return `false`. + If `f` refers to an open file, closes the file. Otherwise, does + nothing. After this call, `f.is_open()` will return `false`. ] ] [ - [`a.create(m,f,ec)`] - [ ] + [`f.create(m,p,ec)`] + [`void`] [ - Attempts to create a file at the path specified by `f`, and + Attempts to create a file at the path specified by `p`, and open it with the mode specified by `m`. If an error occurs, `ec` is set to the system specific error code. If no error - occurs, a subsequent call to `a.is_open()` will return `true`. - Undefined behavior if `a` already refers to an open file. + occurs, a subsequent call to `f.is_open()` will return `true`. + Behavior is undefined if `f` already refers to an open file. ] ] [ - [`a.open(m,f,ec)`] - [ ] + [`f.open(m,p,ec)`] + [`void`] [ - Attempts to open the file at the path specified by `f`. If + Attempts to open the file at the path specified by `p`. If an error occurs, `ec` is set to the system specific error - code. If no error occurs, a subsequent call to `a.is_open()` - will return `true`. Undefined behavior if `a` already refers - to an open file. 
+ code. If no error occurs, a subsequent call to `f.is_open()` + will return `true`. Behavior is undefined if `f` already + refers to an open file. ] ] [ - [`X::erase(f,ec)`] - [ ] + [`F::erase(p,ec)`] + [`void`] [ - Attempts to delete the file at the path specified by `f`. + Attempts to delete the file at the path specified by `p`. If an error occurs, `ec` is set to the system specific error code. ] ] [ - [`c.size(ec)`] - [ `std::uint64_t` ] + [`f.size(ec)`] + [`std::size_t`] [ Returns the size of the file in bytes. This value is also equal to lowest byte offset for which a read will always return a - [link nudb.ref.nudb__error short_read] error. Undefined - behavior if `a` does not refer to an open file. + [link nudb.ref.nudb__error short_read] error. Behavior is undefined + if `f` does not refer to an open file. ] ] [ - [`a.read(o,p,n,ec)`] - [ ] + [`f.read(o,a,n,ec)`] + [`void`] [ - Attempts to read `n` bytes from the open file referred to by `a`, - starting at offset `o`, and storing the results in the memory - pointed to by `p`, which must be at least of size `n` bytes. - If an error occurs, `ec` is set to the system specific error - code. Undefined behavior if `a` does not refer to an open file. + Attempts to read `n` bytes starting at file offset `o` + from the open file referred to by `f`. Bytes read are stored + in the memory buffer at address `a`. The memory buffer at `a` + must be at least `n` bytes long. If an error occurs, `ec` is set + to the system specific error code. Behavior is undefined if `f` + does not refer to a file opened with [link nudb.ref.nudb__file_mode file_mode] + equal to scan or read. + ] +] [ - [`a.write(o,q,n,ec)`] - [ ] - [ - Attempts to write `n` bytes to the open file referred to by `a` - and opened with a write mode, starting at offset `o`, and storing - the results in the memory pointed to by `p`, which must be at - least of size `n` bytes. If an error occurs, `ec` is set to the - system specific error code. 
Undefined behavior if `a` does not - refer to an open file. - ] -] -[ - [`a.sync(ec)`] - [ ] + [`f.sync(ec)`] + [`void`] [ Attempts to synchronize the file on disk. This instructs the operating system to ensure that any data which resides in caches or buffers is fully written to the underlying storage device before this call returns. If an error occurs, `ec` is set to the - system specific error code. Undefined behavior if `a` does not - refer to an open file. + system specific error code. Behavior is undefined if + `f` does not refer to an open file. NuDB's database integrity guarantees are only valid if the implementation of `sync` assures that all data is fully written @@ -139,14 +133,26 @@ In the table below: ] ] [ - [`a.trunc(o,ec)`] - [ ] + [`f.write(o,a,n,ec)`] + [`void`] + [ + Attempts to write `n` bytes starting at file offset `o` + to the open file referred to by `f`. The memory buffer at + `a` should contain the `n` bytes meant to be copied to the file. + If an error occurs, `ec` is set to the system specific error code. + Behavior is undefined if `f` does not refer to a file opened with + [link nudb.ref.nudb__file_mode file_mode] equal to write or append. + ] +] +[ + [`f.trunc(o,ec)`] + [`void`] [ - Attempts to change the size of the open file referred to by `a` - and opened with a write mode, to the size in bytes specified - by `o`. If an error occurs, `ec` is set to the system specific - error code. Undefined behavior if `a` does not refer to an open - file. After a successful call, `a.size(ec)` will return `o`. + Attempts to change the size of the open file referred to by `f` + to the size in bytes specified by `o`. If an error occurs, `ec` + is set to the system specific error code. Behavior is undefined if + `f` does not refer to an open file. After a successful call, + `f.size(ec)` will return `o`. 
NuDB's database integrity guarantees are only valid if the implementation of `trunc` assures that subsequent calls to @@ -154,6 +160,24 @@ In the table below: device is taken offline before calling `size`. ] ] +[ + [`is_File`] + [`std::true_type`] + [ + returns `std::true_type` for `F`. Otherwise returns `std::false_type` + ] +] ] +[heading Associated Functions] +* [link nudb.ref.nudb__block_size block_size] +* [link nudb.ref.nudb__erase_file erase_file] + +[heading Models] +* [link nudb.ref.nudb__posix_file posix_file] +* [link nudb.ref.nudb__win32_file win32_file] +* [link nudb.ref.nudb__native_file native_file] + +native file is just an alias which selects one of the previous two depending on the operating system on which the code is compiled. + [endsect] From c55a868b6d24fc87b2aab5cc707ba76f349aebee Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 17:00:57 -0700 Subject: [PATCH 08/21] [FOLD] remove void types from File doc table --- doc/types/File.qbk | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/types/File.qbk b/doc/types/File.qbk index 40980f3..9e714bc 100644 --- a/doc/types/File.qbk +++ b/doc/types/File.qbk @@ -56,7 +56,7 @@ own [*File] type that meets these requirements. ] [ [`f.close()`] - [`void`] + [] [ If `f` refers to an open file, closes the file. Otherwise, does nothing. After this call, `f.is_open()` will return `false`. @@ -64,7 +64,7 @@ own [*File] type that meets these requirements. ] [ [`f.create(m,p,ec)`] - [`void`] + [] [ Attempts to create a file at the path specified by `p`, and open it with the mode specified by `m`. If an error occurs, @@ -75,7 +75,7 @@ own [*File] type that meets these requirements. ] [ [`f.open(m,p,ec)`] - [`void`] + [] [ Attempts to open the file at the path specified by `p`. If an error occurs, `ec` is set to the system specific error @@ -86,7 +86,7 @@ own [*File] type that meets these requirements. 
] [ [`F::erase(p,ec)`] - [`void`] + [] [ Attempts to delete the file at the path specified by `p`. If an error occurs, `ec` is set to the system specific error @@ -105,7 +105,7 @@ own [*File] type that meets these requirements. ] [ [`f.read(o,a,n,ec)`] - [`void`] + [] [ Attempts to read `n` bytes starting at file offset `o` from the open file referred to by `f`. Bytes read are stored @@ -118,7 +118,7 @@ own [*File] type that meets these requirements. ] [ [`f.sync(ec)`] - [`void`] + [] [ Attempts to synchronize the file on disk. This instructs the operating system to ensure that any data which resides in caches @@ -134,7 +134,7 @@ own [*File] type that meets these requirements. ] [ [`f.write(o,a,n,ec)`] - [`void`] + [] [ Attempts to write `n` bytes starting at file offset `o` to the open file referred to by `f`. The memory buffer at @@ -146,7 +146,7 @@ own [*File] type that meets these requirements. ] [ [`f.trunc(o,ec)`] - [`void`] + [] [ Attempts to change the size of the open file referred to by `f` to the size in bytes specified by `o`. If an error occurs, `ec` From a13ee4b04f264c7275905d2e2c41475751ec5ad8 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 17:02:22 -0700 Subject: [PATCH 09/21] [FOLD] tidy up File table heading --- doc/types/File.qbk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/types/File.qbk b/doc/types/File.qbk index 9e714bc..5efd9a6 100644 --- a/doc/types/File.qbk +++ b/doc/types/File.qbk @@ -29,7 +29,7 @@ own [*File] type that meets these requirements. * 'o' an offset from the beginning of a file. 
Type is `std::size_t` * 'a' a memory address of type void * -[heading Valid Expressions] +[heading File Requirements] [table Valid Expressions [[Operation] [Return Type] [Semantics, Pre/Post-conditions]] [ From c84f055292300f8495c0b8e5b5bd17c352c51248 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 17:14:28 -0700 Subject: [PATCH 10/21] [FOLD] File concept doc edit --- doc/types/File.qbk | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/types/File.qbk b/doc/types/File.qbk index 5efd9a6..526ea5b 100644 --- a/doc/types/File.qbk +++ b/doc/types/File.qbk @@ -164,7 +164,8 @@ own [*File] type that meets these requirements. [`is_File`] [`std::true_type`] [ - returns `std::true_type` for `F`. Otherwise returns `std::false_type` + An alias for `std::true_type` for `F`. Otherwise equivalent + to `std::false_type`. ] ] ] From 756ae4fb7eafb38691343165f5a75bc914174982 Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 17:06:53 -0700 Subject: [PATCH 11/21] Update hasher concept doc --- CHANGELOG.md | 1 + doc/types/Hasher.qbk | 57 +++++++++++++++++++++++++++++--------------- 2 files changed, 39 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 802a0bc..915be39 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ * Add introduction to doc * Tidy up doc build script * Update File concept doc +* Update Hasher concept doc -------------------------------------------------------------------------------- diff --git a/doc/types/Hasher.qbk b/doc/types/Hasher.qbk index e80955b..6dd9b7b 100644 --- a/doc/types/Hasher.qbk +++ b/doc/types/Hasher.qbk @@ -7,37 +7,42 @@ [section:Hasher Hasher] -A [@Hasher] implements a hash algorithm. This is used to compute the small -digests NuDB needs to effectively implement a hash table. NuDB provides -the default implementation [link nudb.ref.nudb__xxhasher xxhasher], which is -suitable for most use cases. 
For advanced applications, a user supplied -hash function may be supplied which must meet these requirements. +A type [*Hasher] implements a hash algorithm. This is used to compute the small +digests NuDB needs to effectively implement a hash table. -In the table below: +The NuDB database stores and retrieves key/value pairs. This storage is +organized as "buckets" where each bucket forms the start of a linked list. +Given a key, one must calculate which bucket the key should be +added to. The library includes code to guarantee that any set of +keys will map to a more or less even distribution of buckets, thus +ensuring that the array of buckets is utilized efficiently. A key +element in this code is a call to the Hasher function. This is why +the Hasher function should be designed to generate a uniform +distribution of integers for any random collection of keys. -* `X` denotes a hasher class -* `a` denotes a value of type `X const` -* `s` denotes a value of type `std::uint64_t` +[heading Notation] +* `H` is a hasher class +* `h` is an instance of type `H` +* `s` is a random seed of type `std::uint64_t` * `p` denotes a value of type `void const*` * `n` denotes a value of type `std::size_t` -[table Hasher requirements +[heading Hasher requirements] +[table [[operation] [type] [semantics, pre/post-conditions]] [ - [`X a{s}`] - [ ] + [`H{s}`] + [`H`] [ - `a` is constructed with a seed value integer. To achieve resistance + `H` is constructed with a seed value integer `s`. To achieve resistance from algorithmic complexity attacks, an implementation of [*Hasher] should ensure that values returned from the hash function will be - distinctly different for different values of `s` given the same - inputs. If algorithmic complexity attack resistance is not a - requirement, the seed may be ignored upon construction. + distinctly different for different values of `s`. 
] ] [ - [`a(p,n)`] - [ `std::uint64_t` ] + [`h(p,n)`] + [`std::uint64_t`] [ Returns the digest of the memory `n` bytes in size and pointed to by `p`. `n` will never be zero. A good hash function will @@ -51,6 +56,20 @@ In the table below: ] ] +[ + [`is_Hasher<H>`] + [`std::true_type`] + [ + returns `std::true_type` for `H`. Otherwise returns `std::false_type` + ] ] +] + +[heading Models] +* [link nudb.ref.nudb__xxhasher xxhasher] +[br] +NuDB provides the default implementation `xxhasher`, which is +suitable for most use cases. For advanced applications, users may +supply their own implementation which fulfills the above requirements. -[endsect] +[endsect] \ No newline at end of file From 70a6e0fbf2d56b367b882c1ab895d5fa979c83ac Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 17:07:21 -0700 Subject: [PATCH 12/21] [FOLD] Hasher doc edit --- doc/types/Hasher.qbk | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/doc/types/Hasher.qbk b/doc/types/Hasher.qbk index 6dd9b7b..90c334a 100644 --- a/doc/types/Hasher.qbk +++ b/doc/types/Hasher.qbk @@ -27,8 +27,8 @@ distribution of integers for any random collection of keys. * `p` denotes a value of type `void const*` * `n` denotes a value of type `std::size_t` -[heading Hasher requirements] -[table +[heading Hasher Requirements] +[table Valid Expressions [[operation] [type] [semantics, pre/post-conditions]] [ [`H{s}`] @@ -37,7 +37,9 @@ distribution of integers for any random collection of keys. `H` is constructed with a seed value integer `s`. To achieve resistance from algorithmic complexity attacks, an implementation of [*Hasher] should ensure that values returned from the hash function will be - distinctly different for different values of `s`. + distinctly different for different values of `s`. If algorithmic + complexity attack resistance is not a requirement, the seed may be + ignored upon construction. ] ] [ @@ -60,14 +62,15 @@ distribution of integers for any random collection of keys.
[`is_Hasher<H>`] [`std::true_type`] [ - returns `std::true_type` for `H`. Otherwise returns `std::false_type` + An alias for `std::true_type` for `H`. Otherwise equivalent + to `std::false_type`. ] ] ] [heading Models] * [link nudb.ref.nudb__xxhasher xxhasher] -[br] + NuDB provides the default implementation `xxhasher`, which is suitable for most use cases. For advanced applications, users may supply their own implementation which fulfills the above requirements. From edb43c38439546f9a09d65bb5a9158eacd78a46e Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 17:12:52 -0700 Subject: [PATCH 13/21] Update Progress concept doc --- CHANGELOG.md | 1 + doc/types/Progress.qbk | 50 +++++++++++++++++++++++++++--------------- 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 915be39..de1ce0a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * Tidy up doc build script * Update File concept doc * Update Hasher concept doc +* Update Progress concept doc -------------------------------------------------------------------------------- diff --git a/doc/types/Progress.qbk b/doc/types/Progress.qbk index 52dafe2..69d967d 100644 --- a/doc/types/Progress.qbk +++ b/doc/types/Progress.qbk @@ -7,34 +7,48 @@ [section:Progress Progress] -A [*Progress] object provides feedback to callers on the progress of -long running operations such as calls to [link nudb.ref.nudb__verify verify] or -[link nudb.ref.nudb__rekey rekey] which can take days or weeks for databases that -measure in the terabytes. These objects are used by passing them as parameters -to the appropriate functions, where the will be called periodically with -numbers that indicate the amount of work completed, versus the total amount -of work required.
+An instance of type [*Progress] provides feedback to callers on the progress of +long running operations such as calls to [link nudb.ref.nudb__verify verify], +[link nudb.ref.nudb__visit visit] or [link nudb.ref.nudb__rekey rekey] +which can take days or weeks for databases that measure in the terabytes. +These objects are used by passing them as parameters to the appropriate functions, +where they will be called periodically with numbers that indicate the amount of +work completed, versus the total amount of work required. -In the table below: +[heading Notation] +* `P` is a progress class +* `p` is an instance of type `P` +* `n` is a value of type `std::uint64_t` representing the numerator of the fraction of work done. +* `d` is a value of type `std::uint64_t` representing the denominator of the fraction of work done. -* `X` denotes a progress class -* `a` denotes a value of type `X` -* `p` and `q` denote values of type `std::uint64_t` +[heading Valid Expressions] -[table Progress requirements +[table [[operation] [type] [semantics, pre/post-conditions]] [ - [`a(p, q)`] + [`p(n, d)`] [ ] [ Indicates to the progress object that work has been performed and - intermediate results calculated. `p` represents the amount of work - completed from the beginning of the operation. `q` represents the + intermediate results calculated. `n` represents the amount of work + completed from the beginning of the operation. `d` represents the total amount of work required. The fraction of completed work is - therefore `p/q`, with zero representing no work complete, and one - represents all work complete. `p` and `q` are unitless. + therefore `n/d`, with zero representing no work complete, and one + represents all work complete. ] ] +[ + [`is_Progress<P>`] + [`std::true_type`] + [ + returns `std::true_type` for `P`. Otherwise returns `std::false_type` + ] ] +] + +[heading Models] +* [link nudb.ref.nudb__no_progress no_progress] +[br] +This is a default implementation which does nothing. -[endsect] +[endsect] \ No newline at end of file From 051609d59374ffb801fdd1810f1bdfe22e416860 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 17:13:15 -0700 Subject: [PATCH 14/21] [FOLD] Progress doc edit --- doc/types/Progress.qbk | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/doc/types/Progress.qbk b/doc/types/Progress.qbk index 69d967d..641d16e 100644 --- a/doc/types/Progress.qbk +++ b/doc/types/Progress.qbk @@ -21,9 +21,8 @@ work completed, versus the total amount of work required. * `n` is a value of type `std::uint64_t` representing the numerator of the fraction of work done. * `d` is a value of type `std::uint64_t` representing the denominator of the fraction of work done. -[heading Valid Expressions] - -[table +[heading Progress Requirements] +[table Valid Expressions [[operation] [type] [semantics, pre/post-conditions]] [ [`p(n, d)`] @@ -41,14 +40,15 @@ work completed, versus the total amount of work required. [`is_Progress<P>`] [`std::true_type`] [ - returns `std::true_type` for `P`. Otherwise returns `std::false_type` + An alias for `std::true_type` for `P`. Otherwise equivalent + to `std::false_type` ] ] ] [heading Models] * [link nudb.ref.nudb__no_progress no_progress] -[br] + This is a default implementation which does nothing. [endsect] \ No newline at end of file From 33f038bf0aa3f6310841c279119bd9d6ac7f4612 Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 17:19:03 -0700 Subject: [PATCH 15/21] Add error_code doc section --- CHANGELOG.md | 1 + doc/main.qbk | 7 ++ doc/types/error_code.qbk | 138 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 146 insertions(+) create mode 100644 doc/types/error_code.qbk diff --git a/CHANGELOG.md b/CHANGELOG.md index de1ce0a..cec4cfe 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ * Update File concept doc * Update Hasher concept doc * Update Progress concept doc +* Add error_code doc section -------------------------------------------------------------------------------- diff --git a/doc/main.qbk b/doc/main.qbk index 3d4e4e5..d054f28 100644 --- a/doc/main.qbk +++ b/doc/main.qbk @@ -49,6 +49,12 @@ An explanation of operations on the database. ]] + [[ + [link nudb.error_code Error Codes] + ][ + A treatment of the C++ error_code facility, and NuDB. + ]] + [[ [link nudb.tool Command Line Tool] ][ @@ -72,6 +78,7 @@ [include overview.qbk] [include example.qbk] [include usage.qbk] +[include types/error_code.qbk] [include tool.qbk] [section:ref Reference] diff --git a/doc/types/error_code.qbk b/doc/types/error_code.qbk new file mode 100644 index 0000000..4b17b92 --- /dev/null +++ b/doc/types/error_code.qbk @@ -0,0 +1,138 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + Distributed under the Boost Software License, Version 1.0.
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] +[/ +[section:error_code [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-error_code] `boost::system::error_code`] +] +[section:error_code error_code] +Invoking functions in this library can result in errors. Such errors might +be detected by the functions of this library or be detected by other components +that this library depends upon such as the C++ standard library or underlying +operating system. So all functions in this library return an error code of type +`boost::system::error_code`. This type is capable of holding error codes +returned from any possible source. Given this, it's not surprising that it's +somewhat more complex than a simple integer value. Full description is beyond +the scope of this documentation, but fortunately there are various sources which +together provide a good explanation of how to use it. +[variablelist + [[ + [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/index.html + Boost System Library] + ][ + This is the official documentation for the Boost System Library which + includes the description of boost::system::error_code used by this library + ]] + [[ + [@http://en.cppreference.com/w/cpp/error + C++ Standard Library version] + ][ + The Boost System Library has been incorporated into the standard library + as part of the C++ standard error handling utilities. Except for the + substitution of std:: for boost::system:: namespace, the libraries are + identical. This link points to the standard library documentation which + may be used in addition to the boost version. + ]] + [[ + [@http://blog.think-async.com/2010/04/system-error-support-in-c0x-part-1.html + Thinking Asynchronously in C++] + ][ + Another essential reference on the design and usage of the `error_code`.
+ ]] +] +[heading Associated Types] +[itemized_list +[ + [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Header-error_code `boost::system::error::ercc`] + list of values which might be used as values by + [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-error_code `boost::system::error_code`]. +] +[ + [link nudb.ref.nudb__error error] list of values which might be used as values NuDB library functions. +] +[ + [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error`] +] +] +Any NuDB function may return `error_code` values from either of the above sources. +So error_codes should be checked against both lists of error codes used. + +[heading Notation] +[itemized_list + [`ec` an instance of a reference to the type `boost::system::error_code`.] + [ a value listed in either `boost::system::errc` or `nudb::error`. + [`se` an instance of the type + [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error` + ]. + ] +] + +] +[heading Valid Expressions] +[table + [[Operation] [Return Type] [Description]] + [ + [[@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-error_code-observers `ec.message()`]] + [`std::string`] + [ + Return an explanation of the error code as a string + ] + ] + [ + [[@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error(ec)`]] + [`se`] + [ + Construct a standard exception type. The result can be the object + of a C++ "throw" statement. + ] + ] + [ + [`ec == `] + [bool] + [ + Compare an error_code with an error enum. + ] + ] + [ + [`ec != `] + [bool] + [ + Compare an error_code with an error enum. + ] + ] + [ + [`ec`] + [bool] + [ + Returns `true` if there is an error, `false` otherwise. 
+ ] + ] +] + +[heading Header] +[@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Header-error_code `#include <boost/system/error_code.hpp>`] +[br] +[@../../include/nudb/error.hpp `#include <nudb/error.hpp>`] + +[heading Example] +`` +#include +#include +// ... +boost::system::error_code ec; +nudb::store db; +db.open("db.dat", "db.key", "db.log", ec); +if(!ec) + return; // success ! +if(ec == nudb::error::no_key_file) + std::cerr << "key file not found" << std::endl; +else +if(ec == boost::system::errc::filename_too_long) + std::cerr << "file name too long" << std::endl; +else + std::cerr << ec.message() << std::endl; +return; // failure ! +`` + +[endsect] From 9ba7dafef65e0c2ff172fb88af9f73e8f60881c8 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 17:21:34 -0700 Subject: [PATCH 16/21] [FOLD] use evergreen links --- doc/types/error_code.qbk | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/doc/types/error_code.qbk b/doc/types/error_code.qbk index 4b17b92..32bf55b 100644 --- a/doc/types/error_code.qbk +++ b/doc/types/error_code.qbk @@ -4,7 +4,7 @@ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) ] [/ -[section:error_code [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-error_code] `boost::system::error_code`] +[section:error_code [@http://www.boost.org/libs/system/doc/reference.html#Class-error_code] `boost::system::error_code`] ] [section:error_code error_code] Invoking functions in this library can result in errors. Such errors might @@ -18,7 +18,7 @@ the scope of this documentation, but fortunately there are various sources which together provide a good explanation of how to use it. [variablelist [[ - [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/index.html + [@http://www.boost.org/libs/system/doc/index.html Boost System Library] ][ This is the official documentation for the Boost System Library which @@ -44,15 +44,15 @@ together provide a good explanation of how to use it.
[heading Associated Types] [itemized_list [ - [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Header-error_code `boost::system::error::ercc`] + [@http://www.boost.org/libs/system/doc/reference.html#Header-error_code `boost::system::error::ercc`] list of values which might be used as values by - [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-error_code `boost::system::error_code`]. + [@http://www.boost.org/libs/system/doc/reference.html#Class-error_code `boost::system::error_code`]. ] [ [link nudb.ref.nudb__error error] list of values which might be used as values NuDB library functions. ] [ - [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error`] + [@http://www.boost.org/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error`] ] ] Any NuDB function may return `error_code` values from either of the above sources. @@ -63,7 +63,7 @@ So error_codes should be checked against both lists of error codes used. [`ec` an instance of a reference to the type `boost::system::error_code`.] [ a value listed in either `boost::system::errc` or `nudb::error`. [`se` an instance of the type - [@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error` + [@http://www.boost.org/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error` ]. ] ] @@ -73,14 +73,14 @@ So error_codes should be checked against both lists of error codes used.
[table [[Operation] [Return Type] [Description]] [ - [[@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-error_code-observers `ec.message()`]] + [[@http://www.boost.org/libs/system/doc/reference.html#Class-error_code-observers `ec.message()`]] [`std::string`] [ Return an explanation of the error code as a string ] ] [ - [[@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error(ec)`]] + [[@http://www.boost.org/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error(ec)`]] [`se`] [ Construct a standard exception type. The result can be the object @@ -111,7 +111,7 @@ So error_codes should be checked against both lists of error codes used. ] [heading Header] -[@http://www.boost.org/doc/libs/1_64_0/libs/system/doc/reference.html#Header-error_code `#include <boost/system/error_code.hpp>`] +[@http://www.boost.org/libs/system/doc/reference.html#Header-error_code `#include <boost/system/error_code.hpp>`] [br] [@../../include/nudb/error.hpp `#include <nudb/error.hpp>`] From 45ec9b85fa670ef7c3db34c35f5fa688b6effb81 Mon Sep 17 00:00:00 2001 From: Vinnie Falco Date: Wed, 17 May 2017 17:23:17 -0700 Subject: [PATCH 17/21] [FOLD] fix boostbook warning --- doc/types/error_code.qbk | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/types/error_code.qbk b/doc/types/error_code.qbk index 32bf55b..20c4ddc 100644 --- a/doc/types/error_code.qbk +++ b/doc/types/error_code.qbk @@ -112,7 +112,7 @@ So error_codes should be checked against both lists of error codes used.
[heading Header] [@http://www.boost.org/libs/system/doc/reference.html#Header-error_code `#include <boost/system/error_code.hpp>`] -[br] + [@../../include/nudb/error.hpp `#include <nudb/error.hpp>`] [heading Example] From c0f4b1a874bcff56ae5d7ca518edbbbf9e9f8bd8 Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 17:27:13 -0700 Subject: [PATCH 18/21] Update basic_store javadoc --- CHANGELOG.md | 1 + include/nudb/basic_store.hpp | 41 ++++++++++++++++++++++++++---------- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cec4cfe..408187b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ * Update Hasher concept doc * Update Progress concept doc * Add error_code doc section +* Update basic_store javadoc -------------------------------------------------------------------------------- diff --git a/include/nudb/basic_store.hpp b/include/nudb/basic_store.hpp index 0e69195..b66f29c 100644 --- a/include/nudb/basic_store.hpp +++ b/include/nudb/basic_store.hpp @@ -23,24 +23,43 @@ namespace nudb { /** A high performance, insert-only key/value database for SSDs. - To create a database first call the @ref create - free function. Then construct a @ref basic_store and - call @ref open on it: + A database is represented by a triplet of files: the data file, the key file, and the log file. + These files together constitute a database of key/value pairs. Each file has a distinct header in a + well known format. The data file holds all of the key/value pairs and is serially iterable. + The key file holds a hash table indexing all of the contents in the data file. + The log file holds information used to roll the database back in the event of a failure.
- @code - error_code ec; - create( - "db.dat", "db.key", "db.log", - 1, make_salt(), 8, 4096, 0.5f, ec); - basic_store db; - db.open("db.dat", "db.key", "db.log", ec); - @endcode + Operations which apply to the database as a whole, such as create and recover, are implemented as free + functions which include the names of the data, key and log files as parameters. + + In order to alter or access the contents of a database, one creates a data structure of + type "basic_store". Operations on the contents of the database, such as insertions and + fetches, are implemented as member functions of the "basic_store" type. @tparam Hasher The hash function to use. This type must meet the requirements of @b Hasher. @tparam File The type of File object to use. This type must meet the requirements of @b File. + + @par Models + The simplest and most common usage of the basic_store class template is the store class + which is defined like this: + + @code + #include + #include + using store = basic_store< xxhasher, native_file >; + @endcode + + @par Example + @code + #include + #include + nudb::store db; + nudb::error_code ec; + db.open("db.dat", "db.key", "db.log", ec); + @endcode */ template class basic_store From 98a21e0451e44cdf6edb4a5ff10ea7f0c1d19702 Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 17:30:33 -0700 Subject: [PATCH 19/21] Edit error_code javadoc --- CHANGELOG.md | 1 + include/nudb/error.hpp | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 408187b..ca5b451 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ * Update Progress concept doc * Add error_code doc section * Update basic_store javadoc +* Edit error_code javadoc -------------------------------------------------------------------------------- diff --git a/include/nudb/error.hpp b/include/nudb/error.hpp index f5f8685..e8f12ea 100644 --- a/include/nudb/error.hpp +++ b/include/nudb/error.hpp @@ -228,7 +228,7 @@ enum class error error_category
const& nudb_category(); -/** Returns a database error code. +/** Constructs a database error code. This function is used by the implementation to convert @ref error values into @ref error_code objects. From 491ba4373cac1427a8e3dca9a1484dc14d3b6b80 Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 17:39:12 -0700 Subject: [PATCH 20/21] Update file javadocs --- CHANGELOG.md | 1 + include/nudb/posix_file.hpp | 4 ++++ include/nudb/win32_file.hpp | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ca5b451..452228b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ * Add error_code doc section * Update basic_store javadoc * Edit error_code javadoc +* Update file javadocs -------------------------------------------------------------------------------- diff --git a/include/nudb/posix_file.hpp b/include/nudb/posix_file.hpp index 8d3b8e5..d99e25e 100644 --- a/include/nudb/posix_file.hpp +++ b/include/nudb/posix_file.hpp @@ -35,6 +35,10 @@ namespace nudb { +/** A posix file. + + This class provides a posix implementation of the @b File concept. +*/ class posix_file { int fd_ = -1; diff --git a/include/nudb/win32_file.hpp b/include/nudb/win32_file.hpp index d225ae9..9ca323a 100644 --- a/include/nudb/win32_file.hpp +++ b/include/nudb/win32_file.hpp @@ -48,7 +48,7 @@ namespace nudb { -/** A descriptor to a Win32 file. +/** A Win32 file. This class provides a Win32 implementation of the @b File concept.
From d1feb01e62a0c2a5c4d167ea6df7da77bbfd8ac7 Mon Sep 17 00:00:00 2001 From: Robert Ramey Date: Wed, 17 May 2017 17:42:56 -0700 Subject: [PATCH 21/21] Update verify javadoc --- CHANGELOG.md | 1 + include/nudb/verify.hpp | 44 +++++++++++++++++------------------------ 2 files changed, 19 insertions(+), 26 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 452228b..deea1aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -13,6 +13,7 @@ * Update basic_store javadoc * Edit error_code javadoc * Update file javadocs +* Update verify javadoc -------------------------------------------------------------------------------- diff --git a/include/nudb/verify.hpp b/include/nudb/verify.hpp index 23b1260..9cb567f 100644 --- a/include/nudb/verify.hpp +++ b/include/nudb/verify.hpp @@ -119,69 +119,61 @@ struct verify_info }; /** Verify consistency of the key and data files. - This function opens the key and data files, and performs the following checks on the contents: - @li Data file header validity - @li Key file header validity - @li Data and key file header agreements - @li Check that each value is contained in a bucket - @li Check that each bucket item reflects a value - @li Ensure no values with duplicate keys - Undefined behavior results when verifying a database that still has a log file. Use @ref recover on such databases first. - This function selects one of two algorithms to use, the normal version, and a faster version that can take advantage of a buffer of sufficient size. Depending on the value of the bufferSize argument, the appropriate algorithm is chosen. - A good value of bufferSize is one that is a large fraction of the key file size. For example, 20% of the size of the key file. Larger is better, with the highest usable value depending on the size of the key file. If presented with a buffer size that is too large to be of extra use, the fast algorithm will simply allocate what it needs. 
- - @par Template Parameters - @tparam Hasher The hash function to use. This type must meet the requirements of @b HashFunction. The hash function must be the same as that used to create the database, or else an error is returned. - - @param info A structure which will be default constructed - inside this function, and filled in if the operation completes - successfully. If an error is indicated, the contents of this - variable are undefined. - + @param info A reference to a structure of type + @ref verify_info which will be filled in. + Upon return the contents of this structure are defined + if and only if the verify function completes successfully + as indicated by the ec parameter. @param dat_path The path to the data file. - @param key_path The path to the key file. - @param bufferSize The number of bytes to allocate for the buffer. If this number is too small, or zero, a slower algorithm will be used that does not require a buffer. - - @param progress A function which will be called periodically - as the algorithm proceeds. The equivalent signature of the - progress function must be: + @param progress + A function address, function object or lambda + with the following signature: @code void progress( std::uint64_t amount, // Amount of work done so far std::uint64_t total // Total amount of work to do ); @endcode - + This function will be called periodically as the algorithm + proceeds so that the user code can monitor the progress + of the operation and calculate how much time remains to + complete the operation. The library contains a default function object, + [no_progress](@ref no_progress), which matches the above signature + but does nothing. @param ec Set to the error, if any occurred. + + @par Associated Types + @li @ref verify_info - Information returned by verify function + @li @ref no_progress - Function of type Progress which does nothing */ template void