diff --git a/.travis.yml b/.travis.yml index afc790f..65fa179 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,3 +1,7 @@ +notifications: + email: + false + language: cpp env: @@ -13,27 +17,28 @@ env: - BOOST_ROOT=$HOME/boost_1_61_0 - BOOST_URL='http://sourceforge.net/projects/boost/files/boost/1.61.0/boost_1_61_0.tar.gz' -packages: &gcc5_pkgs - - gcc-5 - - g++-5 - - python-software-properties - - libssl-dev - - libffi-dev - - libstdc++6 - - binutils-gold - # Provides a backtrace if the unittests crash - - gdb - # Needed for installing valgrind - - subversion - - automake - - autotools-dev - - libc6-dbg - # Needed to build rocksdb - - libsnappy-dev +addons: + apt: + sources: &base_sources + - ubuntu-toolchain-r-test + packages: &base_packages + - python-software-properties + - libffi-dev + - libstdc++6 + - binutils-gold + # Provides a backtrace if the unittests crash + - gdb + # Needed for installing valgrind + - subversion + - automake + - autotools-dev + - libc6-dbg + # Needed to build rocksdb + - libsnappy-dev matrix: include: - # GCC/Coverage/Autobahn + # gcc coverage (+valgrind if master or develop) - compiler: gcc env: - GCC_VER=5 @@ -41,33 +46,66 @@ matrix: - ADDRESS_MODEL=64 - BUILD_SYSTEM=cmake - PATH=$PWD/cmake/bin:$PATH - addons: &ao_gcc5 + addons: apt: - sources: ['ubuntu-toolchain-r-test'] - packages: *gcc5_pkgs + packages: + - gcc-5 + - g++-5 + - *base_packages + sources: + - *base_sources - # Clang/UndefinedBehaviourSanitizer - - compiler: clang + # older GCC, release + - compiler: gcc env: - - GCC_VER=5 - - VARIANT=usan - - CLANG_VER=3.8 + - GCC_VER=4.9 + - VARIANT=release + - ADDRESS_MODEL=64 + addons: + apt: + packages: + - gcc-4.9 + - g++-4.9 + - *base_packages + sources: + - *base_sources + + # later GCC + - compiler: gcc + env: + - GCC_VER=6 + - VARIANT=release - ADDRESS_MODEL=64 - - UBSAN_OPTIONS='print_stacktrace=1' - BUILD_SYSTEM=cmake - PATH=$PWD/cmake/bin:$PATH - - PATH=$PWD/llvm-$LLVM_VERSION/bin:$PATH - addons: *ao_gcc5 + addons: + apt: + 
packages: + - gcc-6 + - g++-6 + - *base_packages + sources: + - *base_sources - # Clang/AddressSanitizer + # clang ubsan+asan - compiler: clang env: - GCC_VER=5 - - VARIANT=asan + - VARIANT=ubasan - CLANG_VER=3.8 - ADDRESS_MODEL=64 + - UBSAN_OPTIONS='print_stacktrace=1' + - BUILD_SYSTEM=cmake + - PATH=$PWD/cmake/bin:$PATH - PATH=$PWD/llvm-$LLVM_VERSION/bin:$PATH - addons: *ao_gcc5 + addons: + apt: + packages: + - gcc-5 + - g++-5 + - *base_packages + sources: + - *base_sources cache: directories: @@ -84,7 +122,3 @@ script: after_script: - cat nohup.out || echo "nohup.out already deleted" - -notifications: - email: - false diff --git a/CHANGELOG.md b/CHANGELOG.md index 124698c..deea1aa 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,22 @@ +1.0.3 + +* Update Beast subtree to v41 +* Update Travis targets and build scripts +* Update example programs +* Split up doc files +* Add introduction to doc +* Tidy up doc build script +* Update File concept doc +* Update Hasher concept doc +* Update Progress concept doc +* Add error_code doc section +* Update basic_store javadoc +* Edit error_code javadoc +* Update file javadocs +* Update verify javadoc + +-------------------------------------------------------------------------------- + 1.0.2 * Better CMakeLists.txt for finding Boost @@ -5,33 +24,33 @@ * Remove error::success (API Change) * Update Travis to Boost 1.61.0 ---- +-------------------------------------------------------------------------------- 1.0.1 * Travis: Limit the number of jobs ---- +-------------------------------------------------------------------------------- 1.0.0 * First Official Release! 
* Fix badge markdown in README.md ---- +-------------------------------------------------------------------------------- 1.0.0-b7 * Fix doc typos * Improve file creation on POSIX ---- +-------------------------------------------------------------------------------- 1.0.0-b6 * Fix incorrect file deletion in create() ---- +-------------------------------------------------------------------------------- 1.0.0-b5 @@ -46,7 +65,7 @@ * Improved commit process * Dynamic block size in custom allocator ---- +-------------------------------------------------------------------------------- 1.0.0-b4 @@ -55,12 +74,12 @@ * Throw on API calls when no database open * Benchmarks vs. RocksDB -### API Changes: +API Changes: * `insert` sets `error::key_exists` instead of returning `false` * `fetch` sets `error::key_not_found` instead of returning `false` ---- +-------------------------------------------------------------------------------- 1.0.0-b3 @@ -71,14 +90,14 @@ * Add visit test * Improved coverage ---- +-------------------------------------------------------------------------------- 1.0.0-b2 * Minor documentation and tidying * Add CHANGELOG ---- +-------------------------------------------------------------------------------- 1.0.0-b1 diff --git a/CMakeLists.txt b/CMakeLists.txt index 8ea7a04..6ded137 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -82,25 +82,24 @@ endfunction () #------------------------------------------------------------------------------- if ("${VARIANT}" STREQUAL "coverage") - set (CMAKE_CXX_FLAGS + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fprofile-arcs -ftest-coverage") - set (CMAKE_BUILD_TYPE RELWITHDEBINFO) + set(CMAKE_BUILD_TYPE RELWITHDEBINFO) set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lgcov") -elseif ("${VARIANT}" STREQUAL "asan") - set (CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize=address -fno-omit-frame-pointer") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address") - set (CMAKE_BUILD_TYPE RELWITHDEBINFO) 
-elseif ("${VARIANT}" STREQUAL "usan") - set (CMAKE_CXX_FLAGS - "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-omit-frame-pointer") - set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=undefined") - set (CMAKE_BUILD_TYPE RELWITHDEBINFO) + +elseif ("${VARIANT}" STREQUAL "ubasan") + set(CMAKE_CXX_FLAGS + "${CMAKE_CXX_FLAGS} -funsigned-char -fno-omit-frame-pointer -fsanitize=address,undefined") + set (CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fsanitize=address,undefined") + set(CMAKE_BUILD_TYPE RELWITHDEBINFO) + elseif ("${VARIANT}" STREQUAL "debug") - set (CMAKE_BUILD_TYPE DEBUG) + set(CMAKE_BUILD_TYPE DEBUG) + elseif ("${VARIANT}" STREQUAL "release") - set (CMAKE_BUILD_TYPE RELEASE) -endif () + set(CMAKE_BUILD_TYPE RELEASE) + +endif() include_directories ( include diff --git a/Jamroot b/Jamroot index 7c140b8..8f13a5c 100644 --- a/Jamroot +++ b/Jamroot @@ -19,28 +19,12 @@ variant coverage "-lgcov" ; -variant asan +variant ubasan : release : - "-fsanitize=address -fno-omit-frame-pointer" - "-fsanitize=address" - ; - -variant msan - : - debug - : - "-fsanitize=memory -fno-omit-frame-pointer -fsanitize-memory-track-origins=2 -fsanitize-memory-use-after-dtor" - "-fsanitize=memory" - ; - -variant usan - : - debug - : - "-fsanitize=undefined -fno-omit-frame-pointer" - "-fsanitize=undefined" + "-funsigned-char -fno-omit-frame-pointer -fsanitize=address,undefined" + "-fsanitize=address,undefined" ; project nudb diff --git a/doc/example.qbk b/doc/example.qbk new file mode 100644 index 0000000..8f78826 --- /dev/null +++ b/doc/example.qbk @@ -0,0 +1,58 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:example Example] + +This complete program creates a database, opens the database, inserts several +key/value pairs, fetches the key/value pairs, closes the database, then erases +the database files. Source code for this program is located in the examples +directory. + +``` +#include +#include +#include + +int main() +{ + using namespace nudb; + std::size_t constexpr N = 1000; + using key_type = std::uint32_t; + error_code ec; + auto const dat_path = "db.dat"; + auto const key_path = "db.key"; + auto const log_path = "db.log"; + create( + dat_path, key_path, log_path, + 1, + make_salt(), + sizeof(key_type), + block_size("."), + 0.5f, + ec); + store db; + db.open(dat_path, key_path, log_path, + 16 * 1024 * 1024, ec); + char data = 0; + // Insert + for(key_type i = 0; i < N; ++i) + db.insert(&i, &data, sizeof(data), ec); + // Fetch + for(key_type i = 0; i < N; ++i) + db.fetch(&i, + [&](void const* buffer, std::size_t size) + { + // do something with buffer, size + }, ec); + db.close(ec); + erase_file(dat_path); + erase_file(key_path); + erase_file(log_path); +} +``` + +[endsect] diff --git a/doc/intro.qbk b/doc/intro.qbk new file mode 100644 index 0000000..4fc3f90 --- /dev/null +++ b/doc/intro.qbk @@ -0,0 +1,159 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:intro Introduction] + +NuDB is an append only, key/value store specifically optimized for sequential +write and random read performance on modern SSDs or equivalent high-IOPS devices. +It does not support deletion of data. Both the write/append, read performance +and memory usage are independent of the size of the database. 
+A common application for NuDB is storage where write/append performance is +important but where a key may be used to retrieve previously stored records. + +The design emphasizes two goals: simplicity and robustness/recoverability. + +[heading Simplicity] + +This complete program demonstrates how easy the library is to use. It creates +a database, opens the database, inserts several key/value pairs, reads them back +and displays them. Source code for this program is located in the examples +directory. + +[import ../examples/example.cpp][simple_example] + +[variablelist Notes +[ + [(1) File names] + [ + A database is represented by three files: the data file, the key file, + and the log file. Each file has a distinct header in a well known format. + The data file holds all of the key/value pairs and is serially iterable. The + key file holds a hash table indexing all of the contents in the data file. + The log file holds information used to roll the database back in the event + of a failure. + ] +] +[ + [(2) Create a new database] + [ + The [link nudb.ref.nudb__create `create`] function creates a new data file and key + file for a database with the specified parameters. The caller specifies + the hash function to use as a template argument, the file paths, + and the database constants. + ] +] +[ + [(3) Open an existing database] + [ + The [link nudb.ref.nudb__basic_store.open `open`] function + prepares a database for insertion and/or reading. + ] +] +[ + [(4) Insert key/value pairs] + [ + Once a database is open, call the [link nudb.ref.nudb__basic_store.insert `insert`] + function to insert new key/value pairs. + If the key already exists, the error is set to + [link nudb.ref.nudb__error `error::key_exists`]. + All keys in a NuDB database must be unique. Multiple threads can call + insert at the same time. Internally however, insertions are serialized to + present a consistent view of the database to callers. 
+ ] +] +[ + [(5) Fetch a value given its key] + [ + The function [link nudb.ref.nudb__basic_store.fetch `fetch`] takes a callback object + as a parameter which is invoked when the value is retrieved. If there is + no record in the database which corresponds to the key, the error code + returns a value of [link nudb.ref.nudb__error `error::key_not_found`]. + ] +] +[ + [(6) Terminate access to the database] + [ + The function [link nudb.ref.nudb__basic_store.close `close`] frees all the resources + used by the database. + ] +] +] + +[heading Robustness] + +A second goal of the library is to create a database as resistant to corruption +as possible. Ideally, this would guarantee that any time the insert function +successfully returns, the database is still readable and that there is one +and only one more record appended to the database. + +The enforcement of such a guarantee would depend on the implementation of +the interface to the underlying file system. In practice this means judicious +usage of a "sync" function which most file systems provide in one form or +another. The common understanding of this "sync" function is that it guarantees +that all file operations on the files have been completed and that they are +all reflected in the files themselves. + +Unfortunately, it seems that most current file systems make no such +guarantee. See +[@ https://www.usenix.org/system/files/conference/osdi14/osdi14-paper-pillai.pdf +On the Complexity of Crafting Crash-Consistent Applications +]. + +In fact, the whole question regarding robustness turns out to be quite +complicated as evidenced by the discussion on the +[@ http://boost.2283326.n4.nabble.com/NuDB-A-fast-key-value-insert-only-database-for-SSD-drives-in-C-11-tp4692735.html Boost Developers mailing list]. + +One simple way to investigate how this works on one's own system is illustrated +by the example here. 
The first is a program named crash.cpp which opens an +existing database, appends one record and invokes `std::abort()`. This program +is meant to simulate a system failure such as a power outage at the most +inconvenient time. + +[import ../examples/crash.cpp][crash] + +After running this program, the database would be in some unknown state. +Specifically, does the database contain the record for the name "george"? +In fact, it's not even clear that the database is readable at all. + +This last program opens up the database and lists all its members. The +library includes a function named [link nudb.ref.nudb__recover `recover`] for +fixing errors in a corrupted database such as one which might result +from a crashed environment. But it is not necessary to invoke it +explicitly as the normal [link nudb.ref.nudb__basic_store.open `open`] +includes that functionality. So we can just open the database and list +all the records counting on the library implementation to detect any +corruption resulting from previous operations. + +Listing all the contents of the database invokes a user specified +function for each pair of key and value. Here is a simple program +which lists all the data in our sample. + +[import ../examples/recover.cpp][recover] + +Invoking this program on our test database produces the following +output. +[pre +open successful +key: 123456789 +name: bob +key: 666666666 +name: alice +key: 987654321 +name: ted +key: 999999999 +name: carol +] + +which shows that on this particular platform (Mac OSX), return +from an insert operation does not guarantee that a record is +actually appended to the database. + +The library includes a test suite with more examples similar to the +above which can be used to get a better understanding of the utility +and robustness in one's own environment. 
+ +[endsect] diff --git a/doc/main.qbk b/doc/main.qbk index a3a2493..d054f28 100644 --- a/doc/main.qbk +++ b/doc/main.qbk @@ -24,27 +24,49 @@ [template indexterm2[term1 term2] ''''''[term1]''''''[term2]''''''] [variablelist + [[ + [link nudb.intro Introduction] + ][ + A description of what the library does, and its target audience. + ]] + [[ [link nudb.overview Overview] ][ An overview of features, requirements, and credits, plus rationale and design information. ]] + [[ [link nudb.example Example] ][ An example that illustrates the use of NuDB. ]] + [[ [link nudb.usage Usage] ][ An explanation of operations on the database. ]] + + [[ + [link nudb.error_code Error Codes] + ][ + A treatment of the C++ error_code facility, and NuDB. + ]] + + [[ + [link nudb.tool Command Line Tool] + ][ + Instructions on using the nudb command line tool. + ]] + [[ [link nudb.ref Reference] ][ Detailed class and function reference. ]] + [[ [link nudb.index Index] ][ @@ -52,284 +74,12 @@ ]] ] -[section:overview Overview] - -NuDB is an append only, key/value store specifically optimized for random -read performance on modern SSDs or equivalent high-IOPS devices. The most -common application for NuDB is content addressible storage where a -cryptographic digest of the data is used as the key. The read performance -and memory usage are independent of the size of the database. These are -some other features: - -[heading History] - -The first versions of rippled, the application behind the Ripple consensus -network, used SQLite as their back end for unstructured data. The -performance quickly became a limiting factor. - -Then rippled then went through a series of back ends including LMDB, LevelDB, and -RocksDB. Each of these databases performed well at first, but as the data -size increased, memory usage increased and performance dropped off drastically. - -The problem is caching. 
Each of these databases relies on some O(n) data -structure, such as a Bloom filter, to improve their performance. These work -well until the structures no longer fit in memory. In addition, many virtual -machines are memory constrained. - -To address this issue, the developers performed a thought experiment -- if -you assume the data size is so large that no O(n) caching is effective, what -is the best read performance you could expect? They reached the following -conclusions: - -1) Writes should not block reads. -2) Reads should be limited only by the SSD's IOPS limit. -3) A read for a non-present key should require one IOP. -4) A read for a present key whose data can be read in a single IOP should -only require two IOPs, one to figure out where it is and one to read it in. - -NuDB is designed to come as close to this ideal as possible. - -[heading Design] - -NuDB uses three files to hold the data and indexes. The data file is append -only and contains sufficient information to rebuild the index. The index -file is random access and contains hash buckets. When an update is in -progress, a temporary journal file is used to roll the update back if -needed. - -NuDB uses linear hashing to dynamically increase the number of buckets in -the index file as the data size grows. Bucket overflows are handled by -adding "overflow" records to the data file. Bucket overflows can be -minimized by increasing the number of buckets, leading to a size/speed -tradeoff. Typical databases keep the average bucket half full (or half -empty, depending on your point of view) resulting in spill records -accounting for less than 1% of reads. - -Inserts are buffered in memory and appended to the data file immediately. -Updates to the index file are performed as an atomic operation. Fetch -operations retrieve records in the process of being modified from memory -during the update operation so that writes do not block fetches. 
- -Before the index file is modified, a journal file is created to recover -consistency in the event of a crash during the update. The recovery process -will index all records written to the data file, so the aggregation of index -updates does not increase the time which a crash would result in loss of -data. - -Iteration can be performed on the data file directly. Since it is append -only, there is no risk of other operations corrupting an iteration in -progress. - -[heading Performance] - -Writes do not block reads. Read rates are typically around 90% of the SSD's -IOPS limit. An average fetch for a non-present key typically requires fewer -than 1.01 IOPs. An average fetch for a present key requires fewer than 1.01 -IOPs plus however many IOPs it takes to read the data. - -[heading Applications] - -Content addressable storage associates data with its cryptographic digest. -This type of storage is commonly used in decentralized blockchain applications. - -Often these applications require following hash chains -- where one object -contains the hash of another object that ultimately leads to the object -desired. NuDB's low latency and high speed are particularly advantageous -in these kinds of applications. - -NuDB is append only and does not support a delete operation. To support -retaining limited historical information, NuDB is often used in a dual -database configuration. One database is older and is read only, the other -is newer and is read/write. Periodically, the older database is discarded and -the newer database becomes the new read only database and a new read/write -database is created. - -[endsect] - - - -[section:example Example] - -This complete program creates a database, opens the database, inserts several -key/value pairs, fetches the key/value pairs, closes the database, then erases -the database files. Source code for this program is located in the examples -directory. 
- -``` -#include -#include -#include - -int main() -{ - using namespace nudb; - std::size_t constexpr N = 1000; - using key_type = std::uint32_t; - error_code ec; - auto const dat_path = "db.dat"; - auto const key_path = "db.key"; - auto const log_path = "db.log"; - create( - dat_path, key_path, log_path, - 1, - make_salt(), - sizeof(key_type), - block_size("."), - 0.5f, - ec); - store db; - db.open(dat_path, key_path, log_path, - 16 * 1024 * 1024, ec); - char data = 0; - // Insert - for(key_type i = 0; i < N; ++i) - db.insert(&i, &data, sizeof(data), ec); - // Fetch - for(key_type i = 0; i < N; ++i) - db.fetch(&i, - [&](void const* buffer, std::size_t size) - { - // do something with buffer, size - }, ec); - db.close(ec); - erase_file(dat_path); - erase_file(key_path); - erase_file(log_path); -} -``` - -[endsect] - - - -[section:usage Usage] - -[heading Files] - -A database is represented by three files: the data file, the key file, -and the log file. Each file has a distinct header in a well known format. -The data file holds all of the key/value pairs and is serially iterable. The -key file holds a hash table indexing all of the contents in the data file. -The log file holds information used to roll the database back in the event -of a failure. - -[heading Create/Open] - -The [link nudb.ref.nudb__create create] function creates a new data file and key -file for a database with the specified parameters. 
The caller specifies -the hash function to use as a template argument, the file paths, -and the database constants: - -[note - Sample code and identifiers mentioned in this section are written - as if the following declarations are in effect: - ``` - #include - using namespace nudb; - error_code ec; - ``` -] - -``` -create( - "nudb.dat", // Path to data file - "nudb.key", // Path to key file - "nudb.log", // Path to log file - 1, // Application-defined constant - make_salt(), // A random integer - 4, // The size of keys - block_size(".") // Block size in key file - 0.5f // The load factor - ec); -``` - -The application-defined constant is a 64-bit unsigned integer which the -caller may set to any value. This value can be retrieved from an open -database, where it will be equal to the value used at creation time. This -constant can be used for any purpose. For example, to inform the application -of what application-specific version was used to create the database. - -The salt is a 64-bit unsigned integer used to prevent algorithmic complexity -attacks. Hash functions used during database operations are constructed with -the salt, providing an opportunity to permute the hash function. This feature -is useful when inserted database keys come from untrusted sources, such as the -network. - -The key size is specified when the database is created, and cannot be changed. -All key files indexing the same data file will use the key size of the data -file. - -The block size indicates the size of buckets in the key file. The best choice -for the block size is the natural sector size of the device. For most SSDs -in production today this is 4096, or less often 8192 or 16384. The function -[link nudb.ref.nudb__block_size block_size] returns the best guess of the block -size used by the device mounted at the specified path. - -The load factor determines the target bucket occupancy fraction. 
There is -almost never a need to specify anything other than the recommended value of -0.5, which strikes the perfect balance of space-efficiency and fast lookup. - -An open database is represented by objects of type -[link nudb.ref.nudb__basic_store basic_store], templated on the hasher. The type -alias [link nudb.ref.nudb__store store] represents a database using -[link nudb.ref.nudb__xxhasher xxhasher], the default hash function. To open -a database, declare a database object and then call the -[link nudb.ref.nudb__basic_store.open open] member function: - -``` -store db; -db.open("nudb.dat", "nudb.key", "nudb.log", ec); -``` - -When opening a database that was previously opened by a program that was -terminated abnormally, the implementation automatically invokes the -recovery process. This process restores the integrity of the database by -replaying the log file if it is present. - -[heading Insert/Fetch] - -Once a database is open, it becomes possible to insert new key/value pairs -and look them up. Insertions are straightforward: - -``` -db.insert(key, data, bytes, ec); -``` - -If the key already exists, the error is set to -[link nudb.ref.nudb__error.key_exists error::key_exists]. All keys in a NuDB -database must be unique. Multiple threads can call insert at the same time. -Internally however, insertions are serialized to present a consistent view -of the database to callers. - -Retrieving a key/value pair if it exists is similarly straightforward: - -``` -db.fetch(key, - [&](void const* buffer, std::size_t size) - { - ... - }, ec); -``` - -To give callers control over memory allocation strategies, the fetch -function takes a callback object as a parameter. The callback is invoked -with a pointer to the data and size, if the item exists in the database. -The callback can decide how to store this information, if at all. 
- -[endsect] - -[section Command Line Tool] - -To allow administration, NuDB comes with the "nudb" command line tool, -which may be built using b2 or CMake. Files for the tool are located in -the "tools" directory. Once the tool is built, and located in your path, -execute this command for additional instructions: - -``` -nudb help -``` - -[endsect] +[include intro.qbk] +[include overview.qbk] +[include example.qbk] +[include usage.qbk] +[include types/error_code.qbk] +[include tool.qbk] [section:ref Reference] [xinclude quickref.xml] diff --git a/doc/makeqbk.sh b/doc/makeqbk.sh old mode 100644 new mode 100755 index 3da8c07..9cda696 --- a/doc/makeqbk.sh +++ b/doc/makeqbk.sh @@ -1,4 +1,4 @@ -#!/usr/bin/bash +#!/bin/bash # Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) # diff --git a/doc/overview.qbk b/doc/overview.qbk new file mode 100644 index 0000000..6ea81b8 --- /dev/null +++ b/doc/overview.qbk @@ -0,0 +1,100 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:overview Overview] + +NuDB is an append only, key/value store specifically optimized for random +read performance on modern SSDs or equivalent high-IOPS devices. The most +common application for NuDB is content addressible storage where a +cryptographic digest of the data is used as the key. The read performance +and memory usage are independent of the size of the database. These are +some other features: + +[heading History] + +The first versions of rippled, the application behind the Ripple consensus +network, used SQLite as their back end for unstructured data. The +performance quickly became a limiting factor. + +Then rippled then went through a series of back ends including LMDB, LevelDB, and +RocksDB. 
Each of these databases performed well at first, but as the data +size increased, memory usage increased and performance dropped off drastically. + +The problem is caching. Each of these databases relies on some O(n) data +structure, such as a Bloom filter, to improve their performance. These work +well until the structures no longer fit in memory. In addition, many virtual +machines are memory constrained. + +To address this issue, the developers performed a thought experiment -- if +you assume the data size is so large that no O(n) caching is effective, what +is the best read performance you could expect? They reached the following +conclusions: + +1) Writes should not block reads. +2) Reads should be limited only by the SSD's IOPS limit. +3) A read for a non-present key should require one IOP. +4) A read for a present key whose data can be read in a single IOP should +only require two IOPs, one to figure out where it is and one to read it in. + +NuDB is designed to come as close to this ideal as possible. + +[heading Design] + +NuDB uses three files to hold the data and indexes. The data file is append +only and contains sufficient information to rebuild the index. The index +file is random access and contains hash buckets. When an update is in +progress, a temporary journal file is used to roll the update back if +needed. + +NuDB uses linear hashing to dynamically increase the number of buckets in +the index file as the data size grows. Bucket overflows are handled by +adding "overflow" records to the data file. Bucket overflows can be +minimized by increasing the number of buckets, leading to a size/speed +tradeoff. Typical databases keep the average bucket half full (or half +empty, depending on your point of view) resulting in spill records +accounting for less than 1% of reads. + +Inserts are buffered in memory and appended to the data file immediately. +Updates to the index file are performed as an atomic operation. 
Fetch +operations retrieve records in the process of being modified from memory +during the update operation so that writes do not block fetches. + +Before the index file is modified, a journal file is created to recover +consistency in the event of a crash during the update. The recovery process +will index all records written to the data file, so the aggregation of index +updates does not increase the time which a crash would result in loss of +data. + +Iteration can be performed on the data file directly. Since it is append +only, there is no risk of other operations corrupting an iteration in +progress. + +[heading Performance] + +Writes do not block reads. Read rates are typically around 90% of the SSD's +IOPS limit. An average fetch for a non-present key typically requires fewer +than 1.01 IOPs. An average fetch for a present key requires fewer than 1.01 +IOPs plus however many IOPs it takes to read the data. + +[heading Applications] + +Content addressable storage associates data with its cryptographic digest. +This type of storage is commonly used in decentralized blockchain applications. + +Often these applications require following hash chains -- where one object +contains the hash of another object that ultimately leads to the object +desired. NuDB's low latency and high speed are particularly advantageous +in these kinds of applications. + +NuDB is append only and does not support a delete operation. To support +retaining limited historical information, NuDB is often used in a dual +database configuration. One database is older and is read only, the other +is newer and is read/write. Periodically, the older database is discarded and +the newer database becomes the new read only database and a new read/write +database is created. 
+ +[endsect] diff --git a/doc/tool.qbk b/doc/tool.qbk new file mode 100644 index 0000000..5afde00 --- /dev/null +++ b/doc/tool.qbk @@ -0,0 +1,19 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:tool Command Line Tool] + +To allow administration, NuDB comes with the "nudb" command line tool, +which may be built using b2 or CMake. Files for the tool are located in +the "tools" directory. Once the tool is built, and located in your path, +execute this command for additional instructions: + +``` +nudb help +``` + +[endsect] diff --git a/doc/types/File.qbk b/doc/types/File.qbk index f36259c..526ea5b 100644 --- a/doc/types/File.qbk +++ b/doc/types/File.qbk @@ -8,130 +8,124 @@ [section:File File] The [*File] concept abstracts access to files in the underlying file system. -Two implementations are provided, one for the Win32 API and the other for -POSIX compliant systems. The [link nudb.ref.nudb__native_file native_file] type -alias is automatically set to either [link nudb.ref.nudb__win32_file win32_file] -or [link nudb.ref.nudb__posix_file posix_file] as appropriate. To support interfaces other than Win32 or POSIX, callers may provide their -own [*File] type that meets these requirements. The unit test code also provides -its own [*File] type which causes simulated operating system file failures -to exercise all failure paths in the implementation. +own [*File] type that meets these requirements. 
-In the table below: +[heading Associated Types] +* [link nudb.ref.nudb__path_type path_type] +* [link nudb.ref.nudb__file_mode file_mode] +* [link nudb.ref.nudb__error_code error_code] -* `X` denotes a [*File] type -* `a` and `b` denote values of type `X` -* `c` denotes a (possibly const) value of type `X` -* `m` denotes a value of type [link nudb.ref.nudb__file_mode file_mode] -* `f` denotes a value of type [link nudb.ref.nudb__path_type path_type] -* `q` denotes a value of type `void*` -* `p` denotes a value of type `void const*` -* `ec` denotes a value of type [link nudb.ref.nudb__error_code error_code] +[heading Notation] +* `F` a [*File] type +* `f` an instance of type F +* `p` an instance of type [link nudb.ref.nudb__path_type path_type] +* `m` an instance of the type [link nudb.ref.nudb__file_mode file_mode] +* `ec` an instance of a reference to the type + [link nudb.ref.nudb__error_code error_code] +* `n` a number of bytes. Type is `std::size_t` +* 'o' an offset from the beginning of a file. Type is `std::size_t` +* 'a' a memory address of type void * -* `o` denotes a value of type `std::uint64_t` -* `n` denotes a value of type `std::size_t` - -[table File requirements -[[operation] [type] [semantics, pre/post-conditions]] +[heading File Requirements] +[table Valid Expressions +[[Operation] [Return Type] [Semantics, Pre/Post-conditions]] +[ + [`F()`] + [ ] + [ + Default constructable + ] +] [ - [`X a{std::move(b)}`] + [`F(f)`] [ ] [ - `X` is `MoveConstructible` + Move constructable. Note NOT copy constructable. ] ] + [ - [`c.is_open()`] + [`f.is_open()`] [`bool`] [ - Returns `true` if `c` refers to an open file. + Returns `true` if `f` refers to an open file, false otherwise. ] ] [ - [`a.close()`] - [ ] + [`f.close()`] + [] [ - If `a` refers to an open file, closes the file. Does nothing if - `a` does not refer to an open file. After this call, `a.open()` - will return `false`. + If `f` refers to an open file, closes the file. Otherwise, does + nothing. 
After this call, `f.is_open()` will return `false`. ] ] [ - [`a.create(m,f,ec)`] - [ ] + [`f.create(m,p,ec)`] + [] [ - Attempts to create a file at the path specified by `f`, and + Attempts to create a file at the path specified by `p`, and open it with the mode specified by `m`. If an error occurs, `ec` is set to the system specific error code. If no error - occurs, a subsequent call to `a.is_open()` will return `true`. - Undefined behavior if `a` already refers to an open file. + occurs, a subsequent call to `f.is_open()` will return `true`. + Behavior is undefined if `f` already refers to an open file. ] ] [ - [`a.open(m,f,ec)`] - [ ] + [`f.open(m,p,ec)`] + [] [ - Attempts to open the file at the path specified by `f`. If + Attempts to open the file at the path specified by `p`. If an error occurs, `ec` is set to the system specific error - code. If no error occurs, a subsequent call to `a.is_open()` - will return `true`. Undefined behavior if `a` already refers - to an open file. + code. If no error occurs, a subsequent call to `f.is_open()` + will return `true`. Behavior is undefined if `f` already + refers to an open file. ] ] [ - [`X::erase(f,ec)`] - [ ] + [`F::erase(p,ec)`] + [] [ - Attempts to delete the file at the path specified by `f`. + Attempts to delete the file at the path specified by `p`. If an error occurs, `ec` is set to the system specific error code. ] ] [ - [`c.size(ec)`] - [ `std::uint64_t` ] + [`f.size(ec)`] + [`std::size_t`] [ Returns the size of the file in bytes. This value is also equal to lowest byte offset for which a read will always return a - [link nudb.ref.nudb__error short_read] error. Undefined - behavior if `a` does not refer to an open file. + [link nudb.ref.nudb__error short_read] error. Behavior is undefined + if `f` does not refer to an open file. 
] ] [ - [`a.read(o,p,n,ec)`] - [ ] + [`f.read(o,a,n,ec)`] + [] [ - Attempts to read `n` bytes from the open file referred to by `a`, - starting at offset `o`, and storing the results in the memory - pointed to by `p`, which must be at least of size `n` bytes. - If an error occurs, `ec` is set to the system specific error - code. Undefined behavior if `a` does not refer to an open file. + Attempts to read `n` bytes starting at file offset `o` + from the open file referred to by `f`. Bytes read are stored + in the memory buffer at address `a`. The memory buffer at `a` + must be at least `n` bytes long. If an error occurs, `ec` is set + to the system specific error code. Behavior is undefined if `f` + does not refer to a file opened with [link nudb.ref.nudb__file_mode file_mode] + equal to scan or read. ] ] [ - [`a.write(o,q,n,ec)`] - [ ] - [ - Attempts to write `n` bytes to the open file referred to by `a` - and opened with a write mode, starting at offset `o`, and storing - the results in the memory pointed to by `p`, which must be at - least of size `n` bytes. If an error occurs, `ec` is set to the - system specific error code. Undefined behavior if `a` does not - refer to an open file. - ] -] -[ - [`a.sync(ec)`] - [ ] + [`f.sync(ec)`] + [] [ Attempts to synchronize the file on disk. This instructs the operating system to ensure that any data which resides in caches or buffers is fully written to the underlying storage device before this call returns. If an error occurs, `ec` is set to the - system specific error code. Undefined behavior if `a` does not - refer to an open file. + system specific error code. Behavior is undefined if + `f` does not refer to an open file. 
NuDB's database integrity guarantees are only valid if the implementation of `sync` assures that all data is fully written @@ -139,14 +133,26 @@ In the table below: ] ] [ - [`a.trunc(o,ec)`] - [ ] + [`f.write(o,a,n,ec)`] + [] + [ + Attempts to write `n` bytes starting at file offset `o` + to the open file referred to by `f`. The memory buffer at + `a` should contain the `n` bytes meant to be copied to the file. + If an error occurs, `ec` is set to the system specific error code. + Behavior is undefined if `f` does not refer to a file opened with + [link nudb.ref.nudb__file_mode file_mode] equal to write or append. + ] +] +[ + [`f.trunc(o,ec)`] + [] [ - Attempts to change the size of the open file referred to by `a` - and opened with a write mode, to the size in bytes specified - by `o`. If an error occurs, `ec` is set to the system specific - error code. Undefined behavior if `a` does not refer to an open - file. After a successful call, `a.size(ec)` will return `o`. + Attempts to change the size of the open file referred to by `f` + to the size in bytes specified by `o`. If an error occurs, `ec` + is set to the system specific error code. Behavior is undefined if + `f` does not refer to an open file. After a successful call, + `f.size(ec)` will return `o`. NuDB's database integrity guarantees are only valid if the implementation of `trunc` assures that subsequent calls to @@ -154,6 +160,25 @@ In the table below: device is taken offline before calling `size`. ] ] +[ + [`is_File<F>`] + [`std::true_type`] + [ + An alias for `std::true_type` for `F`. Otherwise equivalent + to `std::false_type`. 
+ ] +] ] +[heading Associated Functions] +* [link nudb.ref.nudb__block_size block_size] +* [link nudb.ref.nudb__erase_file erase_file] + +[heading Models] +* [link nudb.ref.nudb__posix_file posix_file] +* [link nudb.ref.nudb__win32_file win32_file] +* [link nudb.ref.nudb__native_file native_file] + +`native_file` is just an alias which selects one of the previous two depending on the operating system on which the code is compiled. + [endsect] diff --git a/doc/types/Hasher.qbk b/doc/types/Hasher.qbk index e80955b..90c334a 100644 --- a/doc/types/Hasher.qbk +++ b/doc/types/Hasher.qbk @@ -7,37 +7,44 @@ [section:Hasher Hasher] -A [@Hasher] implements a hash algorithm. This is used to compute the small -digests NuDB needs to effectively implement a hash table. NuDB provides -the default implementation [link nudb.ref.nudb__xxhasher xxhasher], which is -suitable for most use cases. For advanced applications, a user supplied -hash function may be supplied which must meet these requirements. +A type [*Hasher] implements a hash algorithm. This is used to compute the small +digests NuDB needs to effectively implement a hash table. -In the table below: +The NuDB database stores and retrieves key/value pairs. This storage is +organized as "buckets" where each bucket forms the start of a linked list. +Given a key, one must calculate which bucket the key should be +added to. The library includes code to guarantee that any set of +keys will map to a more or less even distribution of buckets, thus +ensuring that the array of buckets is utilized efficiently. A key +element in this code is a call to the Hasher function. This is why +the Hasher function should be designed to generate a uniform +distribution of integers for any random collection of keys. 
-* `X` denotes a hasher class -* `a` denotes a value of type `X const` -* `s` denotes a value of type `std::uint64_t` +[heading Notation] +* `H` is a hasher class +* `h` is an instance of type `H` +* `s` is a random seed of type `std::uint64_t` * `p` denotes a value of type `void const*` * `n` denotes a value of type `std::size_t` -[table Hasher requirements +[heading Hasher Requirements] +[table Valid Expressions [[operation] [type] [semantics, pre/post-conditions]] [ - [`X a{s}`] - [ ] + [`H{s}`] + [`H`] [ - `a` is constructed with a seed value integer. To achieve resistance + `H` is constructed with a seed value integer `s`. To achieve resistance from algorithmic complexity attacks, an implementation of [*Hasher] should ensure that values returned from the hash function will be - distinctly different for different values of `s` given the same - inputs. If algorithmic complexity attack resistance is not a - requirement, the seed may be ignored upon construction. + distinctly different for different values of `s`. If algorithmic + complexity attack resistance is not a requirement, the seed may be + ignored upon construction. ] ] [ - [`a(p,n)`] - [ `std::uint64_t` ] + [`h(p,n)`] + [`std::uint64_t`] [ Returns the digest of the memory `n` bytes in size and pointed to by `p`. `n` will never be zero. A good hash function will @@ -51,6 +58,21 @@ In the table below: ] ] +[ + [`is_Hasher<H>`] + [`std::true_type`] + [ + An alias for `std::true_type` for `H`. Otherwise equivalent + to `std::false_type`. + ] ] +] + +[heading Models] +* [link nudb.ref.nudb__xxhasher xxhasher] + +NuDB provides the default implementation [link nudb.ref.nudb__xxhasher xxhasher], which is +suitable for most use cases. For advanced applications, users may +supply their own implementation which fulfills the above requirements. 
-[endsect] +[endsect] \ No newline at end of file diff --git a/doc/types/Progress.qbk b/doc/types/Progress.qbk index 52dafe2..641d16e 100644 --- a/doc/types/Progress.qbk +++ b/doc/types/Progress.qbk @@ -7,34 +7,48 @@ [section:Progress Progress] -A [*Progress] object provides feedback to callers on the progress of -long running operations such as calls to [link nudb.ref.nudb__verify verify] or -[link nudb.ref.nudb__rekey rekey] which can take days or weeks for databases that -measure in the terabytes. These objects are used by passing them as parameters -to the appropriate functions, where the will be called periodically with -numbers that indicate the amount of work completed, versus the total amount -of work required. +An instance of type [*Progress] provides feedback to callers on the progress of +long running operations such as calls to [link nudb.ref.nudb__verify verify], +[link nudb.ref.nudb__visit visit] or [link nudb.ref.nudb__rekey rekey] +which can take days or weeks for databases that measure in the terabytes. +These objects are used by passing them as parameters to the appropriate functions, +where they will be called periodically with numbers that indicate the amount of +work completed, versus the total amount of work required. -In the table below: +[heading Notation] +* `P` is a progress class +* `p` is an instance of type `P` +* `n` is a value of type `std::uint64_t` representing the numerator of the fraction of work done. +* `d` is a value of type `std::uint64_t` representing the denominator of the fraction of work done. -* `X` denotes a progress class -* `a` denotes a value of type `X` -* `p` and `q` denote values of type `std::uint64_t` - -[table Progress requirements +[heading Progress Requirements] +[table Valid Expressions [[operation] [type] [semantics, pre/post-conditions]] [ - [`a(p, q)`] + [`p(n, d)`] [ ] [ Indicates to the progress object that work has been performed and - intermediate results calculated. 
`p` represents the amount of work - completed from the beginning of the operation. `q` represents the + intermediate results calculated. `n` represents the amount of work + completed from the beginning of the operation. `d` represents the total amount of work required. The fraction of completed work is - therefore `p/q`, with zero representing no work complete, and one - represents all work complete. `p` and `q` are unitless. + therefore `n/d`, with zero representing no work complete, and one + representing all work complete. ] ] +[ + [`is_Progress<P>`] + [`std::true_type`] + [ + An alias for `std::true_type` for `P`. Otherwise equivalent + to `std::false_type`. + ] ] +] + +[heading Models] +* [link nudb.ref.nudb__no_progress no_progress] + +This is a default implementation which does nothing. -[endsect] +[endsect] \ No newline at end of file diff --git a/doc/types/error_code.qbk b/doc/types/error_code.qbk new file mode 100644 index 0000000..20c4ddc --- /dev/null +++ b/doc/types/error_code.qbk @@ -0,0 +1,138 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] +[/ +[section:error_code [@http://www.boost.org/libs/system/doc/reference.html#Class-error_code] `boost::system::error_code`] +] +[section:error_code error_code] +Invoking functions in this library can result in errors. Such errors might +be detected by the functions of this library or be detected by other components +that this library depends upon such as the C++ standard library or underlying +operating system. So all functions in this library return an error code of type +`boost::system::error_code`. This type is capable of holding error codes +returned from any possible source. Given this, it's not surprising that it's +somewhat more complex than a simple integer value. Full description is beyond +the scope of this documentation, but fortunately there are various sources which +together provide a good explanation of how to use it. 
+[variablelist + [[ + [@http://www.boost.org/libs/system/doc/index.html + Boost System Library] + ][ + This is the official documentation for the Boost System Library which + includes the description of boost::system::error_code used by this library + ]] + [[ + [@http://en.cppreference.com/w/cpp/error + C++ Standard Library version] + ][ + The Boost System Library has been incorporated into the standard library + as part of the C++ standard error handling utilities. Except for the + substitution of std:: for boost::system:: namespace, the libraries are + identical. This link points to the standard library documentation which + may be used in addition to the boost version. + ]] + [[ + [@http://blog.think-async.com/2010/04/system-error-support-in-c0x-part-1.html + Thinking Asyncronously in C++] + ][ + Another essential reference on the design and usage of the `error_code`. + ]] +] +[heading Associated Types] +[itemized_list +[ + [@http://www.boost.org/libs/system/doc/reference.html#Header-error_code `boost::system::error::ercc`] + list of values which might be used as values by + [@http://www.boost.org/libs/system/doc/reference.html#Class-error_code `boost::system::error_code`]. +] +[ + [link nudb.ref.nudb__error error] list of values which might be used as values NuDB library functions. +] +[ + [@http://www.boost.org/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error`] +] +] +Any NuDB function may return `error_code` values from either of the above sources. +So error_codes should be checked against both lists of error codes used. + +[heading Notation] +[itemized_list + [`ec` an instance of a reference to the type `boost::system::error_code`.] + [ a value listed in either `boost::system::errc` or `nudb::error`. + [`se` an instance of the type + [@http://www.boost.org/libs/system/doc/reference.html#Class-system_error `boost::system::error::system_error` + ]. 
+ ] +] + +] +[heading Valid Expressions] +[table + [[Operation] [Return Type] [Description]] + [ + [[@http://www.boost.orglibs/system/doc/reference.html#Class-error_code-observers `ec.message()`]] + [`std::string`] + [ + Return an explanation of the error code as a string + ] + ] + [ + [[@http://www.boost.orglibs/system/doc/reference.html#Class-system_error `boost::system::error::system_error(ec)`]] + [`se`] + [ + Construct a standard exception type. The result can be the object + of a C++ "throw" statement. + ] + ] + [ + [`ec == `] + [bool] + [ + Compare an error_code with an error enum. + ] + ] + [ + [`ec != `] + [bool] + [ + Compare an error_code with an error enum. + ] + ] + [ + [`ec`] + [bool] + [ + Returns `true` if there is an error, `false` otherwise. + ] + ] +] + +[heading Header] +[@http://www.boost.orglibs/system/doc/reference.html#Header-error_code `#include `] + +[@../../include/nudb/error.hpp `#include `] + +[heading Example] +`` +#include +#include +// ... +boost::system::error_code ec; +nudb::store db; +db.open("db.dat", "db.key", "db.log", ec); +if(!ec) + return; // success ! +if(ec == nudb::error::no_key_file) + std::cerr << "key file not found" << std::endl; +else +if(ec == boost::system::errc::filename_too_long) + std::cerr << "file name too long" << std::endl; +else + std::cerr << ec.message() << std::endl; +return; // failure ! +`` + +[endsect] diff --git a/doc/usage.qbk b/doc/usage.qbk new file mode 100644 index 0000000..be6fe9c --- /dev/null +++ b/doc/usage.qbk @@ -0,0 +1,122 @@ +[/ + Copyright (c) 2013-2016 Vinnie Falco (vinnie dot falco at gmail dot com) + + Distributed under the Boost Software License, Version 1.0. (See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +] + +[section:usage Usage] + +[heading Files] + +A database is represented by three files: the data file, the key file, +and the log file. Each file has a distinct header in a well known format. 
+The data file holds all of the key/value pairs and is serially iterable. The +key file holds a hash table indexing all of the contents in the data file. +The log file holds information used to roll the database back in the event +of a failure. + +[heading Create/Open] + +The [link nudb.ref.nudb__create create] function creates a new data file and key +file for a database with the specified parameters. The caller specifies +the hash function to use as a template argument, the file paths, +and the database constants: + +[note + Sample code and identifiers mentioned in this section are written + as if the following declarations are in effect: + ``` + #include <nudb/nudb.hpp> + using namespace nudb; + error_code ec; + ``` +] + +``` +create<xxhasher>( + "nudb.dat", // Path to data file + "nudb.key", // Path to key file + "nudb.log", // Path to log file + 1, // Application-defined constant + make_salt(), // A random integer + 4, // The size of keys + block_size("."), // Block size in key file + 0.5f, // The load factor + ec); +``` + +The application-defined constant is a 64-bit unsigned integer which the +caller may set to any value. This value can be retrieved from an open +database, where it will be equal to the value used at creation time. This +constant can be used for any purpose. For example, to inform the application +of what application-specific version was used to create the database. + +The salt is a 64-bit unsigned integer used to prevent algorithmic complexity +attacks. Hash functions used during database operations are constructed with +the salt, providing an opportunity to permute the hash function. This feature +is useful when inserted database keys come from untrusted sources, such as the +network. + +The key size is specified when the database is created, and cannot be changed. +All key files indexing the same data file will use the key size of the data +file. + +The block size indicates the size of buckets in the key file. 
The best choice +for the block size is the natural sector size of the device. For most SSDs +in production today this is 4096, or less often 8192 or 16384. The function +[link nudb.ref.nudb__block_size block_size] returns the best guess of the block +size used by the device mounted at the specified path. + +The load factor determines the target bucket occupancy fraction. There is +almost never a need to specify anything other than the recommended value of +0.5, which strikes the perfect balance of space-efficiency and fast lookup. + +An open database is represented by objects of type +[link nudb.ref.nudb__basic_store basic_store], templated on the hasher. The type +alias [link nudb.ref.nudb__store store] represents a database using +[link nudb.ref.nudb__xxhasher xxhasher], the default hash function. To open +a database, declare a database object and then call the +[link nudb.ref.nudb__basic_store.open open] member function: + +``` +store db; +db.open("nudb.dat", "nudb.key", "nudb.log", ec); +``` + +When opening a database that was previously opened by a program that was +terminated abnormally, the implementation automatically invokes the +recovery process. This process restores the integrity of the database by +replaying the log file if it is present. + +[heading Insert/Fetch] + +Once a database is open, it becomes possible to insert new key/value pairs +and look them up. Insertions are straightforward: + +``` +db.insert(key, data, bytes, ec); +``` + +If the key already exists, the error is set to +[link nudb.ref.nudb__error.key_exists error::key_exists]. All keys in a NuDB +database must be unique. Multiple threads can call insert at the same time. +Internally however, insertions are serialized to present a consistent view +of the database to callers. + +Retrieving a key/value pair if it exists is similarly straightforward: + +``` +db.fetch(key, + [&](void const* buffer, std::size_t size) + { + ... 
+ }, ec); +``` + +To give callers control over memory allocation strategies, the fetch +function takes a callback object as a parameter. The callback is invoked +with a pointer to the data and size, if the item exists in the database. +The callback can decide how to store this information, if at all. + +[endsect] diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt index ab7e100..93cd446 100644 --- a/examples/CMakeLists.txt +++ b/examples/CMakeLists.txt @@ -4,6 +4,18 @@ GroupSources (include/nudb nudb) GroupSources (extras/nudb extras) GroupSources (examples/ "/") +add_executable (crash + ${NUDB_INCLUDES} + ${EXTRAS_INCLUDES} + crash.cpp +) + +if (WIN32) + target_link_libraries (crash ${Boost_LIBRARIES}) +else () + target_link_libraries (crash ${Boost_LIBRARIES} rt Threads::Threads) +endif () + add_executable (example ${NUDB_INCLUDES} ${EXTRAS_INCLUDES} @@ -15,3 +27,15 @@ if (WIN32) else () target_link_libraries (example ${Boost_LIBRARIES} rt Threads::Threads) endif () + +add_executable (recover + ${NUDB_INCLUDES} + ${EXTRAS_INCLUDES} + recover.cpp +) + +if (WIN32) + target_link_libraries (recover ${Boost_LIBRARIES}) +else () + target_link_libraries (recover ${Boost_LIBRARIES} rt Threads::Threads) +endif () diff --git a/examples/Jamfile b/examples/Jamfile index d165cc9..977b4c3 100644 --- a/examples/Jamfile +++ b/examples/Jamfile @@ -7,6 +7,14 @@ import os ; +exe crash : + crash.cpp + ; + exe example : example.cpp ; + +exe recover : + recover.cpp + ; diff --git a/examples/crash.cpp b/examples/crash.cpp new file mode 100644 index 0000000..647a526 --- /dev/null +++ b/examples/crash.cpp @@ -0,0 +1,45 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +//[ crash + +// simulates system crash while in use. 
+ +#include + +#include // store + +#include +#include // std::uint32_t, std::uint64_t +#include // std::abort + +int main(){ + boost::system::error_code ec; + + // key type for this example + using ssn = std::uint64_t; + + // Open an existing database + nudb::store db; + db.open("db.dat", "db.key", "db.log", ec); + if(ec){ + std::cerr << "open failed: " << ec.message() << std::endl; + return 1; + } + std::cerr << "open successful" << '\n'; + + ssn key = 777777777L; + const char * name = "george"; + + // insert a ssn/name pair + db.insert(& key, name, std::strlen(name), ec); + + // simulate a crash + std::abort(); + + return 1; +} +//] diff --git a/examples/example.cpp b/examples/example.cpp index c9a9696..85a3446 100644 --- a/examples/example.cpp +++ b/examples/example.cpp @@ -4,43 +4,106 @@ // Distributed under the Boost Software License, Version 1.0. (See accompanying // file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) // +//[ simple_example +#include -#include -#include -#include - -int main() -{ - using namespace nudb; - std::size_t constexpr N = 1000; - using key_type = std::uint32_t; - error_code ec; - auto const dat_path = "db.dat"; - auto const key_path = "db.key"; - auto const log_path = "db.log"; - create( - dat_path, key_path, log_path, - 1, - make_salt(), - sizeof(key_type), - block_size("."), - 0.5f, - ec); - store db; +#include // xxhasher +#include // create +#include // store +#include // path_type + +#include +#include // uint32_t, uint64_t +#include // pair +#include // string +#include // fill, copy_n, min +#include + +int main(){ + // error code returned by NuDb operations + boost::system::error_code ec; + + // key type for this example - a social security number + using ssn = std::uint64_t; + + // (1) File Names + const nudb::path_type dat_path = "db.dat"; + const nudb::path_type key_path = "db.key"; + const nudb::path_type log_path = "db.log"; + + // (2) Create a new database + // given names of data, key and log files + 
nudb::create( + dat_path, // path name of data file + key_path, // path name of key file + log_path, // path name of log file + 1, // application number + nudb::make_salt(), // random seed + sizeof(ssn), + nudb::block_size("."), // block size of current directory + 0.5f, // load factor + ec // reference to return code + ); + if(ec){ + std::cerr << "creation failed:" << ec.message() << std::endl; + return 1; + } + std::cerr << "creation successful" << '\n'; + + // (3) Open an existing database + nudb::store db; db.open(dat_path, key_path, log_path, ec); - char data = 0; - // Insert - for(key_type i = 0; i < N; ++i) - db.insert(&i, &data, sizeof(data), ec); - // Fetch - for(key_type i = 0; i < N; ++i) - db.fetch(&i, - [&](void const* buffer, std::size_t size) - { - // do something with buffer, size - }, ec); + if(ec){ + std::cerr << "open failed:" << ec.message() << std::endl; + return 1; + } + std::cerr << "open successful" << '\n'; + + const std::pair input_data[] = { + {123456789L, "bob"}, + {999999999L, "carol"}, + {987654321L, "ted"}, + {666666666L, "alice"} + }; + + // (4) Insert key/value pairs + // insert ssn/name pairs + for(const auto & p : input_data){ + db.insert(& p.first, p.second, std::strlen(p.second), ec); + if(ec){ + std::cerr << "insertion failed:" << ec.message() << std::endl; + return 1; + } + } + std::cerr << "inserted 4 records" << '\n'; + + // (5) Fetch a value given it's key + // get carol's address + ssn key = 999999999L; + std::string address; + db.fetch( + & key, + [&](void const * buffer, std::size_t size){ + address = std::string(static_cast(buffer), size); + }, + ec + ); + if(ec){ + std::cerr << "fetch failed:" << ec.message() << std::endl; + return 1; + } + std::cerr + << "given ssn=" << key << ", " + << "retrieved " << address << '\n'; + + // (6) Terminate access to the database db.close(ec); - erase_file(dat_path); - erase_file(key_path); - erase_file(log_path); + if(ec){ + std::cerr << "close failed:" << ec.message() << std::endl; + 
return 1; + } + std::cerr << "close successful" << '\n'; + + return 0; } +//] diff --git a/examples/recover.cpp b/examples/recover.cpp new file mode 100644 index 0000000..1bbecf0 --- /dev/null +++ b/examples/recover.cpp @@ -0,0 +1,68 @@ +// +// Copyright (c) 2015-2016 Vinnie Falco (vinnie dot falco at gmail dot com) +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +// +//[ recover +#include + +#include // xxhasher +#include // create +#include // store +#include +#include + +#include +#include // std::uint32_t, std::uint64_t + +int main(){ + boost::system::error_code ec; + + // key type for this example + using ssn = std::uint64_t; + + // Open an existing database + nudb::store db; + db.open("db.dat", "db.key", "db.log", ec); + if(ec){ + std::cerr << "open failed: " << ec.message() << std::endl; + return 1; + } + std::cerr << "open successful" << '\n'; + + nudb::visit( + "db.dat", + [&](// called with each item found in the data file + void const* key, // A pointer to the item key + std::size_t key_size, // The size of the key (always the same) + void const* data, // A pointer to the item data + std::size_t data_size, // The size of the item data + boost::system::error_code& ec // Indicates an error (out parameter) + ){ + if(ec){ + std::cerr << "visit failed: " << ec.message() << std::endl; + return std::terminate(); + } + std::cerr + << "key: " << * static_cast(key) << '\n' + << "name: " << + std::string(static_cast(data), data_size) << '\n' + ; + }, + [&](// called to indicate progress of visitation + std::uint64_t amount, // Amount of work done so far + std::uint64_t total // Total amount of work to do + ){ + // we ignore this information in this example + }, + ec // result of visit operation + ); + if(ec){ + std::cerr << "visit failed: " << ec.message() << std::endl; + return 1; + } + return 0; +} + +//] diff --git a/extras/beast b/extras/beast index 
2f9a844..823aee2 160000 --- a/extras/beast +++ b/extras/beast @@ -1 +1 @@ -Subproject commit 2f9a8440c2432d8a196571d6300404cb76314125 +Subproject commit 823aee222a6d923299cf680403d8adcd0c53b1e3 diff --git a/include/nudb/basic_store.hpp b/include/nudb/basic_store.hpp index 0e69195..b66f29c 100644 --- a/include/nudb/basic_store.hpp +++ b/include/nudb/basic_store.hpp @@ -23,24 +23,43 @@ namespace nudb { /** A high performance, insert-only key/value database for SSDs. - To create a database first call the @ref create - free function. Then construct a @ref basic_store and - call @ref open on it: + A database is represented by a triplet of files: the data file, the key file, and the log file. + These files together constitute a database of key/value pairs. Each file has a distinct header in a + well known format. The data file holds all of the key/value pairs and is serially iterable. + The key file holds a hash table indexing all of the contents in the data file. + The log file holds information used to roll the database back in the event of a failure. - @code - error_code ec; - create( - "db.dat", "db.key", "db.log", - 1, make_salt(), 8, 4096, 0.5f, ec); - basic_store db; - db.open("db.dat", "db.key", "db.log", ec); - @endcode + Operations which apply to the database as a whole, such as create and recover, are implemented as free + functions which include the names of the data, key and log files as parameters. + + In order to alter or access the contents of a database, one creates a data structure of + type "basic_store". Operations on the contents of the database, such as insertions and + fetches, are implemented as member functions of the "basic_store" type. @tparam Hasher The hash function to use. This type must meet the requirements of @b Hasher. @tparam File The type of File object to use. This type must meet the requirements of @b File. 
+ + @par Models + The simplest and most common usage of the basic_store class template is the store class + which is defined like this: + + @code + #include + #include + using store = basic_store< xxhasher, native_file >; + @endcode + + @par Example + @code + #include + #include + nudb::store db; + nudb::error_code ec; + db.open("db.dat", "db.key", "db.log", ec); + @endcode */ template class basic_store diff --git a/include/nudb/error.hpp b/include/nudb/error.hpp index f5f8685..e8f12ea 100644 --- a/include/nudb/error.hpp +++ b/include/nudb/error.hpp @@ -228,7 +228,7 @@ enum class error error_category const& nudb_category(); -/** Returns a database error code. +/** Constructs a database error code. This function is used by the implementation to convert @ref error values into @ref error_code objects. diff --git a/include/nudb/posix_file.hpp b/include/nudb/posix_file.hpp index 8d3b8e5..d99e25e 100644 --- a/include/nudb/posix_file.hpp +++ b/include/nudb/posix_file.hpp @@ -35,6 +35,10 @@ namespace nudb { +/** A posix file. + + This class provides a posix implementation of the @b File concept. +*/ class posix_file { int fd_ = -1; diff --git a/include/nudb/verify.hpp b/include/nudb/verify.hpp index 23b1260..9cb567f 100644 --- a/include/nudb/verify.hpp +++ b/include/nudb/verify.hpp @@ -119,69 +119,61 @@ struct verify_info }; /** Verify consistency of the key and data files. - This function opens the key and data files, and performs the following checks on the contents: - @li Data file header validity - @li Key file header validity - @li Data and key file header agreements - @li Check that each value is contained in a bucket - @li Check that each bucket item reflects a value - @li Ensure no values with duplicate keys - Undefined behavior results when verifying a database that still has a log file. Use @ref recover on such databases first. 
- This function selects one of two algorithms to use, the normal version, and a faster version that can take advantage of a buffer of sufficient size. Depending on the value of the bufferSize argument, the appropriate algorithm is chosen. - A good value of bufferSize is one that is a large fraction of the key file size. For example, 20% of the size of the key file. Larger is better, with the highest usable value depending on the size of the key file. If presented with a buffer size that is too large to be of extra use, the fast algorithm will simply allocate what it needs. - - @par Template Parameters - @tparam Hasher The hash function to use. This type must meet the requirements of @b HashFunction. The hash function must be the same as that used to create the database, or else an error is returned. - - @param info A structure which will be default constructed - inside this function, and filled in if the operation completes - successfully. If an error is indicated, the contents of this - variable are undefined. - + @param info A reference to a structure of type + @ref verify_info which will be filled in. + Upon return the contents of this structure are defined + if and only if the verify function completes successfully + as indicated by the ec parameter. @param dat_path The path to the data file. - @param key_path The path to the key file. - @param bufferSize The number of bytes to allocate for the buffer. If this number is too small, or zero, a slower algorithm will be used that does not require a buffer. - - @param progress A function which will be called periodically - as the algorithm proceeds. 
The equivalent signature of the - progress function must be: + @param progress + A function address, function object or lambda + with the following signature: @code void progress( std::uint64_t amount, // Amount of work done so far std::uint64_t total // Total amount of work to do ); @endcode - + This function will be called periodically as the algorithm + proceeds so that the user code can monitor the progress + of the operation and calculate how much time remains to + complete the operation. The library contains a default function object, + [no_progress](@ref no_progress), which matches the above signature + but does nothing. @param ec Set to the error, if any occurred. + + @par Associated Types + @li @ref verify_info - Information returned by verify function + @li @ref no_progress - Function of type Progress which does nothing */ template void diff --git a/include/nudb/win32_file.hpp b/include/nudb/win32_file.hpp index d225ae9..9ca323a 100644 --- a/include/nudb/win32_file.hpp +++ b/include/nudb/win32_file.hpp @@ -48,7 +48,7 @@ namespace nudb { -/** A descriptor to a Win32 file. +/** A Win32 file. This class provides a Win32 implementation of the @b File concept.