diff --git a/.github/workflows/build_ubuntu_lastest.yml b/.github/workflows/build_ubuntu_lastest.yml new file mode 100644 index 0000000..c531b44 --- /dev/null +++ b/.github/workflows/build_ubuntu_lastest.yml @@ -0,0 +1,20 @@ +name: C/C++ CI + +on: + push: + branches: [ master ] + pull_request: + branches: [ master ] + +jobs: + build: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + + - name: build + run: | + ./autogen.sh + ./configure --prefix=$PWD + make && make check diff --git a/ChangeLog b/ChangeLog index 4cbe701..fd53c6f 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Mon Oct 12 21:00:00 2015 Google Inc. + + * sparsehash: version 2.0.3 + * Fix compilation on modern compilers and operating systems + Thu Feb 23 23:47:18 2012 Google Inc. * sparsehash: version 2.0.2 diff --git a/NEWS b/NEWS index 589c709..4af929c 100644 --- a/NEWS +++ b/NEWS @@ -1,4 +1,9 @@ -== 23 Ferbruary 2012 == +== 12 October 2015 == + +Various small fixes to ensure compilation on modern compilers and operating +systems. Tagged as 2.0.3 + +== 23 February 2012 == A backwards incompatibility arose from flattening the include headers structure for the folder. diff --git a/README b/README index 8c806b4..26bb485 100644 --- a/README +++ b/README @@ -106,20 +106,24 @@ SPARSETABLE In addition to the hash-map and hash-set classes, this package also provides sparsetable.h, an array implementation that uses space proportional to the number of elements in the array, rather than the -maximum element index. It uses very little space overhead: 1 bit per -entry. See doc/sparsetable.html for the API. +maximum element index. It uses very little space overhead: 2 to 5 +bits per entry. See doc/sparsetable.html for the API. RESOURCE USAGE -------------- -* sparse_hash_map has memory overhead of about 2 bits per hash-map - entry. +* sparse_hash_map has memory overhead of about 4 to 10 bits per + hash-map entry, assuming a typical average occupancy of 50%. 
* dense_hash_map has a factor of 2-3 memory overhead: if your hashtable data takes X bytes, dense_hash_map will use 3X-4X memory total. Hashtables tend to double in size when resizing, creating an additional 50% space overhead. dense_hash_map does in fact have a -significant "high water mark" memory use requirement. +significant "high water mark" memory use requirement, which is 6 times +the size of hash entries in the table when resizing (when reaching +50% occupancy, the table resizes to double the previous size, and the +old table (2x) is copied to the new table (4x)). + sparse_hash_map, however, is written to need very little space overhead when resizing: only a few bits per hashtable entry. diff --git a/doc/implementation.html b/doc/implementation.html index 2050d54..31789ac 100644 --- a/doc/implementation.html +++ b/doc/implementation.html @@ -66,7 +66,7 @@

insert()

replaces vector[3] with the new value. If the lookup fails, then the code must insert a new entry into the middle of the vector. Again, to insert at position i, the code must count all the bitmap entries <= i -that are set to i. This indicates the position to insert into the +that are set to 1. This indicates the position to insert into the vector. All vector entries above that position must be moved to make room for the new entry. This takes time, but still constant time since the vector has size at most M.

@@ -131,6 +131,66 @@

Resource use

entry -- but take longer for inserts, deletes, and lookups. A smaller M would use more overhead but make operations somewhat faster.

+The numbers above assume that the allocator used doesn't require extra +memory. The default allocator (using malloc/free) typically has some overhead +for each allocation. If we assume 16 byte overhead per allocation, the +overhead becomes 4.6 bits per array entry (32 bit pointers) or 5.3 bits per +array entry (64 bit pointers). + +

Each sparsegroup has:

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
member32 bit64 bit
pointer4 bytes8 bytes
num_buckets2 bytes2 bytes
bitmap6 bytes6 bytes
total12 bytes = 96 bits16 bytes = 128 bits
Because this is the overhead for each sparsegroup (48 entries), we divide by 48
overhead / entry96 / 48 = 2 bits128 / 48 = 2.67 bits
additional overhead per allocation up to 16 bytes = 128 bits
max overhead / entry(96 + 128) / 48 = 4.67 bits(128 + 128) / 48 = 5.33 bits
+

You can also look at some specific performance numbers.

diff --git a/src/hashtable_test.cc b/src/hashtable_test.cc index 193138b..1a12509 100644 --- a/src/hashtable_test.cc +++ b/src/hashtable_test.cc @@ -1908,6 +1908,36 @@ TEST(HashtableTest, NestedHashtables) { dense_hash_map, Hasher, Hasher> ht3copy = ht3; } +TEST(HashtableTest, ResizeWithoutShrink) { + const size_t N = 1000000L; + const size_t max_entries = 40; +#define KEY(i, j) (i * 4 + j) * 28 + 11 + + dense_hash_map ht; + ht.set_empty_key(0); + ht.set_deleted_key(1); + ht.min_load_factor(0); + ht.max_load_factor(0.2); + + for (size_t i = 0; i < N; ++i) { + for (size_t j = 0; j < max_entries; ++j) { + size_t key = KEY(i, j); + ht[key] = 0; + } + for (size_t j = 0; j < max_entries / 2; ++j) { + size_t key = KEY(i, j); + ht.erase(key); + ht[key + 1] = 0; + } + for (size_t j = 0; j < max_entries; ++j) { + size_t key = KEY(i, j); + ht.erase(key); + ht.erase(key + (j < max_entries / 2)); + } + EXPECT_LT(ht.bucket_count(), 4096); + } +} + TEST(HashtableDeathTest, ResizeOverflow) { dense_hash_map ht; EXPECT_DEATH(ht.resize(static_cast(-1)), diff --git a/src/sparsehash/internal/densehashtable.h b/src/sparsehash/internal/densehashtable.h index 1f0c943..cdf4ff6 100644 --- a/src/sparsehash/internal/densehashtable.h +++ b/src/sparsehash/internal/densehashtable.h @@ -588,13 +588,19 @@ class dense_hashtable { // are currently taking up room). But later, when we decide what // size to resize to, *don't* count deleted buckets, since they // get discarded during the resize. - const size_type needed_size = settings.min_buckets(num_elements + delta, 0); + size_type needed_size = settings.min_buckets(num_elements + delta, 0); if ( needed_size <= bucket_count() ) // we have enough buckets return did_resize; size_type resize_to = settings.min_buckets(num_elements - num_deleted + delta, bucket_count()); + // When num_deleted is large, we may still grow but we do not want to + // over expand. So we reduce needed_size by a portion of num_deleted + // (the exact portion does not matter). 
This is especially helpful + // when min_load_factor is zero (no shrink at all) to avoid doubling + // the bucket count to infinity. See also test ResizeWithoutShrink. + needed_size = settings.min_buckets(num_elements - num_deleted / 4 + delta, 0); if (resize_to < needed_size && // may double resize_to resize_to < (std::numeric_limits::max)() / 2) { // This situation means that we have enough deleted elements, @@ -1195,8 +1201,10 @@ class dense_hashtable { pointer realloc_or_die(pointer ptr, size_type n) { pointer retval = this->reallocate(ptr, n); if (retval == NULL) { - fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate " - "%lu elements for ptr %p", static_cast(n), ptr); + fprintf(stderr, + "sparsehash: FATAL ERROR: failed to reallocate " + "%lu elements for ptr %p", + static_cast(n), static_cast(ptr)); exit(1); } return retval; diff --git a/src/sparsehash/internal/libc_allocator_with_realloc.h b/src/sparsehash/internal/libc_allocator_with_realloc.h index 0c1e03d..769329f 100644 --- a/src/sparsehash/internal/libc_allocator_with_realloc.h +++ b/src/sparsehash/internal/libc_allocator_with_realloc.h @@ -65,7 +65,10 @@ class libc_allocator_with_realloc { free(p); } pointer reallocate(pointer p, size_type n) { - return static_cast(realloc(p, n * sizeof(value_type))); + // p points to a storage array whose objects have already been destroyed + // cast to void* to prevent compiler warnings about calling realloc() on + // an object which cannot be relocated in memory + return static_cast(realloc(static_cast(p), n * sizeof(value_type))); } size_type max_size() const { diff --git a/src/sparsehash/sparsetable b/src/sparsehash/sparsetable index efbeaac..6259ebd 100644 --- a/src/sparsehash/sparsetable +++ b/src/sparsehash/sparsetable @@ -1088,7 +1088,9 @@ class sparsegroup { // This is equivalent to memmove(), but faster on my Intel P4, // at least with gcc4.1 -O2 / glibc 2.3.6. 
for (size_type i = settings.num_buckets; i > offset; --i) - memcpy(group + i, group + i-1, sizeof(*group)); + // cast to void* to prevent compiler warnings about writing to an object + // with no trivial copy-assignment + memcpy(static_cast(group + i), group + i-1, sizeof(*group)); } // Create space at group[offset], without special assumptions about value_type @@ -1154,7 +1156,10 @@ class sparsegroup { // at lesat with gcc4.1 -O2 / glibc 2.3.6. assert(settings.num_buckets > 0); for (size_type i = offset; i < settings.num_buckets-1; ++i) - memcpy(group + i, group + i+1, sizeof(*group)); // hopefully inlined! + // cast to void* to prevent compiler warnings about writing to an object + // with no trivial copy-assignment + // hopefully inlined! + memcpy(static_cast(group + i), group + i+1, sizeof(*group)); group = settings.realloc_or_die(group, settings.num_buckets-1); } @@ -1591,7 +1596,7 @@ class sparsetable { } // And the reverse transformation. - size_type get_pos(const const_nonempty_iterator it) const { + size_type get_pos(const const_nonempty_iterator& it) const { difference_type current_row = it.row_current - it.row_begin; difference_type current_col = (it.col_current - groups[current_row].nonempty_begin()); diff --git a/src/time_hash_map.cc b/src/time_hash_map.cc index 635c5ab..34d5721 100644 --- a/src/time_hash_map.cc +++ b/src/time_hash_map.cc @@ -331,6 +331,8 @@ class Rusage { }; inline void Rusage::Reset() { + g_num_copies = 0; + g_num_hashes = 0; #if defined HAVE_SYS_RESOURCE_H getrusage(RUSAGE_SELF, &start); #elif defined HAVE_WINDOWS_H @@ -721,7 +723,7 @@ int main(int argc, char** argv) { if (FLAGS_test_4_bytes) test_all_maps< HashObject<4,4> >(4, iters/1); if (FLAGS_test_8_bytes) test_all_maps< HashObject<8,8> >(8, iters/2); if (FLAGS_test_16_bytes) test_all_maps< HashObject<16,16> >(16, iters/4); - if (FLAGS_test_256_bytes) test_all_maps< HashObject<256,256> >(256, iters/32); + if (FLAGS_test_256_bytes) test_all_maps< HashObject<256,32> >(256, 
iters/32); return 0; } diff --git a/src/windows/config.h b/src/windows/config.h index 2040758..8d16d43 100644 --- a/src/windows/config.h +++ b/src/windows/config.h @@ -6,27 +6,54 @@ /* Namespace for Google classes */ #define GOOGLE_NAMESPACE ::google +#if (_MSC_VER >= 1800 ) + +/* the location of the header defining hash functions */ +#define HASH_FUN_H + +/* the location of or */ +#define HASH_MAP_H + +/* the location of or */ +#define HASH_SET_H + +/* define if the compiler has hash_map */ +#define HAVE_HASH_MAP 0 + +/* define if the compiler has hash_set */ +#define HAVE_HASH_SET 0 + +/* define if the compiler supports unordered_{map,set} */ +#define HAVE_UNORDERED_MAP 1 + +#else /* Earlier than VSC++ 2013 */ + /* the location of the header defining hash functions */ #define HASH_FUN_H /* the location of or */ #define HASH_MAP_H -/* the namespace of the hash<> function */ -#define HASH_NAMESPACE stdext - /* the location of or */ #define HASH_SET_H -/* Define to 1 if you have the header file. */ -#undef HAVE_GOOGLE_MALLOC_EXTENSION_H - /* define if the compiler has hash_map */ #define HAVE_HASH_MAP 1 /* define if the compiler has hash_set */ #define HAVE_HASH_SET 1 +/* define if the compiler supports unordered_{map,set} */ +#undef HAVE_UNORDERED_MAP + +#endif + +/* the namespace of the hash<> function */ +#define HASH_NAMESPACE stdext + +/* Define to 1 if you have the header file. */ +#undef HAVE_GOOGLE_MALLOC_EXTENSION_H + /* Define to 1 if you have the header file. */ #undef HAVE_INTTYPES_H @@ -81,9 +108,6 @@ /* Define to 1 if you have the header file. */ #undef HAVE_UNISTD_H -/* define if the compiler supports unordered_{map,set} */ -#undef HAVE_UNORDERED_MAP - /* Define to 1 if the system has the type `u_int16_t'. 
*/ #undef HAVE_U_INT16_T diff --git a/src/windows/sparsehash/internal/sparseconfig.h b/src/windows/sparsehash/internal/sparseconfig.h index 3091559..a962430 100644 --- a/src/windows/sparsehash/internal/sparseconfig.h +++ b/src/windows/sparsehash/internal/sparseconfig.h @@ -6,8 +6,17 @@ /* Namespace for Google classes */ #define GOOGLE_NAMESPACE ::google +#if (_MSC_VER >= 1800 ) + +/* the location of the header defining hash functions */ +#define HASH_FUN_H + +#else /* Earlier than VSC++ 2013 */ + /* the location of the header defining hash functions */ #define HASH_FUN_H + +#endif /* the namespace of the hash<> function */ #define HASH_NAMESPACE stdext