From acf5709d40bd24ed6049db61c252d1d4835e4f62 Mon Sep 17 00:00:00 2001
From: Donovan Hide
Date: Mon, 12 Oct 2015 22:08:43 +0100
Subject: [PATCH 01/15] Update NEWS
---
NEWS | 7 ++++++-
1 file changed, 6 insertions(+), 1 deletion(-)
diff --git a/NEWS b/NEWS
index 589c709..4af929c 100644
--- a/NEWS
+++ b/NEWS
@@ -1,4 +1,9 @@
-== 23 Ferbruary 2012 ==
+== 12 October 2015 ==
+
+Various small fixes to ensure compilation on modern compilers and operating
+systems. Tagged as 2.0.3
+
+== 23 February 2012 ==
A backwards incompatibility arose from flattening the include headers
structure for the folder.
From 2589511ed6c23997d4ebad759a0786ae7df39333 Mon Sep 17 00:00:00 2001
From: Donovan Hide
Date: Mon, 12 Oct 2015 22:10:48 +0100
Subject: [PATCH 02/15] Update ChangeLog
---
ChangeLog | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/ChangeLog b/ChangeLog
index 4cbe701..b37bb6d 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,8 @@
+Mon Oct 12 21:00:00 2012 Google Inc.
+
+ * sparsehash: version 2.0.3
+ * Fix compilation on modern compilers and operating systems
+
Thu Feb 23 23:47:18 2012 Google Inc.
* sparsehash: version 2.0.2
From 6af4bcf4ded7cf270ebb190470b8854c6531fa14 Mon Sep 17 00:00:00 2001
From: Donovan Hide
Date: Mon, 12 Oct 2015 22:13:52 +0100
Subject: [PATCH 03/15] Update ChangeLog
---
ChangeLog | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ChangeLog b/ChangeLog
index b37bb6d..fd53c6f 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,4 +1,4 @@
-Mon Oct 12 21:00:00 2012 Google Inc.
+Mon Oct 12 21:00:00 2015 Google Inc.
* sparsehash: version 2.0.3
* Fix compilation on modern compilers and operating systems
From 6c4151b553e5e0183e55d5f72fe747e5e86e7800 Mon Sep 17 00:00:00 2001
From: mengweichao
Date: Wed, 16 Dec 2015 19:25:53 +0800
Subject: [PATCH 04/15] amend spelling mistakes for insert() method
---
doc/implementation.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/doc/implementation.html b/doc/implementation.html
index 2050d54..dce49d3 100644
--- a/doc/implementation.html
+++ b/doc/implementation.html
@@ -66,7 +66,7 @@ insert()
replaces vector[3] with the new value. If the lookup fails, then the
code must insert a new entry into the middle of the vector. Again, to
insert at position i, the code must count all the bitmap entries <= i
-that are set to i. This indicates the position to insert into the
+that are set to 1. This indicates the position to insert into the
vector. All vector entries above that position must be moved to make
room for the new entry. This takes time, but still constant time
since the vector has size at most M.
From 7b8afad35bd601ea780b0f72e215de96613d3c95 Mon Sep 17 00:00:00 2001
From: Philip Deegan
Date: Mon, 21 Dec 2015 23:54:25 +0000
Subject: [PATCH 05/15] Use unordered_map instead of hash_map for Visual Studio
2013 and later
---
src/windows/config.h | 42 +++++++++++++++----
.../sparsehash/internal/sparseconfig.h | 9 ++++
2 files changed, 42 insertions(+), 9 deletions(-)
diff --git a/src/windows/config.h b/src/windows/config.h
index 2040758..8d16d43 100644
--- a/src/windows/config.h
+++ b/src/windows/config.h
@@ -6,27 +6,54 @@
/* Namespace for Google classes */
#define GOOGLE_NAMESPACE ::google
+#if (_MSC_VER >= 1800 )
+
+/* the location of the header defining hash functions */
+#define HASH_FUN_H
+
+/* the location of or */
+#define HASH_MAP_H
+
+/* the location of or */
+#define HASH_SET_H
+
+/* define if the compiler has hash_map */
+#define HAVE_HASH_MAP 0
+
+/* define if the compiler has hash_set */
+#define HAVE_HASH_SET 0
+
+/* define if the compiler supports unordered_{map,set} */
+#define HAVE_UNORDERED_MAP 1
+
+#else /* Earlier than VSC++ 2013 */
+
/* the location of the header defining hash functions */
#define HASH_FUN_H
/* the location of or */
#define HASH_MAP_H
-/* the namespace of the hash<> function */
-#define HASH_NAMESPACE stdext
-
/* the location of or */
#define HASH_SET_H
-/* Define to 1 if you have the header file. */
-#undef HAVE_GOOGLE_MALLOC_EXTENSION_H
-
/* define if the compiler has hash_map */
#define HAVE_HASH_MAP 1
/* define if the compiler has hash_set */
#define HAVE_HASH_SET 1
+/* define if the compiler supports unordered_{map,set} */
+#undef HAVE_UNORDERED_MAP
+
+#endif
+
+/* the namespace of the hash<> function */
+#define HASH_NAMESPACE stdext
+
+/* Define to 1 if you have the header file. */
+#undef HAVE_GOOGLE_MALLOC_EXTENSION_H
+
/* Define to 1 if you have the header file. */
#undef HAVE_INTTYPES_H
@@ -81,9 +108,6 @@
/* Define to 1 if you have the header file. */
#undef HAVE_UNISTD_H
-/* define if the compiler supports unordered_{map,set} */
-#undef HAVE_UNORDERED_MAP
-
/* Define to 1 if the system has the type `u_int16_t'. */
#undef HAVE_U_INT16_T
diff --git a/src/windows/sparsehash/internal/sparseconfig.h b/src/windows/sparsehash/internal/sparseconfig.h
index 3091559..a962430 100644
--- a/src/windows/sparsehash/internal/sparseconfig.h
+++ b/src/windows/sparsehash/internal/sparseconfig.h
@@ -6,8 +6,17 @@
/* Namespace for Google classes */
#define GOOGLE_NAMESPACE ::google
+#if (_MSC_VER >= 1800 )
+
+/* the location of the header defining hash functions */
+#define HASH_FUN_H
+
+#else /* Earlier than VSC++ 2013 */
+
/* the location of the header defining hash functions */
#define HASH_FUN_H
+
+#endif
/* the namespace of the hash<> function */
#define HASH_NAMESPACE stdext
From 2d27620772a303356a3534d93f67b1105b481047 Mon Sep 17 00:00:00 2001
From: Ling Li
Date: Sat, 16 Jan 2016 05:26:23 -0800
Subject: [PATCH 06/15] Fix the bug of endless bucket doubling when
min_load_factor=0.
---
src/sparsehash/internal/densehashtable.h | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/src/sparsehash/internal/densehashtable.h b/src/sparsehash/internal/densehashtable.h
index 1f0c943..12ff98d 100644
--- a/src/sparsehash/internal/densehashtable.h
+++ b/src/sparsehash/internal/densehashtable.h
@@ -588,13 +588,14 @@ class dense_hashtable {
// are currently taking up room). But later, when we decide what
// size to resize to, *don't* count deleted buckets, since they
// get discarded during the resize.
- const size_type needed_size = settings.min_buckets(num_elements + delta, 0);
+ size_type needed_size = settings.min_buckets(num_elements + delta, 0);
if ( needed_size <= bucket_count() ) // we have enough buckets
return did_resize;
size_type resize_to =
settings.min_buckets(num_elements - num_deleted + delta, bucket_count());
+ needed_size = settings.min_buckets(num_elements - num_deleted / 4 + delta, 0);
if (resize_to < needed_size && // may double resize_to
resize_to < (std::numeric_limits::max)() / 2) {
// This situation means that we have enough deleted elements,
From 3151e1195bdbdb57beb78087c5634f043c34746f Mon Sep 17 00:00:00 2001
From: Ling Li
Date: Sat, 16 Jan 2016 07:57:07 -0800
Subject: [PATCH 07/15] Add test ResizeWithoutShrink and in-code comments.
---
src/hashtable_test.cc | 30 ++++++++++++++++++++++++
src/sparsehash/internal/densehashtable.h | 5 ++++
2 files changed, 35 insertions(+)
diff --git a/src/hashtable_test.cc b/src/hashtable_test.cc
index 193138b..1a12509 100644
--- a/src/hashtable_test.cc
+++ b/src/hashtable_test.cc
@@ -1908,6 +1908,36 @@ TEST(HashtableTest, NestedHashtables) {
dense_hash_map, Hasher, Hasher> ht3copy = ht3;
}
+TEST(HashtableTest, ResizeWithoutShrink) {
+ const size_t N = 1000000L;
+ const size_t max_entries = 40;
+#define KEY(i, j) (i * 4 + j) * 28 + 11
+
+ dense_hash_map ht;
+ ht.set_empty_key(0);
+ ht.set_deleted_key(1);
+ ht.min_load_factor(0);
+ ht.max_load_factor(0.2);
+
+ for (size_t i = 0; i < N; ++i) {
+ for (size_t j = 0; j < max_entries; ++j) {
+ size_t key = KEY(i, j);
+ ht[key] = 0;
+ }
+ for (size_t j = 0; j < max_entries / 2; ++j) {
+ size_t key = KEY(i, j);
+ ht.erase(key);
+ ht[key + 1] = 0;
+ }
+ for (size_t j = 0; j < max_entries; ++j) {
+ size_t key = KEY(i, j);
+ ht.erase(key);
+ ht.erase(key + (j < max_entries / 2));
+ }
+ EXPECT_LT(ht.bucket_count(), 4096);
+ }
+}
+
TEST(HashtableDeathTest, ResizeOverflow) {
dense_hash_map ht;
EXPECT_DEATH(ht.resize(static_cast(-1)),
diff --git a/src/sparsehash/internal/densehashtable.h b/src/sparsehash/internal/densehashtable.h
index 12ff98d..97eb245 100644
--- a/src/sparsehash/internal/densehashtable.h
+++ b/src/sparsehash/internal/densehashtable.h
@@ -595,6 +595,11 @@ class dense_hashtable {
size_type resize_to =
settings.min_buckets(num_elements - num_deleted + delta, bucket_count());
+ // When num_deleted is large, we may still grow but we do not want to
+ // over expand. So we reduce needed_size by a portion of num_deleted
+ // (the exact portion does not matter). This is especially helpful
+ // when min_load_factor is zero (no shrink at all) to avoid doubling
+ // the bucket count to infinity. See also test ResizeWithoutShrink.
needed_size = settings.min_buckets(num_elements - num_deleted / 4 + delta, 0);
if (resize_to < needed_size && // may double resize_to
resize_to < (std::numeric_limits::max)() / 2) {
From 4a36398d442cc86f47d2d7e751596cc2430ad135 Mon Sep 17 00:00:00 2001
From: Dekken
Date: Mon, 14 Mar 2016 12:19:34 +0000
Subject: [PATCH 08/15] Pass by const ref not copy
https://github.com/sparsehash/sparsehash/issues/88
---
src/sparsehash/sparsetable | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/sparsehash/sparsetable b/src/sparsehash/sparsetable
index efbeaac..3ddccf8 100644
--- a/src/sparsehash/sparsetable
+++ b/src/sparsehash/sparsetable
@@ -1591,7 +1591,7 @@ class sparsetable {
}
// And the reverse transformation.
- size_type get_pos(const const_nonempty_iterator it) const {
+ size_type get_pos(const const_nonempty_iterator& it) const {
difference_type current_row = it.row_current - it.row_begin;
difference_type current_col = (it.col_current -
groups[current_row].nonempty_begin());
From 67cdd69a002e2d7059a484c2cc25add6c329d6d6 Mon Sep 17 00:00:00 2001
From: Dekken
Date: Tue, 12 Jul 2016 21:49:59 +0100
Subject: [PATCH 09/15] -Wformat-pedantic casts to quiet compiler warning
---
src/sparsehash/internal/densehashtable.h | 6 ++++--
1 file changed, 4 insertions(+), 2 deletions(-)
diff --git a/src/sparsehash/internal/densehashtable.h b/src/sparsehash/internal/densehashtable.h
index 97eb245..cdf4ff6 100644
--- a/src/sparsehash/internal/densehashtable.h
+++ b/src/sparsehash/internal/densehashtable.h
@@ -1201,8 +1201,10 @@ class dense_hashtable {
pointer realloc_or_die(pointer ptr, size_type n) {
pointer retval = this->reallocate(ptr, n);
if (retval == NULL) {
- fprintf(stderr, "sparsehash: FATAL ERROR: failed to reallocate "
- "%lu elements for ptr %p", static_cast(n), ptr);
+ fprintf(stderr,
+ "sparsehash: FATAL ERROR: failed to reallocate "
+ "%lu elements for ptr %p",
+ static_cast(n), static_cast(ptr));
exit(1);
}
return retval;
From d6684b241c915a2764caa20270e48d9c5f25eb1d Mon Sep 17 00:00:00 2001
From: greg7mdp
Date: Sat, 23 Jul 2016 17:31:40 -0400
Subject: [PATCH 10/15] Fix missing initialization of g_num_copies and
g_num_hashes
When running the benchmark, the numbers of copies and hashes reported is
constantly increasing, as the values were not reset when starting the
test. This commit fixes this, and does not change the benchmark itself
or reported times.
---
src/time_hash_map.cc | 2 ++
1 file changed, 2 insertions(+)
diff --git a/src/time_hash_map.cc b/src/time_hash_map.cc
index 635c5ab..1f6e0f3 100644
--- a/src/time_hash_map.cc
+++ b/src/time_hash_map.cc
@@ -331,6 +331,8 @@ class Rusage {
};
inline void Rusage::Reset() {
+ g_num_copies = 0;
+ g_num_hashes = 0;
#if defined HAVE_SYS_RESOURCE_H
getrusage(RUSAGE_SELF, &start);
#elif defined HAVE_WINDOWS_H
From 90e60f03d03323ee225009c10c3fbb8864e23282 Mon Sep 17 00:00:00 2001
From: greg7mdp
Date: Sat, 23 Jul 2016 18:43:53 -0400
Subject: [PATCH 11/15] Update test for large objects with a more reasonable
hash function.
The HashObject uses the following code in the Hash() function:
```
size_t Hash() const {
g_num_hashes++;
int hashval = i_;
for (size_t i = 0; i < Hashsize - sizeof(i_); ++i) {
hashval += buffer_[i];
}
return SPARSEHASH_HASH()(hashval);
}```
As you can see, when Hashsize is specified as 256, the Hash() function
will iterate and sum 252 bytes.
This is a big contributor to the time spent in sparse, dense and
unordered_map tests, and I feel is unfair when comparing with the
std::map, because the HashObject comparison function compares only the
"i_" member:
```
bool operator==(const class_type& that) const { return this->i_ ==
that.i_; }
bool operator< (const class_type& that) const { return this->i_ <
that.i_; }
bool operator<=(const class_type& that) const { return this->i_ <=
that.i_; }```
---
src/time_hash_map.cc | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/src/time_hash_map.cc b/src/time_hash_map.cc
index 1f6e0f3..34d5721 100644
--- a/src/time_hash_map.cc
+++ b/src/time_hash_map.cc
@@ -723,7 +723,7 @@ int main(int argc, char** argv) {
if (FLAGS_test_4_bytes) test_all_maps< HashObject<4,4> >(4, iters/1);
if (FLAGS_test_8_bytes) test_all_maps< HashObject<8,8> >(8, iters/2);
if (FLAGS_test_16_bytes) test_all_maps< HashObject<16,16> >(16, iters/4);
- if (FLAGS_test_256_bytes) test_all_maps< HashObject<256,256> >(256, iters/32);
+ if (FLAGS_test_256_bytes) test_all_maps< HashObject<256,32> >(256, iters/32);
return 0;
}
From 4cb924025b8c622d1a1e11f4c1e9db15410c75fb Mon Sep 17 00:00:00 2001
From: greg7mdp
Date: Tue, 26 Jul 2016 09:22:51 -0400
Subject: [PATCH 12/15] Correct the memory usage claims to take into account
allocator overhead (#132)
The default memory allocator used (libc_allocator_with_realloc)
necessarily has some overhead, as the size of the block is not passed to
free(). The memory usage claims are updated to take into account an
overhead of up to 16 bytes per malloc'ed block.
---
README | 14 ++++++----
doc/implementation.html | 60 +++++++++++++++++++++++++++++++++++++++++
2 files changed, 69 insertions(+), 5 deletions(-)
diff --git a/README b/README
index 8c806b4..26bb485 100644
--- a/README
+++ b/README
@@ -106,20 +106,24 @@ SPARSETABLE
In addition to the hash-map and hash-set classes, this package also
provides sparsetable.h, an array implementation that uses space
proportional to the number of elements in the array, rather than the
-maximum element index. It uses very little space overhead: 1 bit per
-entry. See doc/sparsetable.html for the API.
+maximum element index. It uses very little space overhead: 2 to 5
+bits per entry. See doc/sparsetable.html for the API.
RESOURCE USAGE
--------------
-* sparse_hash_map has memory overhead of about 2 bits per hash-map
- entry.
+* sparse_hash_map has memory overhead of about 4 to 10 bits per
+ hash-map entry, assuming a typical average occupancy of 50%.
* dense_hash_map has a factor of 2-3 memory overhead: if your
hashtable data takes X bytes, dense_hash_map will use 3X-4X memory
total.
Hashtables tend to double in size when resizing, creating an
additional 50% space overhead. dense_hash_map does in fact have a
-significant "high water mark" memory use requirement.
+significant "high water mark" memory use requirement, which is 6 times
+the size of hash entries in the table when resizing (when reaching
+50% occupancy, the table resizes to double the previous size, and the
+old table (2x) is copied to the new table (4x)).
+
sparse_hash_map, however, is written to need very little space
overhead when resizing: only a few bits per hashtable entry.
diff --git a/doc/implementation.html b/doc/implementation.html
index dce49d3..31789ac 100644
--- a/doc/implementation.html
+++ b/doc/implementation.html
@@ -131,6 +131,66 @@ Resource use
entry -- but take longer for inserts, deletes, and lookups. A smaller
M would use more overhead but make operations somewhat faster.
+The numbers above assume that the allocator used doesn't require extra
+memory. The default allocator (using malloc/free) typically has some overhead
+for each allocation. If we assume 16 byte overhead per allocation, the
+overhead becomes 4.6 bit per array entry (32 bit pointers) or 5.3 bit per
+array entry (64 bit pointers)
+
+Each sparsegroup has:
+
+
+
+
+| member |
+32 bit |
+64 bit |
+
+
+
+
+| pointer |
+4 bytes |
+8 bytes |
+
+
+| num_buckets |
+2 bytes |
+2 bytes |
+
+
+| bitmap |
+6 bytes |
+6 bytes |
+
+
+| total |
+12 bytes = 96 bits |
+16 bytes = 128 bits |
+
+
+| because this is the overhead for each sparsegroup (48 entries), we divide by 48 |
+ |
+ |
+
+
+| overhead / entry |
+96 / 48 = 2 bits |
+128 / 48 = 2.67 bits |
+
+
+| additional overhead per allocation up to 16 bytes = 128 bits |
+ |
+ |
+
+
+| max overhead / entry |
+(96 + 128) / 48 = 4.67 bits |
+(128 + 128) / 48 = 5.33 bits |
+
+
+
+
You can also look at some specific performance numbers.
From a320767ac61d1668aa48be8ee3e973056b11a816 Mon Sep 17 00:00:00 2001
From: Si Wei How
Date: Mon, 3 Jun 2019 10:44:43 +0800
Subject: [PATCH 13/15] Prevent compiler warning about calling realloc() on an
object which cannot be relocated in memory
---
src/sparsehash/internal/libc_allocator_with_realloc.h | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/src/sparsehash/internal/libc_allocator_with_realloc.h b/src/sparsehash/internal/libc_allocator_with_realloc.h
index 0c1e03d..769329f 100644
--- a/src/sparsehash/internal/libc_allocator_with_realloc.h
+++ b/src/sparsehash/internal/libc_allocator_with_realloc.h
@@ -65,7 +65,10 @@ class libc_allocator_with_realloc {
free(p);
}
pointer reallocate(pointer p, size_type n) {
- return static_cast(realloc(p, n * sizeof(value_type)));
+ // p points to a storage array whose objects have already been destroyed
+ // cast to void* to prevent compiler warnings about calling realloc() on
+ // an object which cannot be relocated in memory
+ return static_cast(realloc(static_cast(p), n * sizeof(value_type)));
}
size_type max_size() const {
From 95e5e934bc8154349fb70b18979bca69fd3121f2 Mon Sep 17 00:00:00 2001
From: How Si Wei
Date: Thu, 27 Jun 2019 12:53:16 +0800
Subject: [PATCH 14/15] Prevent compiler warning about writing to an object
with no trivial copy-assignment
---
src/sparsehash/sparsetable | 9 +++++++--
1 file changed, 7 insertions(+), 2 deletions(-)
diff --git a/src/sparsehash/sparsetable b/src/sparsehash/sparsetable
index 3ddccf8..6259ebd 100644
--- a/src/sparsehash/sparsetable
+++ b/src/sparsehash/sparsetable
@@ -1088,7 +1088,9 @@ class sparsegroup {
// This is equivalent to memmove(), but faster on my Intel P4,
// at least with gcc4.1 -O2 / glibc 2.3.6.
for (size_type i = settings.num_buckets; i > offset; --i)
- memcpy(group + i, group + i-1, sizeof(*group));
+ // cast to void* to prevent compiler warnings about writing to an object
+ // with no trivial copy-assignment
+ memcpy(static_cast(group + i), group + i-1, sizeof(*group));
}
// Create space at group[offset], without special assumptions about value_type
@@ -1154,7 +1156,10 @@ class sparsegroup {
// at lesat with gcc4.1 -O2 / glibc 2.3.6.
assert(settings.num_buckets > 0);
for (size_type i = offset; i < settings.num_buckets-1; ++i)
- memcpy(group + i, group + i+1, sizeof(*group)); // hopefully inlined!
+ // cast to void* to prevent compiler warnings about writing to an object
+ // with no trivial copy-assignment
+ // hopefully inlined!
+ memcpy(static_cast(group + i), group + i+1, sizeof(*group));
group = settings.realloc_or_die(group, settings.num_buckets-1);
}
From 9534e581624d3dc0df7470cf3a28a17478a6020a Mon Sep 17 00:00:00 2001
From: Dekken
Date: Wed, 12 Aug 2020 13:41:24 +0200
Subject: [PATCH 15/15] Create build_ubuntu_lastest.yml
---
.github/workflows/build_ubuntu_lastest.yml | 20 ++++++++++++++++++++
1 file changed, 20 insertions(+)
create mode 100644 .github/workflows/build_ubuntu_lastest.yml
diff --git a/.github/workflows/build_ubuntu_lastest.yml b/.github/workflows/build_ubuntu_lastest.yml
new file mode 100644
index 0000000..c531b44
--- /dev/null
+++ b/.github/workflows/build_ubuntu_lastest.yml
@@ -0,0 +1,20 @@
+name: C/C++ CI
+
+on:
+ push:
+ branches: [ master ]
+ pull_request:
+ branches: [ master ]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+
+ steps:
+ - uses: actions/checkout@v2
+
+ - name: build
+ run: |
+ ./autogen.sh
+ ./configure --prefix=$PWD
+ make && make check