diff --git a/src/libutil/ustring.cpp b/src/libutil/ustring.cpp index 3da39ccf65..043ce43dfb 100644 --- a/src/libutil/ustring.cpp +++ b/src/libutil/ustring.cpp @@ -522,7 +522,7 @@ ustring::make_unique(string_view strref) size_t bin = rm.lock_bin(hash); hash_t orighash = hash; - size_t binmask = orighash & (~rm.nobin_mask()); + size_t binbits = orighash & (~rm.nobin_mask()); size_t num_rehashes = 0; while (1) { @@ -546,9 +546,23 @@ ustring::make_unique(string_view strref) break; } // Rehash, but keep the bin bits identical so we always rehash into - // the same (locked) bin. - hash = (hash & binmask) - | (farmhash::Fingerprint(hash) & rm.nobin_mask()); + // the same (locked) bin. But watch out for rehashing that returns the + // identical non-bin part as before -- that will enter an infinite + // loop if we're not careful! + hash_t old_nonbin_bits = hash & rm.nobin_mask(); + hash_t new_nonbin_bits = farmhash::Fingerprint(hash) & rm.nobin_mask(); + if (OIIO_UNLIKELY(old_nonbin_bits == new_nonbin_bits)) { + new_nonbin_bits = (new_nonbin_bits + 7) & rm.nobin_mask(); +# ifndef NDEBUG + std::string s = Strutil::escape_chars(strref); + print(stderr, "IDEMPOTENT RE-HASH! |{}|\n", s); + for (auto c : s) + print(stderr, c > 0 ? "{:c}" : "\\{:03o}", + static_cast(c)); + print(stderr, "\n"); +# endif + } + hash = binbits | new_nonbin_bits; ++num_rehashes; // Strutil::print("COLLISION \"{}\" {:08x} vs \"{}\"\n", // strref, orighash, rev->second); @@ -556,6 +570,7 @@ ustring::make_unique(string_view strref) std::lock_guard lock(collision_mutex); all_hash_collisions.emplace_back(rev->second, rev->first); } + OIIO_ASSERT(num_rehashes < 100000); // Something is very wrong } rm.unlock_bin(bin); diff --git a/src/libutil/ustring_test.cpp b/src/libutil/ustring_test.cpp index 3b4ff692e4..61b0aad167 100644 --- a/src/libutil/ustring_test.cpp +++ b/src/libutil/ustring_test.cpp @@ -167,6 +167,14 @@ test_ustring() OIIO_CHECK_EQUAL(whichtype, ustring("foo")); OIIO_CHECK_ASSERT((std::is_same::value)); OIIO_CHECK_ASSERT(!(std::is_same::value)); + + // Regression test: This specific string hashes to 0! This once caused a + // lot of trouble, because it couldn't rehash properly (every rehash also + // ended up at 0). When broken, this made an infinite loop inside + // ustring::make_unique(). + const char* hash0 + = "\t\n\n\t\301\350`O\217c85?\202\200\251r|}\304\351\2517\337K'\217C\240"; + OIIO_CHECK_EQUAL(ustring(hash0).hash(), 13226272983328805811ULL); }