//===- TrieRawHashMap.h -----------------------------------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// #ifndef LLVM_ADT_TRIERAWHASHMAP_H #define LLVM_ADT_TRIERAWHASHMAP_H #include "llvm/ADT/ArrayRef.h" #include "llvm/Support/Compiler.h" #include #include namespace llvm { class raw_ostream; /// TrieRawHashMap - is a lock-free thread-safe trie that is can be used to /// store/index data based on a hash value. It can be customized to work with /// any hash algorithm or store any data. /// /// Data structure: /// Data node stored in the Trie contains both hash and data: /// struct { /// HashT Hash; /// DataT Data; /// }; /// /// Data is stored/indexed via a prefix tree, where each node in the tree can be /// either the root, a sub-trie or a data node. Assuming a 4-bit hash and two /// data objects {0001, A} and {0100, B}, it can be stored in a trie /// (assuming Root has 2 bits, SubTrie has 1 bit): /// +--------+ /// |Root[00]| -> {0001, A} /// | [01]| -> {0100, B} /// | [10]| (empty) /// | [11]| (empty) /// +--------+ /// /// Inserting a new object {0010, C} will result in: /// +--------+ +----------+ /// |Root[00]| -> |SubTrie[0]| -> {0001, A} /// | | | [1]| -> {0010, C} /// | | +----------+ /// | [01]| -> {0100, B} /// | [10]| (empty) /// | [11]| (empty) /// +--------+ /// Note object A is sunk down to a sub-trie during the insertion. All the /// nodes are inserted through compare-exchange to ensure thread-safe and /// lock-free. /// /// To find an object in the trie, walk the tree with prefix of the hash until /// the data node is found. Then the hash is compared with the hash stored in /// the data node to see if the is the same object. /// /// Hash collision is not allowed so it is recommended to use trie with a /// "strong" hashing algorithm. A well-distributed hash can also result in /// better performance and memory usage. /// /// It currently does not support iteration and deletion. /// Base class for a lock-free thread-safe hash-mapped trie. class ThreadSafeTrieRawHashMapBase { public: static constexpr size_t TrieContentBaseSize = 4; static constexpr size_t DefaultNumRootBits = 6; static constexpr size_t DefaultNumSubtrieBits = 4; private: template struct AllocValueType { char Base[TrieContentBaseSize]; alignas(T) char Content[sizeof(T)]; }; protected: template static constexpr size_t DefaultContentAllocSize = sizeof(AllocValueType); template static constexpr size_t DefaultContentAllocAlign = alignof(AllocValueType); template static constexpr size_t DefaultContentOffset = offsetof(AllocValueType, Content); public: static void *operator new(size_t Size) { return ::operator new(Size); } void operator delete(void *Ptr) { ::operator delete(Ptr); } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) LLVM_DUMP_METHOD void dump() const; #endif LLVM_ABI void print(raw_ostream &OS) const; protected: /// Result of a lookup. Suitable for an insertion hint. Maybe could be /// expanded into an iterator of sorts, but likely not useful (visiting /// everything in the trie should probably be done some way other than /// through an iterator pattern). class PointerBase { protected: void *get() const { return I == -2u ? P : nullptr; } public: PointerBase() noexcept = default; private: friend class ThreadSafeTrieRawHashMapBase; explicit PointerBase(void *Content) : P(Content), I(-2u) {} PointerBase(void *P, unsigned I, unsigned B) : P(P), I(I), B(B) {} bool isHint() const { return I != -1u && I != -2u; } void *P = nullptr; unsigned I = -1u; unsigned B = 0; }; /// Find the stored content with hash. LLVM_ABI PointerBase find(ArrayRef Hash) const; /// Insert and return the stored content. LLVM_ABI PointerBase insert(PointerBase Hint, ArrayRef Hash, function_ref Hash)> Constructor); ThreadSafeTrieRawHashMapBase() = delete; LLVM_ABI ThreadSafeTrieRawHashMapBase( size_t ContentAllocSize, size_t ContentAllocAlign, size_t ContentOffset, std::optional NumRootBits = std::nullopt, std::optional NumSubtrieBits = std::nullopt); /// Destructor, which asserts if there's anything to do. Subclasses should /// call \a destroyImpl(). /// /// \pre \a destroyImpl() was already called. LLVM_ABI ~ThreadSafeTrieRawHashMapBase(); LLVM_ABI void destroyImpl(function_ref Destructor); LLVM_ABI ThreadSafeTrieRawHashMapBase(ThreadSafeTrieRawHashMapBase &&RHS); // Move assignment is not supported as it is not thread-safe. ThreadSafeTrieRawHashMapBase & operator=(ThreadSafeTrieRawHashMapBase &&RHS) = delete; // No copy. ThreadSafeTrieRawHashMapBase(const ThreadSafeTrieRawHashMapBase &) = delete; ThreadSafeTrieRawHashMapBase & operator=(const ThreadSafeTrieRawHashMapBase &) = delete; // Debug functions. Implementation details and not guaranteed to be // thread-safe. LLVM_ABI PointerBase getRoot() const; LLVM_ABI unsigned getStartBit(PointerBase P) const; LLVM_ABI unsigned getNumBits(PointerBase P) const; LLVM_ABI unsigned getNumSlotUsed(PointerBase P) const; LLVM_ABI std::string getTriePrefixAsString(PointerBase P) const; LLVM_ABI unsigned getNumTries() const; // Visit next trie in the allocation chain. LLVM_ABI PointerBase getNextTrie(PointerBase P) const; private: friend class TrieRawHashMapTestHelper; const unsigned short ContentAllocSize; const unsigned short ContentAllocAlign; const unsigned short ContentOffset; unsigned short NumRootBits; unsigned short NumSubtrieBits; class ImplType; // ImplPtr is owned by ThreadSafeTrieRawHashMapBase and needs to be freed in // destroyImpl. std::atomic ImplPtr; ImplType &getOrCreateImpl(); ImplType *getImpl() const; }; /// Lock-free thread-safe hash-mapped trie. template class ThreadSafeTrieRawHashMap : public ThreadSafeTrieRawHashMapBase { public: using HashT = std::array; class LazyValueConstructor; struct value_type { const HashT Hash; T Data; value_type(value_type &&) = default; value_type(const value_type &) = default; value_type(ArrayRef Hash, const T &Data) : Hash(makeHash(Hash)), Data(Data) {} value_type(ArrayRef Hash, T &&Data) : Hash(makeHash(Hash)), Data(std::move(Data)) {} private: friend class LazyValueConstructor; struct EmplaceTag {}; template value_type(ArrayRef Hash, EmplaceTag, ArgsT &&...Args) : Hash(makeHash(Hash)), Data(std::forward(Args)...) {} static HashT makeHash(ArrayRef HashRef) { HashT Hash; std::copy(HashRef.begin(), HashRef.end(), Hash.data()); return Hash; } }; using ThreadSafeTrieRawHashMapBase::operator delete; using HashType = HashT; #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) using ThreadSafeTrieRawHashMapBase::dump; #endif using ThreadSafeTrieRawHashMapBase::print; private: template class PointerImpl : PointerBase { friend class ThreadSafeTrieRawHashMap; ValueT *get() const { return reinterpret_cast(PointerBase::get()); } public: ValueT &operator*() const { assert(get()); return *get(); } ValueT *operator->() const { assert(get()); return get(); } explicit operator bool() const { return get(); } PointerImpl() = default; protected: PointerImpl(PointerBase Result) : PointerBase(Result) {} }; public: class pointer; class const_pointer; class pointer : public PointerImpl { friend class ThreadSafeTrieRawHashMap; friend class const_pointer; public: pointer() = default; private: pointer(PointerBase Result) : pointer::PointerImpl(Result) {} }; class const_pointer : public PointerImpl { friend class ThreadSafeTrieRawHashMap; public: const_pointer() = default; const_pointer(const pointer &P) : const_pointer::PointerImpl(P) {} private: const_pointer(PointerBase Result) : const_pointer::PointerImpl(Result) {} }; class LazyValueConstructor { public: value_type &operator()(T &&RHS) { assert(Mem && "Constructor already called, or moved away"); return assign(::new (Mem) value_type(Hash, std::move(RHS))); } value_type &operator()(const T &RHS) { assert(Mem && "Constructor already called, or moved away"); return assign(::new (Mem) value_type(Hash, RHS)); } template value_type &emplace(ArgsT &&...Args) { assert(Mem && "Constructor already called, or moved away"); return assign(::new (Mem) value_type(Hash, typename value_type::EmplaceTag{}, std::forward(Args)...)); } LazyValueConstructor(LazyValueConstructor &&RHS) : Mem(RHS.Mem), Result(RHS.Result), Hash(RHS.Hash) { RHS.Mem = nullptr; // Moved away, cannot call. } ~LazyValueConstructor() { assert(!Mem && "Constructor never called!"); } private: value_type &assign(value_type *V) { Mem = nullptr; Result = V; return *V; } friend class ThreadSafeTrieRawHashMap; LazyValueConstructor() = delete; LazyValueConstructor(void *Mem, value_type *&Result, ArrayRef Hash) : Mem(Mem), Result(Result), Hash(Hash) { assert(Hash.size() == sizeof(HashT) && "Invalid hash"); assert(Mem && "Invalid memory for construction"); } void *Mem; value_type *&Result; ArrayRef Hash; }; /// Insert with a hint. Default-constructed hint will work, but it's /// recommended to start with a lookup to avoid overhead in object creation /// if it already exists. pointer insertLazy(const_pointer Hint, ArrayRef Hash, function_ref OnConstruct) { return pointer(ThreadSafeTrieRawHashMapBase::insert( Hint, Hash, [&](void *Mem, ArrayRef Hash) { value_type *Result = nullptr; OnConstruct(LazyValueConstructor(Mem, Result, Hash)); return Result->Hash.data(); })); } pointer insertLazy(ArrayRef Hash, function_ref OnConstruct) { return insertLazy(const_pointer(), Hash, OnConstruct); } pointer insert(const_pointer Hint, value_type &&HashedData) { return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) { C(std::move(HashedData.Data)); }); } pointer insert(const_pointer Hint, const value_type &HashedData) { return insertLazy(Hint, HashedData.Hash, [&](LazyValueConstructor C) { C(HashedData.Data); }); } pointer find(ArrayRef Hash) { assert(Hash.size() == std::tuple_size::value); return ThreadSafeTrieRawHashMapBase::find(Hash); } const_pointer find(ArrayRef Hash) const { assert(Hash.size() == std::tuple_size::value); return ThreadSafeTrieRawHashMapBase::find(Hash); } ThreadSafeTrieRawHashMap(std::optional NumRootBits = std::nullopt, std::optional NumSubtrieBits = std::nullopt) : ThreadSafeTrieRawHashMapBase(DefaultContentAllocSize, DefaultContentAllocAlign, DefaultContentOffset, NumRootBits, NumSubtrieBits) {} ~ThreadSafeTrieRawHashMap() { if constexpr (std::is_trivially_destructible::value) this->destroyImpl(nullptr); else this->destroyImpl( [](void *P) { static_cast(P)->~value_type(); }); } // Move constructor okay. ThreadSafeTrieRawHashMap(ThreadSafeTrieRawHashMap &&) = default; // No move assignment or any copy. ThreadSafeTrieRawHashMap &operator=(ThreadSafeTrieRawHashMap &&) = delete; ThreadSafeTrieRawHashMap(const ThreadSafeTrieRawHashMap &) = delete; ThreadSafeTrieRawHashMap & operator=(const ThreadSafeTrieRawHashMap &) = delete; }; } // namespace llvm #endif // LLVM_ADT_TRIERAWHASHMAP_H