C++中unordered_map和unordered_set的介绍以及用哈希表封装实现unordered_map和unordered_set

1.unordered_map和unordered_set的使用

1.1unordered_set类的介绍

1.2unordered_set和set的使用差异

1.3unordered_map和map的使用差异

1.4unordered_multimap/unordered_multiset

2.用哈希表封装实现unordered_set和unordered_map

2.1实现出复用哈希表的框架并支持insert

2.1.1初步的unordered_map和unordered_set的框架

2.1.2初步泛型哈希表框架的实现

2.2iterator的实现

2.3unordered_map和unordered_set代码实现

2.3.1unordered_map.h

2.3.2unordered_set.h

2.3.3hash_tables.h

2.3.4测试代码test.cpp

1.unordered_map和unordered_set的使用

这里的介绍与set和map进行对比做介绍。map和set的使用可以参考C++中map和set的使用。

unordered_set和unordered_multiset的参考文档：https://legacy.cplusplus.com/reference/unordered_set/

unordered_map和unordered_multimap的参考文档：https://legacy.cplusplus.com/reference/unordered_map/

1.1unordered_set类的介绍

unordered_set的声明如下，Key就是unordered_set底层关键字的类型：

// Declaration of the standard unordered_set class template, shown for reference.
// Each trailing comment names the member typedef that the parameter defines.
template < class Key,    //unordered_set::key_type/value_type
           class Hash = hash<Key>,    // unordered_set::hasher
           class Pred = equal_to<Key>,    // unordered_set::key_equal
           class Alloc = allocator<Key>    // unordered_set::allocator_type
>
class unordered_set;

unordered_set默认要求Key支持转换为整型，如果不支持或者想按自己的需求走，可以自行实现支持将Key转成整型的仿函数传给第二个模板参数。

unordered_set底层是⽤哈希桶实现，增删查平均效率是 O(1) ，迭代器遍历不再有序，为了跟set区分，所以取名unordered_set。

1.2unordered_set和set的使用差异

1.unordered_set和set的第一个差异是对key的要求不同，set要求Key支持小于比较，而unordered_set要求Key支持转成整型且支持等于比较。

2.unordered_set和set的第⼆个差异是迭代器的差异，set的iterator是双向迭代器，unordered_set是单向迭代器，其次set底层是红⿊树，红⿊树是⼆叉搜索树，⾛中序遍历是有序的，所以set迭代器遍历是有序+去重。⽽unordered_set底层是哈希表，迭代器遍历是⽆序+去重。

3.unordered_set和set的第三个差异是性能的差异，整体而言大多数场景下，unordered_set的增删查改更快一些，因为红黑树增删查改的效率是 O(logN) ，而哈希表增删查的平均效率是 O(1) ，具体差异可以参看下面代码演示的对比结果。

#include <vector>
#include <unordered_set>
#include <unordered_map>
#include <set>
#include <iostream>
using namespace std;

// Benchmarks std::set against std::unordered_set on the same N pseudo-random ints.
// For each container it times (in clock() ticks) inserting, finding and erasing
// every element of the input vector, and prints the timings, the number of
// successful finds and the container sizes to cout.
void test()
{
	const size_t N = 1000000;

	unordered_set<int> us;
	set<int> s;

	vector<int> v;
	v.reserve(N);

	// srand takes an unsigned seed; make the narrowing from time_t explicit.
	srand(static_cast<unsigned>(time(nullptr)));
	for (size_t i = 0; i < N; i++)
	{
		//v.push_back(rand()); // many duplicate values when N is large
		// rand() + i is evaluated as size_t; the cast spells out the conversion
		// to the vector's int element type that previously happened implicitly.
		v.push_back(static_cast<int>(rand() + i)); // relatively few duplicates
		//v.push_back(i); // no duplicates, already sorted
	}

	// --- insert ---
	size_t begin1 = clock();
	for (auto e : v)
	{
		s.insert(e);
	}
	size_t end1 = clock();
	cout << "set insert:" << end1 - begin1 << endl;

	size_t begin2 = clock();
	// The reserve call is deliberately inside the timed region, as in the original.
	us.reserve(N);
	for (auto e : v)
	{
		us.insert(e);
	}
	size_t end2 = clock();
	cout << "unordered_set insert:" << end2 - begin2 << endl;

	// --- find (m1/m2 count the successful lookups) ---
	int m1 = 0;
	size_t begin3 = clock();
	for (auto e : v)
	{
		auto ret = s.find(e);
		if (ret != s.end())
		{
			++m1;
		}
	}
	size_t end3 = clock();
	cout << "set find:" << end3 - begin3 << "->" << m1 << endl;

	int m2 = 0;
	size_t begin4 = clock();
	for (auto e : v)
	{
		auto ret = us.find(e);
		if (ret != us.end())
		{
			++m2;
		}
	}
	size_t end4 = clock();
	// Fixed label typo: was "unorered_set find:".
	cout << "unordered_set find:" << end4 - begin4 << "->" << m2 << endl;

	cout << "set插入数据个数：" << s.size() << endl;
	cout << "unordered_set插入数据个数：" << us.size() << endl;

	// --- erase ---
	size_t begin5 = clock();
	for (auto e : v)
	{
		s.erase(e);
	}
	size_t end5 = clock();
	cout << "set erase:" << end5 - begin5 << endl;

	size_t begin6 = clock();
	for (auto e : v)
	{
		us.erase(e);
	}
	size_t end6 = clock();
	cout << "unordered_set erase:" << end6 - begin6 << endl << endl;
}

// Program entry point: runs the set / unordered_set benchmark once.
int main()
{
	test();

	// Reaching the end of main is equivalent to `return 0;`.
}

可以看到，在Release版本下，unordered_set的增删查效率都要高于set。

1.3unordered_map和map的使用差异

unordered_map和map的使用差异与unordered_set和set的使用差异完全相同，不同的就是存储的节点数据类型不同。

1.4unordered_multimap/unordered_multiset

unordered_multimap/unordered_multiset跟multimap/multiset功能完全类似，⽀持Key冗余。unordered_multimap/unordered_multiset跟multimap/multiset的差异也是三个⽅⾯的差异，key的要求的差异，iterator及遍历顺序的差异，性能的差异。

2.用哈希表封装实现unordered_set和unordered_map

2.1实现出复用哈希表的框架并支持insert

这⾥相⽐源码调整⼀下，key参数就⽤K，value参数就⽤V，哈希表中的数据类型，我们使⽤T。
unordered_map和unordered_set的模拟实现类结构更复杂一点，但是大框架和思路是完全类似的。因为HashTables是泛型实现，它不知道T参数到底是K，还是pair<K, V>，而insert内部进行插入时需要将K对象转换成整型并取模；pair的value不参与取模计算，并且pair默认的相等比较是key和value一起比较，而我们需要的是任何时候都只比较K对象。所以我们在unordered_map和unordered_set层分别实现一个MapKeyOfT和SetKeyOfT的仿函数传给HashTables的KeyOfT，然后HashTables中通过KeyOfT仿函数取出T类型对象中的K对象，再转换成整型并取模。

2.1.1初步的unordered_map和unordered_set的框架

//unordered_set.h
#pragma once
#include "hash_tables.h"

namespace xiaoc
{
    //K为存储的key的类型
    //Hash用于接收仿函数，用于将key转换为无符号整型进行后续的映射
	template<class K, class Hash = HashFunc<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};

	public:
		bool insert(const K& key)
		{
			return _ht.Insert(key);
		}

	private:
		HashTables<K, K, SetKeyOfT, Hash> _ht;
	};
}

//unordered_map.h
#pragma once
#include "hash_tables.h"

namespace xiaoc
{
    //K为key数据的类型
    //V为value数据的类型
	template<class K, class V, class Hash = HashFunc<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

	public:
		bool insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}
	private:
		HashTables<K, pair<K, V>, MapKeyOfT, Hash> _ht;
	};
}

2.1.2初步泛型哈希表框架的实现

这里与之前C++哈希表的实现中链地址法实现的哈希表基本相同，修改的就是存储的数据不同以及增加了一个模板参数KeyOfT来接收上层传来的仿函数。

#pragma once
#include <vector>
using namespace std;

namespace xiaoc
{
	// Default hash functor. unordered_map and unordered_set hash their keys the
	// same way, so the functor lives next to the table instead of in the wrappers.
	template<class K>
	struct HashFunc
	{
		std::size_t operator()(const K& key) const
		{
			// C-style cast kept on purpose so that every key type accepted by the
			// original conversion is still accepted.
			return (std::size_t)key;
		}
	};

	// Node of a bucket's singly linked list.
	// T is the type of the data stored in the node.
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}
	};

	// Separate-chaining hash table.
	// K      - key type.
	// T      - stored data type.
	// KeyOfT - functor that extracts the key from a T.
	// Hash   - functor that converts a key to size_t.
	//
	// NOTE(review): the table owns raw nodes but declares no copy constructor or
	// copy assignment, so copying a table would free every node twice. Do not copy
	// it until copy semantics are defined.
	template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
	class HashTables
	{
		typedef HashNode<T> Node;

		// Returns the smallest prime in the growth table that is >= n, or the
		// largest entry when n exceeds all of them. (Renamed from a name containing
		// a double underscore, which is reserved for the implementation.)
		static unsigned long NextPrime(unsigned long n)
		{
			static const int kNumPrimes = 28;
			static const unsigned long kPrimes[kNumPrimes] =
			{
				53UL, 97UL, 193UL, 389UL, 769UL, 1543UL,
				3079UL, 6151UL, 12289UL, 24593UL, 49157UL,
				98317UL, 196613UL, 393241UL, 786433UL, 1572869UL,
				3145739UL, 6291469UL, 12582917UL, 25165843UL,
				50331653UL, 100663319UL, 201326611UL, 402653189UL,
				805306457UL, 1610612741UL, 3221225473UL, 4294967291UL
			};
			const unsigned long* first = kPrimes;
			const unsigned long* last = kPrimes + kNumPrimes;
			const unsigned long* pos = std::lower_bound(first, last, n);
			return pos == last ? *(last - 1) : *pos;
		}

	public:
		// Starts with 11 empty buckets.
		// (Alternative: start at the first prime of the growth table, NextPrime(0).)
		HashTables()
			:_tables(11)
			,_n(0)
		{}

		// Frees every node of every bucket.
		~HashTables()
		{
			// size_t index: avoids the signed/unsigned comparison of an int loop.
			for (std::size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}

				_tables[i] = nullptr;
			}
		}

		// Inserts a copy of data at the head of its bucket. Always returns true
		// (this preliminary version does not check for duplicates).
		bool Insert(const T& data)
		{
			KeyOfT kot;
			Hash hash;

			// Grow when the load factor reaches 1.
			if (_n == _tables.size())
			{
				std::vector<Node*> newtables(NextPrime(_tables.size() + 1), nullptr);
				for (std::size_t i = 0; i < _tables.size(); i++)
				{
					Node* cur = _tables[i];
					while (cur)
					{
						Node* next = cur->_next;
						// Relink the existing node at the head of its new bucket.
						std::size_t target = hash(kot(cur->_data)) % newtables.size();
						cur->_next = newtables[target];
						newtables[target] = cur;
						cur = next;
					}

					_tables[i] = nullptr;
				}

				_tables.swap(newtables);
			}

			// BUG FIX: the bucket index must be computed AFTER a possible rehash.
			// It used to be computed against the old bucket count and reused, so
			// the element that triggered growth was linked into the wrong bucket.
			const std::size_t hashi = hash(kot(data)) % _tables.size();

			// Head insertion.
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return true;
		}

	private:
		std::vector<Node*> _tables; // bucket heads
		std::size_t _n;             // number of stored elements
	};
}

2.2iterator的实现

iterator实现的⼤框架跟list的iterator思路是⼀致的，⽤⼀个类型封装结点的指针，再通过重载运算符实现，迭代器像指针⼀样访问的⾏为，要注意的是哈希表的迭代器是单向迭代器。

这⾥的难点是operator++的实现。iterator中有⼀个指向结点的指针，如果当前桶下⾯还有结点，则结点的指针指向下⼀个结点即可。如果当前桶⾛完了，则需要想办法计算找到下⼀个桶。这⾥的难点反⽽是结构设计的问题，iterator中除了有结点的指针，还有哈希表对象的指针，这样当前桶⾛完了，要计算下⼀个桶就相对容易多了，⽤key值计算出当前桶位置，依次往后找下⼀个不为空的桶即可。

begin()返回第一个非空桶中第一个节点的指针构造的迭代器，end()返回的迭代器可以用空指针表示。

unordered_set的iterator也不支持修改，我们把unordered_set传给HashTables的第二个模板参数改成const K即可，即 HashTables<K, const K, SetKeyOfT, Hash> _ht。

unordered_map的iterator不支持修改key但是可以修改value，我们把unordered_map传给HashTables的第二个模板参数pair的第一个参数改成const K即可，即 HashTables<K, pair<const K, V>, MapKeyOfT, Hash> _ht。

下列是hash_tables.h中哈希表迭代器类的实现：

    // Forward declaration of the table class template (the iterator stores a
    // pointer to the table it walks).
	template<class K, class T, class KeyOfT, class Hash>
	class HashTables;

	// Forward iterator over a HashTables.
	// Ref / Ptr are the reference and pointer types returned by operator* and
	// operator->. The iterator keeps the current node and a pointer to the table,
	// so that it can move on to the next bucket when the current chain ends.
	// A null node represents end().
	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	class HTIterator
	{
		typedef HashNode<T> Node;
		// Named Table rather than HashTables: a member typedef that re-uses the
		// name of the template it refers to changes the meaning of that name
		// inside the class, which is ill-formed (GCC rejects it).
		typedef HashTables<K, T, KeyOfT, Hash> Table;
		typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

		Node* _node;
		const Table* _ht;

	public:
		HTIterator(Node* node, const Table* ht)
			:_node(node)
			,_ht(ht)
		{}

		// Pre-increment: advances to the next element.
		// Precondition: the iterator is not end() (the node is dereferenced).
		Self& operator++()
		{
			if (_node->_next)
			{
				// The current bucket still has nodes.
				_node = _node->_next;
			}
			else
			{
				// The current bucket is exhausted: recompute its index from the
				// key, then scan forward for the next non-empty bucket.
				KeyOfT kot;
				Hash hash;
				std::size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
				++hashi;
				while (hashi < _ht->_tables.size() && !_ht->_tables[hashi])
				{
					++hashi;
				}

				// No bucket left -> become end(); otherwise take the bucket head.
				_node = (hashi == _ht->_tables.size()) ? nullptr : _ht->_tables[hashi];
			}

			return *this;
		}

		Ref operator*() const
		{
			return _node->_data;
		}

		Ptr operator->() const
		{
			return &_node->_data;
		}

		// Two iterators are equal when they refer to the same node.
		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}

		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}
	};

2.3unordered_map和unordered_set代码实现

这里的实现思路和C++中封装红黑树模拟实现map和set中的思路一样，上层的unordered_map和unordered_set都是调用下层哈希表的接口实现对应的功能。

2.3.1unordered_map.h

#pragma once
#include "hash_tables.h"

namespace xiaoc
{
	template<class K, class V, class Hash = HashFunc<K>>
	class unordered_map
	{
		struct MapKeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return kv.first;
			}
		};

	public:
		typedef typename HashTables<K, pair<const K, V>, MapKeyOfT, Hash>::Iterator iterator;
		typedef typename HashTables<K, pair<const K, V>, MapKeyOfT, Hash>::ConstIterator const_iterator;

		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}

		const_iterator begin() const
		{
			return _ht.Begin();
		}

		const_iterator end() const
		{
			return _ht.End();
		}

		pair<iterator, bool> insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}

		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = _ht.Insert({ key, V() });
			return ret.first->second;
		}
	private:
		HashTables<K, pair<const K, V>, MapKeyOfT, Hash> _ht;
	};

	void test_map()
	{
		unordered_map<string, string> dict;
		dict.insert({ "sort", "排序" });
		dict.insert({ "left", "左边" });
		dict.insert({ "right", "右边" });
		dict["left"] = "左边，剩余";
		dict["insert"] = "插入";
		dict["string"];
		unordered_map<string, string>::iterator it = dict.begin();
		while (it != dict.end())
		{
			// 不能修改first，可以修改second
			//it->first += 'x';
			it->second += 'x';
			cout << it->first << ":" << it->second << endl;
			++it;
		}
		cout << endl;
	}
}

2.3.2unordered_set.h

#pragma once
#include "hash_tables.h"

namespace xiaoc
{
	template<class K, class Hash = HashFunc<K>>
	class unordered_set
	{
		struct SetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};

	public:
		typedef typename HashTables<K, const K, SetKeyOfT, Hash>::Iterator iterator;
		typedef typename HashTables<K, const K, SetKeyOfT, Hash>::ConstIterator const_iterator;

		iterator begin()
		{
			return _ht.Begin();
		}

		iterator end()
		{
			return _ht.End();
		}

		const_iterator begin() const
		{
			return _ht.Begin();
		}

		const_iterator end() const
		{
			return _ht.End();
		}

		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}

	private:
		HashTables<K, const K, SetKeyOfT, Hash> _ht;
	};

	// Demo for unordered_set: inserts a fixed list of ints, then prints the
	// contents twice, once with a range-for loop and once with an explicit iterator.
	// Declared inline because it is defined in a header: a non-inline definition
	// would be a multiple-definition error once two translation units include it.
	inline void test_set()
	{
		unordered_set<int> s;
		int a[] = { 4, 2, 6, 100, 37, 53, 15, 70, 162, 147, 333,150 };
		for (auto e : a)
		{
			s.insert(e);
		}

		for (auto e : s)
		{
			std::cout << e << " ";
		}
		std::cout << std::endl;

		for (unordered_set<int>::iterator it = s.begin(); it != s.end(); ++it)
		{
			// modification through the iterator is not supported
			//*it += 1;
			std::cout << *it << " ";
		}
		std::cout << std::endl;
	}
}

2.3.3hash_tables.h

#pragma once
#include <algorithm>
#include <cstddef>
#include <string>
#include <utility>
#include <vector>
using namespace std;

namespace xiaoc
{
	// Default hash functor. unordered_map and unordered_set hash their keys the
	// same way, so the functor lives next to the table instead of in the wrappers.
	template<class K>
	struct HashFunc
	{
		std::size_t operator()(const K& key) const
		{
			// C-style cast kept on purpose so that every key type accepted by the
			// original conversion is still accepted.
			return (std::size_t)key;
		}
	};

	// Specialization for std::string: folds the characters into a polynomial hash
	// with multiplier 131.
	template<>
	struct HashFunc<std::string>
	{
		std::size_t operator()(const std::string& key) const
		{
			std::size_t hash = 0;
			for (auto e : key)
			{
				hash *= 131;
				hash += e;
			}
			return hash;
		}
	};

	// Node of a bucket's singly linked list.
	// T is the type of the data stored in the node.
	template<class T>
	struct HashNode
	{
		T _data;
		HashNode<T>* _next;

		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}
	};

	// Forward declaration of the table class template (the iterator stores a
	// pointer to the table it walks).
	template<class K, class T, class KeyOfT, class Hash>
	class HashTables;

	// Forward iterator over a HashTables.
	// Ref / Ptr are the reference and pointer types returned by operator* and
	// operator->. The iterator keeps the current node and a pointer to the table,
	// so that it can move on to the next bucket when the current chain ends.
	// A null node represents end().
	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	class HTIterator
	{
		typedef HashNode<T> Node;
		// Named Table rather than HashTables: a member typedef that re-uses the
		// name of the template it refers to changes the meaning of that name
		// inside the class, which is ill-formed (GCC rejects it).
		typedef HashTables<K, T, KeyOfT, Hash> Table;
		typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

		Node* _node;
		const Table* _ht;

	public:
		HTIterator(Node* node, const Table* ht)
			:_node(node)
			,_ht(ht)
		{}

		// Pre-increment: advances to the next element.
		// Precondition: the iterator is not end() (the node is dereferenced).
		Self& operator++()
		{
			if (_node->_next)
			{
				// The current bucket still has nodes.
				_node = _node->_next;
			}
			else
			{
				// The current bucket is exhausted: recompute its index from the
				// key, then scan forward for the next non-empty bucket.
				KeyOfT kot;
				Hash hash;
				std::size_t hashi = hash(kot(_node->_data)) % _ht->_tables.size();
				++hashi;
				while (hashi < _ht->_tables.size() && !_ht->_tables[hashi])
				{
					++hashi;
				}

				// No bucket left -> become end(); otherwise take the bucket head.
				_node = (hashi == _ht->_tables.size()) ? nullptr : _ht->_tables[hashi];
			}

			return *this;
		}

		Ref operator*() const
		{
			return _node->_data;
		}

		Ptr operator->() const
		{
			return &_node->_data;
		}

		// Two iterators are equal when they refer to the same node.
		bool operator==(const Self& s) const
		{
			return _node == s._node;
		}

		bool operator!=(const Self& s) const
		{
			return _node != s._node;
		}
	};


	// Separate-chaining hash table shared by unordered_map and unordered_set.
	// K      - key type.
	// T      - stored data type (the key itself, or a key/value pair).
	// KeyOfT - functor that extracts the key from a T.
	// Hash   - functor that converts a key to size_t.
	// Keys are unique: Insert refuses a value whose key is already present.
	template<class K, class T, class KeyOfT, class Hash = HashFunc<K>>
	class HashTables
	{
		// The iterator reads the private bucket array. The friend declaration uses
		// its own parameter names, because re-declaring the enclosing template's
		// parameter names (K, T, ...) in a nested template is ill-formed.
		template<class K2, class T2, class Ref2, class Ptr2, class KeyOfT2, class Hash2>
		friend class HTIterator;

		typedef HashNode<T> Node;

		// Returns the smallest prime in the growth table that is >= n, or the
		// largest entry when n exceeds all of them. (Renamed from a name containing
		// a double underscore, which is reserved for the implementation.)
		static unsigned long NextPrime(unsigned long n)
		{
			static const int kNumPrimes = 28;
			static const unsigned long kPrimes[kNumPrimes] =
			{
				53UL, 97UL, 193UL, 389UL, 769UL, 1543UL,
				3079UL, 6151UL, 12289UL, 24593UL, 49157UL,
				98317UL, 196613UL, 393241UL, 786433UL, 1572869UL,
				3145739UL, 6291469UL, 12582917UL, 25165843UL,
				50331653UL, 100663319UL, 201326611UL, 402653189UL,
				805306457UL, 1610612741UL, 3221225473UL, 4294967291UL
			};
			const unsigned long* first = kPrimes;
			const unsigned long* last = kPrimes + kNumPrimes;
			const unsigned long* pos = std::lower_bound(first, last, n);
			return pos == last ? *(last - 1) : *pos;
		}

		// Head of the first non-empty bucket, or nullptr when the table is empty.
		Node* FirstNode() const
		{
			if (_n == 0)
			{
				return nullptr;
			}

			for (std::size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return _tables[i];
				}
			}

			return nullptr;
		}

		// Deletes every node and leaves all buckets empty.
		void Clear()
		{
			for (std::size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}

				_tables[i] = nullptr;
			}

			_n = 0;
		}

		// Exchanges the contents of two tables without copying nodes.
		void Swap(HashTables& other)
		{
			_tables.swap(other._tables);
			std::swap(_n, other._n);
		}

		// Moves every node into a new bucket array of newSize buckets.
		// No node is allocated or freed; only links are rewritten.
		void Rehash(std::size_t newSize)
		{
			Hash hash;
			KeyOfT kot;
			std::vector<Node*> newtables(newSize, nullptr);
			for (std::size_t i = 0; i < _tables.size(); i++)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					// Head insertion into the node's new bucket.
					std::size_t target = hash(kot(cur->_data)) % newtables.size();
					cur->_next = newtables[target];
					newtables[target] = cur;
					cur = next;
				}

				_tables[i] = nullptr;
			}

			_tables.swap(newtables);
		}

	public:
		typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
		typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;

		// Iterator to the first element, or End() when the table is empty.
		Iterator Begin()
		{
			return Iterator(FirstNode(), this);
		}

		// Past-the-end iterator, represented by a null node.
		Iterator End()
		{
			return Iterator(nullptr, this);
		}

		ConstIterator Begin() const
		{
			return ConstIterator(FirstNode(), this);
		}

		ConstIterator End() const
		{
			return ConstIterator(nullptr, this);
		}

		// Starts with the first prime of the growth table as the bucket count.
		HashTables()
			:_tables(NextPrime(0))
			,_n(0)
		{}

		// Deep copy. The table owns its nodes through raw pointers, so the
		// implicitly generated copy would make two tables delete the same nodes.
		// Each chain is copied in its original order.
		HashTables(const HashTables& other)
			:_tables(other._tables.size(), nullptr)
			,_n(0)
		{
			try
			{
				for (std::size_t i = 0; i < other._tables.size(); i++)
				{
					Node** tail = &_tables[i];
					for (const Node* cur = other._tables[i]; cur; cur = cur->_next)
					{
						*tail = new Node(cur->_data);
						tail = &(*tail)->_next;
						++_n;
					}
				}
			}
			catch (...)
			{
				// The destructor does not run for a partially constructed object,
				// so release the nodes copied so far before propagating.
				Clear();
				throw;
			}
		}

		// Copy-and-swap assignment: `other` is already a copy, so take over its
		// nodes and let its destructor release the old ones.
		HashTables& operator=(HashTables other)
		{
			Swap(other);
			return *this;
		}

		~HashTables()
		{
			Clear();
		}

		// Inserts a copy of data unless an element with the same key exists.
		// Returns {iterator to the existing element, false} when the key is
		// already present, otherwise {iterator to the new element, true}.
		std::pair<Iterator, bool> Insert(const T& data)
		{
			KeyOfT kot;
			// The key is re-extracted inside each expression instead of being
			// cached in a reference: a KeyOfT whose parameter type differs from T
			// returns a reference into a temporary that only lives until the end
			// of the full expression.
			Iterator it = Find(kot(data));
			if (it != End())
				return { it, false };

			// Grow when the load factor reaches 1.
			if (_n == _tables.size())
			{
				Rehash(NextPrime(_tables.size() + 1));
			}

			// BUG FIX: the bucket index must be computed AFTER a possible rehash.
			// It used to be computed against the old bucket count and reused, so
			// the element that triggered growth was linked into the wrong bucket
			// and could no longer be found or erased.
			Hash hash;
			const std::size_t hashi = hash(kot(data)) % _tables.size();

			// Head insertion.
			Node* newnode = new Node(data);
			newnode->_next = _tables[hashi];
			_tables[hashi] = newnode;
			++_n;

			return { Iterator(newnode, this), true };
		}

		// Returns an iterator to the element whose key equals key, or End().
		Iterator Find(const K& key)
		{
			KeyOfT kot;
			Hash hash;
			const std::size_t hashi = hash(key) % _tables.size();
			for (Node* cur = _tables[hashi]; cur; cur = cur->_next)
			{
				if (kot(cur->_data) == key)
				{
					return Iterator(cur, this);
				}
			}

			return End();
		}

		// Removes the element whose key equals key.
		// Returns true when an element was removed, false when the key is absent.
		bool Erase(const K& key)
		{
			KeyOfT kot;
			Hash hash;

			const std::size_t hashi = hash(key) % _tables.size();
			Node* prev = nullptr;
			Node* cur = _tables[hashi];
			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					// Unlink: either the bucket head or a node inside the chain.
					if (prev == nullptr)
					{
						_tables[hashi] = cur->_next;
					}
					else
					{
						prev->_next = cur->_next;
					}

					delete cur;
					--_n;
					return true;
				}

				prev = cur;
				cur = cur->_next;
			}

			return false;
		}

	private:
		std::vector<Node*> _tables; // bucket heads
		std::size_t _n;             // number of stored elements
	};
}

2.3.4测试代码test.cpp

这里测试用例的具体实现在unordered_map.h和unordered_set.h中，test.cpp只是调用了其中的测试函数。

#include <iostream>
using namespace std;

#include "unordered_map.h"
#include "unordered_set.h"

// Program entry point: runs the unordered_set demo, prints a separator line,
// then runs the unordered_map demo.
int main()
{
	const char* const separator = "xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";

	xiaoc::test_set();
	cout << separator << endl;
	xiaoc::test_map();

	return 0;
}