unordered_map与unordered_set的实现（含迭代器）

unordered_map与unordered_set的实现

文章目录

unordered_map与unordered_set的实现
前言
一、问题一
- HashTable.h
二、问题二&问题三
- 1.封装时如何取出key
- 2.不同类型key如何建立对应关系
三、问题四&问题五
- 问题四
- 问题五
四、实现代码
- MyUnorderedSet.h
- MyUnorderedMap.h
- HashTable.h

前言

在C++11中新增了两个很好用的容器，分别是unordered_map与unordered_set，和map和set有不同之处

map与set的底层实现是平衡树——红黑树，并且采取中序遍历时默认有序

而本文的unordered_map与unordered_set底层实现是哈希思想（拉链法），并且他的存储方式不支持排序，所以是unordered

两者的查找效率综合比较，会发现unordered_map与unordered_set的效率综合会更高，所以在C++11中支持了unordered_map与unordered_set

本文中是用哈希桶的拉链法来封装unordered_map与unordered_set
这里的封装与红黑树封装map和set的封装相似，但此处会更难

具体通过解决问题来实现

HashTable的迭代器实现
封装时HashTable如何取出map的key和set的key
取出key后如何针对不同类型key建立映射关系
如何解决Set中key不能被修改，Map中key不能被修改，value能被修改的问题
Insert插入返回值问题以及Map[]的重载实现

关于哈希思想及其具体实现细节看我的上篇文章：数据结构之哈希表

一、问题一

这里就一步步，先实现iterator再实现const_iterator版本了，而是直接放出能适配iterator与const_iterator的版本，本质上就是用类模板泛型编程，需要什么就调用什么，是什么类型就返回什么类型的迭代器

这里尤为注意的一点是，这里的迭代器是单向迭代器，只支持++，而由于底层是哈希表的拉链法实现的，是数组与链表结合的方式
在实现运算符重载++时，本质上就是在逐个遍历哈希桶，而当前桶走完的时候，需要进入下一个桶，那么如何判断当前桶的位置，以及如何找到下一个桶，就需要把这个数组或者整个哈希表传过来，这里我们做的是把整个哈希表传过来

注意：其实将数组传过来会更简单些，传哈希表会有一些问题

我们将哈希表传过来，是可能要访问哈希表内的私有变量来获得下一个桶，而直接在_HTIterator这个类内使用哈希表内的私有变量是不可取的，所以需要在哈希表内声明友元
此处还涉及编译问题，由于编译器是从上往下编译代码，我们将迭代器写在哈希表代码的上面，而迭代器中有哈希表，这里编译器并不认识哈希表，因为哈希表的定义还未出现，所以还需要哈希表对应的类的声明

	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	friend struct _HTIterator;

	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

至于class KeyOfT 与 class Hash这两个类模板的作用，则在下文中解答

HashTable.h

namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}

		T _data;
		HashNode* _next;
	};


	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	struct _HTIterator
	{
		typedef HashNode<T> Node;
		typedef _HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

		typedef _HTIterator<K, T, T&, T*, KeyOfT, Hash> iterator;


		_HTIterator(Node* node, HashTable<K, T, KeyOfT, Hash>* pht)
			:_node(node)
			,_pht(pht)
		{}

		_HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht)
			:_node(node)
			, _pht(pht)
		{}
		

		_HTIterator(const iterator& x)
			:_node(x._node)
			, _pht(x._pht)
		{}


		Ref operator*()
		{
			return _node->_data;
		}

		Ptr operator->()
		{
			return &_node->_data;
		}



		Self operator++()
		{
			Hash hf;
			KeyOfT kot;

			size_t hashi = hf(kot(_node->_data)) % _pht->_t.size();

			if (_node->_next != nullptr)
			{
				_node = _node->_next;
			}
			else
			{
				hashi++;

				while (hashi < _pht->_t.size())
				{
					if (_pht->_t[hashi])
					{
						_node = _pht->_t[hashi];
						break;
					}

					hashi++;
				}

			}
			
			if (hashi == _pht->_t.size())
			{
				_node = nullptr;
			}

			return *this;
		}
		
		bool operator==(const Self& it)
		{
			return _node == it._node;
		}

		bool operator!=(const Self& it)
		{
			return _node != it._node;
		}

		const HashTable<K, T, KeyOfT, Hash>* _pht;
		Node* _node;
	};



	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		typedef HashNode<T> Node;

		template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
		friend struct _HTIterator;

	public:

		HashTable(size_t n = 10)
		{
			_t.resize(n);
		}


		typedef _HTIterator<K, T, T&, T*, KeyOfT, Hash> iterator;
		typedef _HTIterator<K, T, const T&, const T*, KeyOfT, Hash> const_iterator;


		iterator begin()
		{
			size_t hashi = 0;

			while (hashi < _t.size())
			{
				if (_t[hashi])
				{
					break;
				}

				hashi++;
			}

			if (hashi == _t.size())
			{
				return iterator(nullptr, this);
			}

			return iterator(_t[hashi], this);
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}



		const_iterator begin()const
		{
			size_t hashi = 0;

			while (hashi < _t.size())
			{
				if (_t[hashi])
				{
					break;
				}

				hashi++;
			}

			if (hashi == _t.size())
			{
				return iterator(nullptr, this);
			}

			return iterator(_t[hashi], this);
		}

		const_iterator end()const
		{
			return iterator(nullptr, this);
		}



		pair<iterator,bool> Insert(const T& data)
		{
			KeyOfT kot;
			Hash hf;

			iterator ret = Find(kot(data));
			if (ret != end())
			{
				return make_pair(ret, false);
			}
	
			//扩容
			if (_n / _t.size() == 1)
			{
				size_t newsize = _t.size() * 2;

				HashTable newtable(newsize);

				for (int i = 0; i < _n; i++)
				{
					Node* cur = _t[i];
					while (cur)
					{
						Node* next = cur->_next;

						size_t hashi = hf(kot(cur->_data)) % newsize;

						cur->_next = newtable._t[hashi];
						newtable._t[hashi] = cur;

						cur = next;
					}

					_t[i] = nullptr;
				}

				swap(_t, newtable._t);
			}


			size_t  hashi = hf(kot(data)) % _t.size();

			Node* newnode = new Node(data);

			newnode->_next = _t[hashi];
			_t[hashi] = newnode;

			_n++;

			return make_pair(iterator(newnode, this), true);
		}

		iterator Find(const K& key)
		{
			Hash hf;
			KeyOfT kot;
			size_t hashi = hf(key) % _t.size();

			Node* cur = _t[hashi];

			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return iterator(cur, this);
				}
				
				cur = cur->_next;
			}

			return iterator(nullptr, this);
		}

		bool Erase(const K& key)
		{
			Hash hf;
			KeyOfT kot;
			size_t hashi = hf(key) % _t.size();

			Node* cur = _t[hashi];
			Node* prev = nullptr;

			if (kot(cur->_data) == key)
			{
				_t[hashi] = cur->_next;
				delete cur;
				return true;
			}

			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					prev->_next = cur->_next;
					delete cur;
					_n--;
					return true;
				}
				prev = cur;
				cur = cur->_next;
			}
			return false;
		}


	private:
		vector<HashNode<T>*> _t;
		size_t _n = 0;
	};
}

二、问题二&问题三

1.封装时如何取出key

首先解释一下问题，我们的目的是将unordered_map与unordered_set用哈希表封装实现，map中存的是pair，set中存的是key，而如何用一份哈希表适配两种结构呢

在封装的时候解决这个问题，在unordered_map与unordered_set中写一个内部类，这个类之中实现了一个仿函数，用来返回key，并且将其传给哈希表内

	template<class K, class V, class Hash = HashFunc<K>>
	class unordered_map
	{
	public:

		struct KeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return  kv.first;
			}
		};
	private:
		hash_bucket::HashTable<K, pair<const K, V>, KeyOfT, Hash> _ht;
	};

template<class K, class Hash = HashFunc<K>>
	class unordered_set
	{
	public:
		struct KeyOfT
		{
			const K& operator()(const K& key)
			{
				return  key;
			}
		};

	private:
		hash_bucket::HashTable<K, K, KeyOfT, Hash> _ht;
	};

2.不同类型key如何建立对应关系

本文的拉链法中，使用的哈希函数是除留余数法，如果key是int类型的话那正好，可以直接用key除，但如果key是string或者自定义类型，那么就不能够直接除了，则需要将其转换成int类型，另外一个模板参数Hash，则是将其转换方式传给哈希表

如果是内置类型的float，double之类的，我们可以直接强转成size_t返回
如果是string类型，由于string比较常用，我们可以为string搞个特化，默认支持string

上面两个都是默认支持的，用默认缺省值就行，不需要手动传Hash

而如果是自定义类型，则需要使用者通过接口手动传Hash，因为默认的缺省值用不了

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct HashFunc<string>
{
	size_t operator()(const string& key)
	{
		size_t hash = 0;
		for (auto e : key)
		{
			hash *= 13;
			hash += e;
		}

		return hash;
	}
};

三、问题四&问题五

问题四与问题五与set与map用红黑树封装的问题相同

问题四

set的iterator和const_iterator都是红黑树的const_iterator复用而来
map中的iterator是红黑树的iterator复用而来，const_iterator是红黑树的const_iterator复用而来
既然set中的迭代器都是const_iterator所以key自然不能被修改

	typedef typename hash_bucket::HashTable<K, K, KeyOfT, Hash>::const_iterator iterator;
	typedef typename hash_bucket::HashTable<K, K, KeyOfT, Hash>::const_iterator const_iterator;

map解决key不能被修改，value能被修改的原理也很简单，就是在实例化的时候，声明第二个模板参数——在map中也就是pair，pair的first是const类型

	private:
		hash_bucket::HashTable<K, pair<const K, V>, KeyOfT, Hash> _ht;

问题五

unordered_map与unordered_set与map和set的红黑树封装相同，insert的返回值都是一个键值对
在这里插入图片描述

first是一个迭代器，second是一个bool类型

基于此性质，引出了map的计数功能，可以通过insert返回的迭代器查看是否有key值，如果不存在则插入，将value值赋值为1，如果key已经存在，则通过insert返回的迭代器将value++，以此实现计数功能，所以map实现了operator[]，用来计数

	V& operator[](const K& key)
	{
		return _ht.Insert(make_pair(key, V())).first->second;
	}

四、实现代码

MyUnorderedSet.h

#include "HashTable.h"

namespace Tlzns
{
	template<class K, class Hash = HashFunc<K>>
	class unordered_set
	{
	public:
		struct KeyOfT
		{
			const K& operator()(const K& key)
			{
				return  key;
			}
		};



		typedef typename hash_bucket::HashTable<K, K, KeyOfT, Hash>::const_iterator iterator;
		typedef typename hash_bucket::HashTable<K, K, KeyOfT, Hash>::const_iterator const_iterator;



		iterator begin()const
		{
			return _ht.begin();
		}

		iterator end()const
		{
			return _ht.end();
		}


		pair<iterator, bool> Insert(const K& key)
		{
			auto ret = _ht.Insert(key);
			return pair<iterator, bool>(iterator(ret.first._node, ret.first._pht), ret.second);
		}

		bool Erase(const K& key)
		{
			return _ht.Erase(key);
		}

		iterator Find(const K& key)
		{
			return _ht.Find(key);
		}

	private:
		hash_bucket::HashTable<K, K, KeyOfT, Hash> _ht;
	};


	void test_set()
	{
		unordered_set<int> us;
		us.Insert(5);
		us.Insert(15);
		us.Insert(52);
		us.Insert(3);

		unordered_set<int>::iterator it = us.begin();
		while (it != us.end())
		{
			//*it += 5;
			cout << *it << " ";
			++it;
		}
		cout << endl;

		for (auto e : us)
		{
			cout << e << " ";
		}
		cout << endl;
	}
}

MyUnorderedMap.h

#include "HashTable.h"

namespace Tlzns
{
	template<class K, class V, class Hash = HashFunc<K>>
	class unordered_map
	{
	public:

		struct KeyOfT
		{
			const K& operator()(const pair<K, V>& kv)
			{
				return  kv.first;
			}
		};



		typedef typename hash_bucket::HashTable<K, pair<const K, V>, KeyOfT, Hash>::iterator iterator;
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, KeyOfT, Hash>::const_iterator const_iterator;


		iterator begin()
		{
			return _ht.begin();
		}

		iterator end()
		{
			return _ht.end();
		}

		const_iterator begin()const
		{
			return _ht.begin();
		}

		const_iterator end()const
		{
			return _ht.end();
		}


		pair<iterator, bool> Insert(const pair<K, V>& kv)
		{
			return _ht.Insert(kv);
		}
		
		bool Erase(const K& key)
		{
			return _ht.Erase(key);
		}

		iterator Find(const K& key)
		{
			return _ht.Find(key);
		}

		V& operator[](const K& key)
		{
			return _ht.Insert(make_pair(key, V())).first->second;
		}


	private:
		hash_bucket::HashTable<K, pair<const K, V>, KeyOfT, Hash> _ht;
	};

	void test_map()
	{
		unordered_map<string, string> dict;
		dict.Insert(make_pair("sort", ""));
		dict.Insert(make_pair("string", ""));
		dict.Insert(make_pair("insert", ""));

		unordered_map<string, string>::const_iterator it = dict.begin();

		for (auto& kv : dict)
		{
			//kv.first += 'x';
			kv.second += 'x';

			cout << kv.first << ":" << kv.second << endl;
		}
		cout << endl;

		string arr[] = { "香蕉", "甜瓜","苹果", "西瓜", "苹果", "西瓜", "苹果", "苹果", "西瓜", "苹果", "香蕉", "苹果", "香蕉" };
		unordered_map<string, int> count_map;
		for (auto& e : arr)
		{
			count_map[e]++;
		}

		for (auto& kv : count_map)
		{
			cout << kv.first << ":" << kv.second << endl;
		}
		cout << endl;
	}
}

HashTable.h

#include <iostream>
#include <vector>

using namespace std;

template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		return (size_t)key;
	}
};

template<>
struct HashFunc<string>
{
	size_t operator()(const string& key)
	{
		size_t hash = 0;
		for (auto e : key)
		{
			hash *= 13;
			hash += e;
		}

		return hash;
	}
};

namespace hash_bucket
{
	template<class T>
	struct HashNode
	{
		HashNode(const T& data)
			:_data(data)
			,_next(nullptr)
		{}

		T _data;
		HashNode* _next;
	};


	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
	struct _HTIterator
	{
		typedef HashNode<T> Node;
		typedef _HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

		typedef _HTIterator<K, T, T&, T*, KeyOfT, Hash> iterator;


		_HTIterator(Node* node, HashTable<K, T, KeyOfT, Hash>* pht)
			:_node(node)
			,_pht(pht)
		{}

		_HTIterator(Node* node, const HashTable<K, T, KeyOfT, Hash>* pht)
			:_node(node)
			, _pht(pht)
		{}
		

		_HTIterator(const iterator& x)
			:_node(x._node)
			, _pht(x._pht)
		{}


		Ref operator*()
		{
			return _node->_data;
		}

		Ptr operator->()
		{
			return &_node->_data;
		}



		Self operator++()
		{
			Hash hf;
			KeyOfT kot;

			size_t hashi = hf(kot(_node->_data)) % _pht->_t.size();

			if (_node->_next != nullptr)
			{
				_node = _node->_next;
			}
			else
			{
				hashi++;

				while (hashi < _pht->_t.size())
				{
					if (_pht->_t[hashi])
					{
						_node = _pht->_t[hashi];
						break;
					}

					hashi++;
				}

			}
			
			if (hashi == _pht->_t.size())
			{
				_node = nullptr;
			}

			return *this;
		}
		
		bool operator==(const Self& it)
		{
			return _node == it._node;
		}

		bool operator!=(const Self& it)
		{
			return _node != it._node;
		}

		const HashTable<K, T, KeyOfT, Hash>* _pht;
		Node* _node;
	};



	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		typedef HashNode<T> Node;

		template<class K, class T, class Ref, class Ptr, class KeyOfT, class Hash>
		friend struct _HTIterator;

	public:

		HashTable(size_t n = 10)
		{
			_t.resize(n);
		}


		typedef _HTIterator<K, T, T&, T*, KeyOfT, Hash> iterator;
		typedef _HTIterator<K, T, const T&, const T*, KeyOfT, Hash> const_iterator;


		iterator begin()
		{
			size_t hashi = 0;

			while (hashi < _t.size())
			{
				if (_t[hashi])
				{
					break;
				}

				hashi++;
			}

			if (hashi == _t.size())
			{
				return iterator(nullptr, this);
			}

			return iterator(_t[hashi], this);
		}

		iterator end()
		{
			return iterator(nullptr, this);
		}



		const_iterator begin()const
		{
			size_t hashi = 0;

			while (hashi < _t.size())
			{
				if (_t[hashi])
				{
					break;
				}

				hashi++;
			}

			if (hashi == _t.size())
			{
				return iterator(nullptr, this);
			}

			return iterator(_t[hashi], this);
		}

		const_iterator end()const
		{
			return iterator(nullptr, this);
		}



		pair<iterator,bool> Insert(const T& data)
		{
			KeyOfT kot;
			Hash hf;

			iterator ret = Find(kot(data));
			if (ret != end())
			{
				return make_pair(ret, false);
			}
	
			//扩容
			if (_n / _t.size() == 1)
			{
				size_t newsize = _t.size() * 2;

				HashTable newtable(newsize);

				for (int i = 0; i < _n; i++)
				{
					Node* cur = _t[i];
					while (cur)
					{
						Node* next = cur->_next;

						size_t hashi = hf(kot(cur->_data)) % newsize;

						cur->_next = newtable._t[hashi];
						newtable._t[hashi] = cur;

						cur = next;
					}

					_t[i] = nullptr;
				}

				swap(_t, newtable._t);
			}


			size_t  hashi = hf(kot(data)) % _t.size();

			Node* newnode = new Node(data);

			newnode->_next = _t[hashi];
			_t[hashi] = newnode;

			_n++;

			return make_pair(iterator(newnode, this), true);
		}

		iterator Find(const K& key)
		{
			Hash hf;
			KeyOfT kot;
			size_t hashi = hf(key) % _t.size();

			Node* cur = _t[hashi];

			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					return iterator(cur, this);
				}
				
				cur = cur->_next;
			}

			return iterator(nullptr, this);
		}

		bool Erase(const K& key)
		{
			Hash hf;
			KeyOfT kot;
			size_t hashi = hf(key) % _t.size();

			Node* cur = _t[hashi];
			Node* prev = nullptr;

			if (kot(cur->_data) == key)
			{
				_t[hashi] = cur->_next;
				delete cur;
				return true;
			}

			while (cur)
			{
				if (kot(cur->_data) == key)
				{
					prev->_next = cur->_next;
					delete cur;
					_n--;
					return true;
				}
				prev = cur;
				cur = cur->_next;
			}
			return false;
		}

	private:
		vector<HashNode<T>*> _t;
		size_t _n = 0;
	};
}