C++相关概念和易错语法（13）（string的模拟实现）

string由于存在字符串和单字符的概念，使得它的一些接口，实现要比vector多一些。本质上来看string的实现是在顺序表的基础上加入串相关的操作。下面我会分享如何模拟实现string，这可以进一步提高我们对string的熟练程度。

1.构造函数、拷贝构造和析构函数

// Builds a string from a NUL-terminated C string.
// Text of up to 15 characters is stored in the pre-allocated _buff; longer
// text goes to a heap block in _str sized exactly for it.
string::string(const char* str)
	:_size(strlen(str))
{
	const bool fits_in_buff = (_size <= 15);

	if (!fits_in_buff)
	{
		// The small buffer is not needed: free it and null the pointer so it
		// can never be deleted a second time.
		delete[] _buff;
		_buff = nullptr;

		_capacity = _size;
		_str = new char[_capacity + 1];
	}

	strcpy(fits_in_buff ? _buff : _str, str);
}


// Copy constructor: deep-copies str, taking over its size and capacity values.
string::string(const string& str)
	:_size(str._size)
	,_capacity(str._capacity)
{
	if (_capacity > 15)
	{
		// Large source: drop the default small buffer and clone the heap block.
		delete[] _buff;
		_buff = nullptr;

		_str = new char[_capacity + 1];
		strcpy(_str, str._str);
		return;
	}

	// Small source: its text fits in the small buffer this object already owns.
	strcpy(_buff, str._buff);
}



// Releases whichever buffer is active (selected by _capacity) and resets the
// bookkeeping fields.
string::~string()
{
	if (_capacity > 15)
	{
		delete[] _str;
		_str = nullptr;
	}
	else
	{
		delete[] _buff;
		_buff = nullptr;
	}

	_size = 0;
	_capacity = 0;
}

这里我使用了_buff和_str，使他们分情况存储。当存储的字符不超过15个的时候（实际开了16个字节，预留一个给\0），就存到_buff里，超过15个就全部移到_str中。

在这里需要注意的是_str和_buff一定要控制好，当转移数据，从_str转移到_buff时，一定要在释放_str后让_str置空，否则很多地方会出现连续delete两次导致报错，我当时实现的时候这个bug找了很久，如果想像VS2022那样使用两个字符串来管理string的话一定要注意。

2.swap


	// Exchanges the complete state of two strings by swapping every member.
	// Only pointers and counters move; no character data is copied.
	void string::swap(string& str)
	{
		std::swap(_buff, str._buff);
		std::swap(_str, str._str);
		std::swap(_capacity, str._capacity);
		std::swap(_size, str._size);
	}

我们实现swap绝大多数情况是为了交换数据，不需要深拷贝，因此直接交换成员变量的所有值即可。swap在后续的复用特别好用，后面会讲到。

3.迭代器的模拟

迭代器是封装的进一步体现，即不需要了解底层数据类型也能正确的用指针的方式对串的内容进行访问。

在串中，我们使用char*和const char*就可以很好地模拟迭代器了


	// Mutable iterator to the first character of whichever buffer is active.
	string::iterator string::begin()
	{
		if (_capacity > 15)
			return _str;
		return _buff;
	}

	// Mutable iterator one past the last stored character.
	string::iterator string::end()
	{
		iterator first = (_capacity <= 15) ? _buff : _str;
		return first + _size;
	}

	// Read-only iterator to the first character of whichever buffer is active.
	string::const_iterator string::begin() const
	{
		if (_capacity > 15)
			return _str;
		return _buff;
	}

	// Read-only iterator one past the last stored character.
	string::const_iterator string::end() const
	{
		const_iterator first = (_capacity <= 15) ? _buff : _str;
		return first + _size;
	}

4.访问

使用运算符重载可以使我们像普通数组那样去访问数据，注意区分_buff和_str

// Returns the character at index pos (asserts pos < size()).
// The character is returned by value, so the result cannot be assigned through.
char string::operator[](size_t pos)
{
	assert(pos < _size);

	const char* data = (_capacity <= 15) ? _buff : _str;
	return data[pos];
}

// Read-only overload: returns the character at index pos (asserts pos < size()).
const char string::operator[](size_t pos) const
{
	assert(pos < _size);

	const char* data = (_capacity <= 15) ? _buff : _str;
	return data[pos];
}

5.赋值运算符=

这里就能很好的体现出swap的复用优势了，因为赋值意味着原来的数据可以被丢弃，我们借助自动调用析构的特性将可以丢弃的数据和有用的数据交换，使无用的数据存到临时变量（形参）中，在函数结束的时候自动调用析构帮我们销毁了。

	// Copy-and-swap assignment. `str` is taken by value, so it is already a
	// private copy of the right-hand side; after the swap it holds this
	// object's previous state and releases it when it goes out of scope.
	string& string::operator=(string str)
	{
		this->swap(str);
		return *this;
	}

利用复用，我们的代码可以非常简约，但是实际上执行的复杂度是没有改变的，因为复用是函数套用使得表层逻辑简单，底层实现的逻辑是没有变的。

6.insert

insert是string的核心，因为这个函数能够实现几乎所有情况的插入，而erase也可借助insert实现，push_back、append可以直接复用insert，所以这个函数的实现直接影响后续所有插入删除。


	// Inserts n copies of c in front of the character at index pos.
	// pos may equal size(), which appends. Returns *this.
	string& string::insert(size_t pos, size_t n, char c)
	{
		assert(pos <= _size);// pos == _size appends

		if (n == 0)
			return *this;

		// Grow based on the size the string will have after the insertion.
		// _size is updated only at the very end, so reserve() and the shift
		// below both still work with the real, terminated length.
		if (_size + n > _capacity)
			reserve((_capacity + n) * 2);

		// Fetch the buffer only now: reserve() may have replaced it.
		char* data = _capacity <= 15 ? _buff : _str;

		// Shift the tail, terminator included, n slots to the right. Walking
		// backwards ensures nothing is overwritten before it has been copied.
		for (size_t cur = _size + n; cur >= pos + n; cur--)
			data[cur] = data[cur - n];

		for (size_t count = 0; count < n; count++)
			data[pos + count] = c;

		_size += n;
		return *this;
	}

	// Inserts the substring str[subpos, subpos + len) in front of index pos.
	// len is clamped to the characters available after subpos, so the default
	// npos means "to the end of str". Returns *this.
	string& string::insert(size_t pos, const string& str, size_t subpos, size_t len)
	{
		assert(pos <= _size);
		assert(subpos <= str._size);

		if (&str == this)
		{
			// Inserting a string into itself: the shift below would overwrite
			// the very characters that still have to be copied, so work from
			// an independent snapshot instead.
			const string snapshot(str);
			return insert(pos, snapshot, subpos, len);
		}

		if (len > str._size - subpos)
			len = str._size - subpos;

		if (len == 0)
			return *this;

		if (_size + len > _capacity)
			reserve((_capacity + len) * 2);

		// Resolve both buffers once, after reserve() has settled the storage.
		char* dst = _capacity <= 15 ? _buff : _str;
		const char* src = str._capacity <= 15 ? str._buff : str._str;

		// Shift the tail, terminator included, len slots to the right.
		for (size_t cur = _size + len; cur >= pos + len; cur--)
			dst[cur] = dst[cur - len];

		for (size_t count = 0; count < len; count++)
			dst[pos + count] = src[subpos + count];

		_size += len;
		return *this;
	}

大部分的代码是很简单的，但是在数据挪动的时候下标是一个大难题。这需要我们总结一些技巧。

其中着重理解闭、开、个数之间的关系，可以很好帮我们判断下标问题

7.erase


	// Removes up to len characters starting at pos (asserts pos < size()).
	// Implemented by rebuilding the string from the pieces that survive,
	// reusing insert() and operator+.
	string& string::erase(size_t pos, size_t len)
	{
		assert(pos < _size);

		const size_t available = _size - pos;

		if (len > available)
		{
			// Everything from pos onwards goes: keep only the first pos characters.
			string head;
			head.insert(0, *this, 0, pos);
			swap(head);
			return *this;
		}

		// Keep the part before the hole and the part after it, then join them.
		string head, tail;
		head.insert(0, *this, 0, pos);
		tail.insert(0, *this, pos + len);

		*this = head + tail;
		return *this;
	}

借助复用我们可以进一步实现erase，当从pos开始全部删除时用swap，其余情况分成两段insert，最后加起来，这里我提前用了operator+，operator+很好实现，只是我这里讲解的顺序不一样。

insert、erase附加操作这里就不展开了，最后我会分享全部代码。

8.比较

比较运算符重载我们其实只需要实现其中的一两个，其它全部用上层逻辑联系起来，可以很快实现


	// True when this string orders after str according to strcmp.
	bool string::operator>(const string& str) const
	{
		const char* lhs = (_capacity <= 15) ? _buff : _str;
		const char* rhs = (str._capacity <= 15) ? str._buff : str._str;
		return strcmp(lhs, rhs) > 0;
	}

	// True when this string orders after or equal to str according to strcmp.
	bool string::operator>=(const string& str) const
	{
		const char* lhs = (_capacity <= 15) ? _buff : _str;
		const char* rhs = (str._capacity <= 15) ? str._buff : str._str;
		return strcmp(lhs, rhs) >= 0;
	}

	// Defined as the negation of operator>=.
	bool string::operator<(const string& str) const
	{
		const bool not_less = (*this >= str);
		return !not_less;
	}

	// Defined as the negation of operator>.
	bool string::operator<=(const string& str) const
	{
		const bool greater = (*this > str);
		return !greater;
	}

	// Defined as the negation of operator==.
	bool string::operator!=(const string& str) const
	{
		const bool same = (*this == str);
		return !same;
	}

	// True when strcmp reports the two active buffers as equal.
	bool string::operator==(const string& str) const
	{
		const char* lhs = (_capacity <= 15) ? _buff : _str;
		const char* rhs = (str._capacity <= 15) ? str._buff : str._str;
		return strcmp(lhs, rhs) == 0;
	}

9.流插入和流提取


	// Stream insertion. Writes a debugging view of str: a label naming the
	// buffer in use, the characters, then size and capacity, ending with endl.
	ostream& operator<<(ostream& out, const string& str)
	{
		const bool in_buff = (str.capacity() <= 15);
		out << (in_buff ? "_buff: " : "_str: ");

		for (string::const_iterator it = str.begin(); it != str.end(); ++it)
			out << *it;

		out << "    _size:" << str.size() << "    _capacity:" << str.capacity() << endl;

		return out;
	}

	// Stream extraction. Reads one token delimited by spaces or newlines and
	// stores it in str, replacing the previous contents.
	// - Leading separators are skipped; the separator after the token is left
	//   in the stream.
	// - Reading stops cleanly at end of input instead of looping forever.
	// - If no character could be read, failbit is set and str is left untouched.
	istream& operator>>(istream& in, string& str)
	{
		const int eof = istream::traits_type::eof();

		// peek()/get() hand back an int so that end-of-file stays
		// distinguishable from every real character value.
		int next = in.peek();
		while (next == ' ' || next == '\n')
		{
			in.get();
			next = in.peek();
		}

		string token;
		while (next != eof && next != ' ' && next != '\n')
		{
			token += static_cast<char>(in.get());
			next = in.peek();
		}

		if (token.size() == 0)
			in.setstate(ios_base::failbit);
		else
			str.swap(token);

		return in;
	}

唯一需要注意的是流提取（operator>>）要使用in.get()逐个字符读取。最好不要用scanf：operator>>拿到的是任意istream，未必是stdin，而且一旦关闭sync_with_stdio，C++和C的缓冲区就不再同步。也不要用cin>>来读单个字符，因为它会直接跳过空格和换行，循环根本停不下来。

全部代码汇总：

string.h



#include <assert.h>
#include <cstring>
#include <iostream>
using namespace std;

namespace my_string
{
	// Teaching re-implementation of a string class. Text is kept in one of two
	// heap buffers chosen by capacity: _buff while the capacity is at most 15,
	// _str once it is larger.
	class string
	{
	public:

		// "No position" sentinel; its value is supplied where it is defined.
		// It is used as the default `len` argument and as the find() miss result.
		static const int npos;

		// Iterators are plain character pointers.
		using iterator = char*;
		using const_iterator = const char*;

		// ---- construction, destruction, assignment ----
		string(const char* str = "");
		string(const string& str);
		~string();
		string& operator=(string str);
		void swap(string& str);

		// ---- observers ----
		size_t size() const;
		size_t capacity() const;
		const char* c_str() const;

		// ---- iteration and element access ----
		iterator begin();
		iterator end();
		const_iterator begin() const;
		const_iterator end() const;
		char operator[](size_t pos);
		const char operator[](size_t pos) const;

		// ---- modification ----
		string& operator+=(const string& s);
		string& operator+=(char c);
		string& reserve(size_t newcapacity);
		string& insert(size_t pos, size_t n, char c);
		string& insert(size_t pos, const string& str, size_t subpos = 0, size_t len = npos);
		string& erase(size_t pos, size_t len = npos);
		string& push_back(char c);
		string& append(const string& str);
		string substr(size_t pos = 0, size_t len = npos) const;

		// ---- searching ----
		size_t find(char c, size_t pos = 0) const;
		size_t find(const string& str, size_t pos = 0) const;
		size_t find_first_of(const string& str, size_t pos = 0) const;
		size_t find_first_not_of(const string& str, size_t pos = 0) const;

		// ---- comparison ----
		bool operator>(const string& str) const;
		bool operator>=(const string& str) const;
		bool operator<(const string& str) const;
		bool operator<=(const string& str) const;
		bool operator!=(const string& str) const;
		bool operator==(const string& str) const;

	private:
		// Declaration order is significant: members are initialised in this order.
		char* _str = nullptr;          // large buffer, null until one is allocated
		size_t _size;                  // stored characters; set by every constructor
		size_t _capacity = 15;         // characters the active buffer can hold
		char* _buff = new char[16];    // small buffer: 15 characters plus '\0'
	};

	// Non-member operators.
	ostream& operator<<(ostream& out, const string& str);
	istream& operator>>(istream& in, string& str);
	string operator+(const string& s1, const string& s2);


}

string.cpp

#define _CRT_SECURE_NO_WARNINGS 1

#include "string.h"

namespace my_string
{
	// Definition of the "no position" sentinel. Stored as -1; wherever it is
	// converted to size_t (default `len` arguments, find() return values) it
	// becomes the largest size_t value.
	const int string::npos = -1;

	// Exchanges the complete state of two strings by swapping every member.
	// Only pointers and counters move; no character data is copied.
	void string::swap(string& str)
	{
		std::swap(_buff, str._buff);
		std::swap(_str, str._str);
		std::swap(_capacity, str._capacity);
		std::swap(_size, str._size);
	}

	// Builds a string from a NUL-terminated C string.
	// Text of up to 15 characters is stored in the pre-allocated _buff; longer
	// text goes to a heap block in _str sized exactly for it.
	string::string(const char* str)
		:_size(strlen(str))
	{
		const bool fits_in_buff = (_size <= 15);

		if (!fits_in_buff)
		{
			// The small buffer is not needed: free it and null the pointer so
			// it can never be deleted a second time.
			delete[] _buff;
			_buff = nullptr;

			_capacity = _size;
			_str = new char[_capacity + 1];
		}

		strcpy(fits_in_buff ? _buff : _str, str);
	}

	// Copy constructor: deep-copies str, taking over its size and capacity values.
	string::string(const string& str)
		:_size(str._size)
		,_capacity(str._capacity)
	{
		if (_capacity > 15)
		{
			// Large source: drop the default small buffer and clone the heap block.
			delete[] _buff;
			_buff = nullptr;

			_str = new char[_capacity + 1];
			strcpy(_str, str._str);
			return;
		}

		// Small source: its text fits in the small buffer this object already owns.
		strcpy(_buff, str._buff);
	}


	// Releases whichever buffer is active (selected by _capacity) and resets
	// the bookkeeping fields.
	string::~string()
	{
		if (_capacity > 15)
		{
			delete[] _str;
			_str = nullptr;
		}
		else
		{
			delete[] _buff;
			_buff = nullptr;
		}

		_size = 0;
		_capacity = 0;
	}

	size_t string::size() const//获取size
	{
		return _size;
	}

	size_t string::capacity() const//获取capacity
	{
		return _capacity;
	}

	// Pointer to the NUL-terminated text in whichever buffer is active.
	const char* string::c_str() const
	{
		if (_capacity > 15)
			return _str;
		return _buff;
	}

	// Mutable iterator to the first character of whichever buffer is active.
	string::iterator string::begin()
	{
		if (_capacity > 15)
			return _str;
		return _buff;
	}

	// Mutable iterator one past the last stored character.
	string::iterator string::end()
	{
		iterator first = (_capacity <= 15) ? _buff : _str;
		return first + _size;
	}

	// Read-only iterator to the first character of whichever buffer is active.
	string::const_iterator string::begin() const
	{
		if (_capacity > 15)
			return _str;
		return _buff;
	}

	// Read-only iterator one past the last stored character.
	string::const_iterator string::end() const
	{
		const_iterator first = (_capacity <= 15) ? _buff : _str;
		return first + _size;
	}


	// Copy-and-swap assignment. `str` is taken by value, so it is already a
	// private copy of the right-hand side; after the swap it holds this
	// object's previous state and releases it when it goes out of scope.
	string& string::operator=(string str)
	{
		this->swap(str);
		return *this;
	}

	// Returns the character at index pos (asserts pos < size()).
	// The character is returned by value, so the result cannot be assigned through.
	char string::operator[](size_t pos)
	{
		assert(pos < _size);

		const char* data = (_capacity <= 15) ? _buff : _str;
		return data[pos];
	}

	// Read-only overload: returns the character at index pos (asserts pos < size()).
	const char string::operator[](size_t pos) const
	{
		assert(pos < _size);

		const char* data = (_capacity <= 15) ? _buff : _str;
		return data[pos];
	}

	// Appends s by inserting it at the current end. Returns *this.
	string& string::operator+=(const string& s)
	{
		return insert(_size, s, 0);
	}

	// Appends a single character by inserting one copy of it at the current
	// end. Returns *this.
	string& string::operator+=(char c)
	{
		return insert(_size, 1, c);
	}

	// Changes the capacity to newcapacity, which may be larger or smaller than
	// the current one. Requests below the current size are ignored. The text
	// ends up in _buff when newcapacity <= 15 and in _str otherwise.
	// Returns *this.
	string& string::reserve(size_t newcapacity)
	{
		if (newcapacity < _size)
			return *this;

		// Allocate the replacement before releasing anything: if the
		// allocation throws, the object is still intact.
		char* fresh = new char[newcapacity + 1];
		strcpy(fresh, _capacity <= 15 ? _buff : _str);

		// Only one of the two is non-null; deleting a null pointer is a no-op.
		delete[] _buff;
		delete[] _str;
		_buff = nullptr;
		_str = nullptr;// null both so neither can be deleted twice later

		if (newcapacity <= 15)
			_buff = fresh;
		else
			_str = fresh;

		_capacity = newcapacity;

		return *this;
	}

	// Inserts n copies of c in front of the character at index pos.
	// pos may equal size(), which appends. Returns *this.
	string& string::insert(size_t pos, size_t n, char c)
	{
		assert(pos <= _size);// pos == _size appends

		if (n == 0)
			return *this;

		// Grow based on the size the string will have after the insertion.
		// _size is updated only at the very end, so reserve() and the shift
		// below both still work with the real, terminated length.
		if (_size + n > _capacity)
			reserve((_capacity + n) * 2);

		// Fetch the buffer only now: reserve() may have replaced it.
		char* data = _capacity <= 15 ? _buff : _str;

		// Shift the tail, terminator included, n slots to the right. Walking
		// backwards ensures nothing is overwritten before it has been copied.
		for (size_t cur = _size + n; cur >= pos + n; cur--)
			data[cur] = data[cur - n];

		for (size_t count = 0; count < n; count++)
			data[pos + count] = c;

		_size += n;
		return *this;
	}

	// Inserts the substring str[subpos, subpos + len) in front of index pos.
	// len is clamped to the characters available after subpos, so the default
	// npos means "to the end of str". Returns *this.
	string& string::insert(size_t pos, const string& str, size_t subpos, size_t len)
	{
		assert(pos <= _size);
		assert(subpos <= str._size);

		if (&str == this)
		{
			// Inserting a string into itself: the shift below would overwrite
			// the very characters that still have to be copied, so work from
			// an independent snapshot instead.
			const string snapshot(str);
			return insert(pos, snapshot, subpos, len);
		}

		if (len > str._size - subpos)
			len = str._size - subpos;

		if (len == 0)
			return *this;

		if (_size + len > _capacity)
			reserve((_capacity + len) * 2);

		// Resolve both buffers once, after reserve() has settled the storage.
		char* dst = _capacity <= 15 ? _buff : _str;
		const char* src = str._capacity <= 15 ? str._buff : str._str;

		// Shift the tail, terminator included, len slots to the right.
		for (size_t cur = _size + len; cur >= pos + len; cur--)
			dst[cur] = dst[cur - len];

		for (size_t count = 0; count < len; count++)
			dst[pos + count] = src[subpos + count];

		_size += len;
		return *this;
	}


	// Removes up to len characters starting at pos (asserts pos < size()).
	// Implemented by rebuilding the string from the pieces that survive,
	// reusing insert() and operator+.
	string& string::erase(size_t pos, size_t len)
	{
		assert(pos < _size);

		const size_t available = _size - pos;

		if (len > available)
		{
			// Everything from pos onwards goes: keep only the first pos characters.
			string head;
			head.insert(0, *this, 0, pos);
			swap(head);
			return *this;
		}

		// Keep the part before the hole and the part after it, then join them.
		string head, tail;
		head.insert(0, *this, 0, pos);
		tail.insert(0, *this, pos + len);

		*this = head + tail;
		return *this;
	}


	// Appends one character; a thin wrapper over operator+=(char).
	string& string::push_back(char c)
	{
		return (*this += c);
	}

	// Appends a whole string; a thin wrapper over operator+=(const string&).
	string& string::append(const string& str)
	{
		return (*this += str);
	}

	// Returns a new string holding up to len characters of this string,
	// starting at pos. With the default len (npos), or any len reaching past
	// the end, the result runs to the end of the string. pos must not exceed
	// size(); insert() asserts this.
	string string::substr(size_t pos, size_t len) const
	{
		string tmp;

		// insert() clamps len to what is available after pos, so forwarding
		// both arguments is all that is needed.
		tmp.insert(0, *this, pos, len);

		return tmp;
	}

	// Returns the index of the first occurrence of c at or after pos, or npos
	// when there is none.
	size_t string::find(char c, size_t pos) const
	{
		size_t idx = pos;

		while (idx < _size)
		{
			if ((*this)[idx] == c)
				return idx;
			++idx;
		}

		return npos;
	}

	size_t string::find(const string& str, size_t pos) const//找字符串
	{
		for (size_t count = pos; count < _size - str._size + 1; count++)
		{
			if ((*this)[count] == str[0])
			{
				size_t start = count;

				for (auto e : str)
				{
					if (e == (*this)[start++])
						continue;
					break;
				}

				if (start - count == str._size)
					return count;

			}

		}
		return npos;
	}

	// Returns the smallest index at or after pos that holds any one of the
	// characters in str, or npos when none of them occurs (including when str
	// is empty).
	size_t string::find_first_of(const string& str, size_t pos) const
	{
		// npos converts to the largest size_t, so every real hit is smaller.
		size_t best = npos;

		for (auto e : str)
		{
			const size_t hit = find(e, pos);
			if (hit < best)
				best = hit;
		}

		return best;
	}

	size_t string::find_first_not_of(const string& str, size_t pos) const//找字符串第一次没出现的位置
	{
		size_t cur_pos = 0, next_pos = 0;

		if (cur_pos = find_first_of(str, pos) != 0)
			return cur_pos == npos ? 0 : cur_pos - 1;

		for (size_t count = 1; count < _size; count++)
		{
			next_pos = find_first_of(str, pos + count);
			if (cur_pos + 1 != next_pos)
				return cur_pos + 1;
			cur_pos = next_pos;
		}

		return npos;
	}

	// True when this string orders after str according to strcmp.
	bool string::operator>(const string& str) const
	{
		const char* lhs = (_capacity <= 15) ? _buff : _str;
		const char* rhs = (str._capacity <= 15) ? str._buff : str._str;
		return strcmp(lhs, rhs) > 0;
	}

	// True when this string orders after or equal to str according to strcmp.
	bool string::operator>=(const string& str) const
	{
		const char* lhs = (_capacity <= 15) ? _buff : _str;
		const char* rhs = (str._capacity <= 15) ? str._buff : str._str;
		return strcmp(lhs, rhs) >= 0;
	}

	// Defined as the negation of operator>=.
	bool string::operator<(const string& str) const
	{
		const bool not_less = (*this >= str);
		return !not_less;
	}

	// Defined as the negation of operator>.
	bool string::operator<=(const string& str) const
	{
		const bool greater = (*this > str);
		return !greater;
	}

	// Defined as the negation of operator==.
	bool string::operator!=(const string& str) const
	{
		const bool same = (*this == str);
		return !same;
	}

	// True when strcmp reports the two active buffers as equal.
	bool string::operator==(const string& str) const
	{
		const char* lhs = (_capacity <= 15) ? _buff : _str;
		const char* rhs = (str._capacity <= 15) ? str._buff : str._str;
		return strcmp(lhs, rhs) == 0;
	}

	// Stream insertion. Writes a debugging view of str: a label naming the
	// buffer in use, the characters, then size and capacity, ending with endl.
	ostream& operator<<(ostream& out, const string& str)
	{
		const bool in_buff = (str.capacity() <= 15);
		out << (in_buff ? "_buff: " : "_str: ");

		for (string::const_iterator it = str.begin(); it != str.end(); ++it)
			out << *it;

		out << "    _size:" << str.size() << "    _capacity:" << str.capacity() << endl;

		return out;
	}

	// Stream extraction. Reads one token delimited by spaces or newlines and
	// stores it in str, replacing the previous contents.
	// - Leading separators are skipped; the separator after the token is left
	//   in the stream.
	// - Reading stops cleanly at end of input instead of looping forever.
	// - If no character could be read, failbit is set and str is left untouched.
	istream& operator>>(istream& in, string& str)
	{
		const int eof = istream::traits_type::eof();

		// peek()/get() hand back an int so that end-of-file stays
		// distinguishable from every real character value.
		int next = in.peek();
		while (next == ' ' || next == '\n')
		{
			in.get();
			next = in.peek();
		}

		string token;
		while (next != eof && next != ' ' && next != '\n')
		{
			token += static_cast<char>(in.get());
			next = in.peek();
		}

		if (token.size() == 0)
			in.setstate(ios_base::failbit);
		else
			str.swap(token);

		return in;
	}

	// Concatenation: returns a new string made of s1 followed by s2.
	string operator+(const string& s1, const string& s2)
	{
		string joined;

		joined += s1;
		joined += s2;

		return joined;
	}

}