一、概念和特性
1、定义
B-Tree是一种平衡的多叉树,适用于外查找多路搜索树,这种数据结构能够保证数据节点查找、顺序访问、插入、删除的动作,其平均时间复杂读控制在O(logN)内;B树为系统大块数据的读写操作做了优化,少定位记录时所经历的中间过程,加快存储速度。
理解B-Tree的工作原理,必须先介绍阶的概念。假如一个m阶的B-Tree,则这个数需要满足以下条件:
(1)定义任意非叶子结点最多只有m(m>2)个子节点;
(2)根节点的儿子节点数为[2, m],除根节点以外的非叶子节点的儿子数为[m/2, m];
(3)每个节点存放[m/2-1,m-1]个关键字(m/2向上取整);
(4)非叶子节点有k个子节点,则关键字个数为k-1;
(5)非叶子节点关键字k[i+1]大于k[i]所指向的儿子节点,k[i]小于自生所指向的儿子节点;
(6)所有的叶子节点位于同一层;
2、特性
(1)关键字集合分布在整颗树中,任何一个关键字出现且只出现在一个结点中;
(2)搜索有可能在非叶子节点结束;
(3)其搜索性能等价于在关键字全集内做一次二分查找;
(4)自动层次控制。
由于m/2的限制,在插入结点时,如果结点已满,需要将结点分裂为两个各占m/2的结点;删除结点时,需将两个不足m/2的兄弟节点合并。
二、B树的应用
B-Tree这种数据结构常被应用在数据库和文件系统的实现上。
B-Tree是为磁盘等外存储设备设计的一种平衡查找树。系统从磁盘读取数据到内存时是以磁盘块(block)为基本单位的,位于同一个磁盘块中的数据会被一次性读取到内存中,而不是需要什么取什么,这将会减少磁盘I/O次数,提高查询效率,这遵循计算机的局部性原理。
三、B-Tree的变体B+Tree
B+Tree是在B-Tree基础上的一种优化,使其更适合实现外存储索引结构, B+Tree具有以下几个特点:
1、有m个子树的节点包含有m个元素(B-Tree中是m-1);
2、根节点和分支节点中不保存数据,只用于索引,所有数据都保存在叶子节点中;
3、所有分支节点和根节点都同时存在于子节点中,在子节点元素中是最大或者最小的元素;
4、叶子节点会包含所有的关键字,以及指向数据记录的指针,并且叶子节点本身是根据关键字的大小从小到大顺序链接。
以数据库mysql为例,mysql的InnoDB存储引擎中有页的概念,页是其磁盘管理的最小单位。InnoDB存储引擎中默认每个页的大小为16KB,因此InnoDB每次申请磁盘空间时都会是若干地址连续磁盘块来达到页的大小16KB,InnoDB在把磁盘数据读入到磁盘时会以页为基本单位,在查询数据时如果一个页中的每条数据都能有助于定位数据记录的位置,这将会减少磁盘I/O次数,提高查询效率。
假如非叶子节点索引关键字平均大小为100byte,那么三层树高可以支持(16*1024/100)*(16*1024/100)*(16*1024*100),大约支持430万条记录高效查询。
四、B-Tree编码实现
1、函数声明
#ifndef _B_TREE_H__
#define _B_TREE_H__
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <math.h>
#include <string.h>
#define DEGREE 5
#define KEY_NUM (DEGREE-1)
#define MID (DEGREE/2)
// 插入数据成功
#define INSERT_SUC 0;
// 插入数据失败
#define INSERT_FAIL 500;
typedef int BtreeData;
typedef struct btreenode
{
/**关键字数组, 数组长度为DEGREE-1*/
BtreeData* keys;
/**孩子结点*/
struct btreenode** childNodes;
/**父节点:为了便于操作减少遍历*/
struct btreenode* parent;
/**当前关键字个数*/
int curKeysNum;
}BtreeNode;
/**
* 创建结点
* @brief create_btreenode
* @return
*/
BtreeNode* create_btreenode();
/**
* 插入数据
* @brief insert_btreenode
* @param root
* @param data
* @return
*/
int insert_btreenode(BtreeNode** root,BtreeData data);
/**
* 查找关键字下标位置
* @brief find_keys_index
* @return
*/
int find_keys_index(BtreeNode* node,BtreeData data);
/**
* 通过下标查找child节点
* @brief find_node_by_index
* @param node
* @param index
* @return
*/
BtreeNode* find_childnode_by_keyindex(BtreeNode* node,BtreeData data);
/**
* 将当前数值插入到关键字数组
* @brief insert_data_to_keys
* @return index 返回关键字坐标
*/
int add_data_to_keys(BtreeNode* node,BtreeData data);
/**
* 分裂操作
* @brief division_btreenode
* @param node
*/
void division_btreenode(BtreeNode** root,BtreeNode* node);
/**
* 查找关键字所在的节点
* @brief find_btrenode_by_key
* @param root
* @param node
* @return
*/
BtreeNode* find_btrenode_by_key(BtreeNode* root,BtreeData data);
/**
* 删除节点
* @brief delete_btreenode
* @param root
* @param data
*/
void delete_btreenode(BtreeNode** root,BtreeData data);
/**
* 删除关键字
* @brief delete_key_by_index
* @param root
* @param data
*/
void delete_key_by_index(BtreeNode* node,BtreeData data);
/**
* 合并操作
* @brief merge_btreenode
* @param root
* @param node
*/
void merge_btreenode(BtreeNode** root,BtreeNode* node,BtreeData data);
/**
* 遍历某个节点的关键字
* @brief show_node_kyes
* @param node
* @param func
*/
void show_node_kyes(BtreeNode* node);
/**
* 遍历B-Tree
* @brief show_btreenode
* @param root
*/
void show_btreenode(BtreeNode* root);
#endif
2、函数定义
#include "btree.h"
BtreeNode* create_btreenode()
{
BtreeNode* btreeNode = (BtreeNode*) malloc(sizeof(BtreeNode));
if(btreeNode==NULL)
{
perror("malloc btree_node failed");
return NULL;
}
int keySizeLen = sizeof(BtreeData)*KEY_NUM;
BtreeData* keys = (BtreeData*) malloc(keySizeLen);
memset(keys,0,keySizeLen);
if(keys==NULL)
{
perror("malloc keys failed");
free(btreeNode);
return NULL;
}
int childSizeLen = sizeof(BtreeNode*)*DEGREE;
BtreeNode** childNode = (BtreeNode**) malloc(sizeof(BtreeNode*)*DEGREE);
memset(childNode,0,childSizeLen);
if(childNode==NULL)
{
perror("malloc childNode failed");
free(keys);
free(btreeNode);
return NULL;
}
btreeNode->keys = keys;
btreeNode->childNodes=childNode;
btreeNode->parent=NULL;
btreeNode->curKeysNum=0;
return btreeNode;
}
int insert_btreenode(BtreeNode** rootPtr,BtreeData data)
{
///初始化根节点
if((*rootPtr)==NULL)
{
BtreeNode* newNode = create_btreenode();
if(newNode==NULL)
{
return INSERT_FAIL;
}
newNode->keys[0] = data;
newNode->curKeysNum = 1;
(*rootPtr)= newNode;
return INSERT_SUC;
}
///查找适合插入数据的节点
BtreeNode* tragetNode = find_childnode_by_keyindex(*rootPtr,data);
show_node_kyes(tragetNode);
printf("\n");
/// 添加关键字
if(tragetNode!=NULL)
{
//添加关键字
add_data_to_keys(tragetNode,data);
//当关键字达到KEY_NUM时候,插入新的关键字需要对节点进行分裂操作
division_btreenode(rootPtr,tragetNode);
return INSERT_SUC;
}
return INSERT_FAIL;
}
int find_keys_index(BtreeNode* node,BtreeData data)
{
int index = 0;
BtreeData* keys = node->keys;
//平衡查找当前节点关键字的索引
while(index<node->curKeysNum && keys[index]<data)
{
index++;
}
return index;
}
BtreeNode* find_childnode_by_keyindex(BtreeNode* node,BtreeData data)
{
///printf("find_childnode_by_keyindex-----data=%d\n",data);
if(node == NULL)
{
perror("find_childnode_by_index fail ,node is null ");
return NULL;
}
BtreeNode** chids=node->childNodes;
if((*chids)==NULL)
{
// 到达叶子节点终止递归
return node;
}
/// 根据索引查找当前数值适合存放的节点
int index = find_keys_index(node,data);
///递归查找子节点
return find_childnode_by_keyindex(chids[index],data);
}
int add_data_to_keys(BtreeNode* node,BtreeData data)
{
///printf("add_data_to_keys key -> data=%d\n",data);
int c = 0 ;
int index = find_keys_index(node,data);
BtreeData* keys = node->keys;
node->curKeysNum = node->curKeysNum +1;
BtreeData tmp[KEY_NUM];
for(;c<KEY_NUM;c++)
{
tmp[c] = keys[c];
}
c = 0 ;
for(;c<node->curKeysNum;c++)
{
if(c >index)
{
keys[c] = tmp[c-1];
}
}
keys[index] = data;
return index;
}
/**
* 分裂操作
* @brief division_btreenode
* @param root
* @param node
*/
void division_btreenode(BtreeNode** root,BtreeNode* node)
{
if(node->curKeysNum<=KEY_NUM)
{
return;
}
printf("division_btreenode start \n");
BtreeNode* leftNode = create_btreenode();
if(leftNode==NULL)
{
perror("create left node fail");
return;
}
BtreeNode* rightNode = create_btreenode();
if(rightNode==NULL)
{
perror("create right node fail");
free(leftNode);
return;
}
BtreeData* keys = node->keys;
int index = 0;
while(index < DEGREE/2)
{
leftNode->keys[index]=node->keys[index];
index++;
}
index = 0;
while(index<DEGREE/2)
{
rightNode->keys[index] = node->keys[DEGREE/2+index+1];
index++;
}
leftNode->curKeysNum = DEGREE/2;
rightNode->curKeysNum = DEGREE/2;
BtreeData divisionKey = keys[MID];
BtreeNode* parent = node->parent;
if(parent == NULL)
{
/// 根节点分裂
BtreeNode** childs = node->childNodes;
if((*childs)!=NULL)
{
index = 0 ;
int m=0,n=0;
while (index<=DEGREE) {
if(index<=MID)
{
leftNode->childNodes[m]=childs[index];
m++;
}
if(index>MID)
{
rightNode->childNodes[n]=childs[index];
n++;
}
index++;
}
}
index = 0;
//重置当前node节点的childs的parent节点
while (index<=DEGREE && (*(node->childNodes))!=NULL) {
node->childNodes[index]->parent =index<=MID?leftNode:rightNode;
index++;
}
leftNode->parent = node;
rightNode->parent= node;
node->childNodes[0] = leftNode;
node->childNodes[1] = rightNode;
node->curKeysNum=1;
/// 重置根结点
node->keys[0]=divisionKey;
index= 1;
while (index<KEY_NUM) {
node->keys[index]= 0;
index++;
}
(*root)=node;
}
else
{
int keyIndex = add_data_to_keys(parent,divisionKey);
index = 0;
BtreeNode** childs = parent->childNodes;
BtreeNode* temp[DEGREE];
for(;index<DEGREE;index++)
{
temp[index]= childs[index];
}
index = keyIndex;
while(index<=KEY_NUM-2)
{
childs[index+2]=temp[index+1];
index++;
}
index = 0;
重置当前node节点的childs的parent节点
while (index<=DEGREE && (*(node->childNodes))!=NULL) {
node->childNodes[index]->parent =index<=MID?leftNode:rightNode;
index++;
}
leftNode->parent = parent;
rightNode->parent= parent;
childs[keyIndex] = leftNode;
childs[keyIndex+1]= rightNode;
///递归处理分裂操作
division_btreenode(root,parent);
}
}
BtreeNode* find_btrenode_by_key(BtreeNode* root,BtreeData data)
{
if((root)==NULL)
{
return NULL;
}
int index = find_keys_index(root,data);
BtreeData keyData = root->keys[index];
if(keyData==data)
{
return root;
}
BtreeNode* node = root->childNodes[index];
return find_btrenode_by_key(node, data);
}
/**
* 删除节点
* @brief delete_btreenode
* @param root
* @param data
*/
void delete_btreenode(BtreeNode** root,BtreeData data)
{
BtreeNode* tragetNode = find_btrenode_by_key(*root,data);
///BtreeNode** childs = tragetNode->childNodes;
delete_key_by_index(tragetNode,data);
//处逻辑理合并
merge_btreenode(root,tragetNode,data);
}
void delete_key_by_index(BtreeNode* node,BtreeData data)
{
int index = find_keys_index(node,data);
BtreeData* keys = node->keys;
BtreeData tmp[KEY_NUM];
int c = 0;
for(;c<KEY_NUM;c++)
{
tmp[c] = c<node->curKeysNum?keys[c]:0;
}
while(index < node->curKeysNum)
{
keys[index]=tmp[index+1];
index++;
}
node->curKeysNum =node->curKeysNum-1;
}
/**
* 合并操作
* @brief merge_btreenode
* @param root
* @param node
*/
void merge_btreenode(BtreeNode** root,BtreeNode* node,BtreeData data)
{
if(node==NULL)
{
return;
}
if(node->parent==NULL)
{
(*root) = node;
return;
}
BtreeNode* parent = node->parent;
int index = find_keys_index(parent,data);
BtreeNode** childs = parent->childNodes;
BtreeNode* leftNode;
BtreeNode* rightNode;
if(index<parent->curKeysNum)
{
leftNode=childs[index];
rightNode=childs[index+1];
}
else
{
leftNode=childs[index-1];
rightNode=childs[index];
}
int totalKeysNum= 1 + leftNode->curKeysNum + rightNode->curKeysNum;
if(totalKeysNum>KEY_NUM)
{
return;
}
///执行合并
int c = 0;
int leftMidIndex = leftNode->curKeysNum;
BtreeData parentMergeData = parent->keys[index-1];
leftNode->keys[leftMidIndex] = parentMergeData;
//重置关键字
while (c < rightNode->curKeysNum) {
leftNode->keys[leftMidIndex+1+c]=rightNode->keys[c];
c++;
}
c= 0;
/// 重置子节点
while(c<=rightNode->curKeysNum)
{
leftNode->childNodes[leftMidIndex+1+c]= rightNode->childNodes[c];
c++;
}
leftNode->curKeysNum = totalKeysNum;
free(rightNode);
if(parent->parent==NULL)
{
(*root) = leftNode;
free(parent);
return;
}
delete_key_by_index(parent,parentMergeData);
merge_btreenode(root,parent,data);
}
void show_node_kyes(BtreeNode* node)
{
if(node==NULL)
{
printf("show_node_kyes node is null \n");
return;
}
int len = node->curKeysNum;
BtreeData* keys = node->keys;
int c = 0;
for(;c<len;c++)
{
printf("%d ",keys[c]);
}
}
void show_btreenode(BtreeNode* root)
{
if(root==NULL)
{
return;
}
BtreeNode** childs = root->childNodes;
int len = root->curKeysNum;
int c = 0 ;
show_node_kyes(root);
printf("\n");
/// 为了方便看结果,这里值展示三层效果,如果需要展多层,可采用递归查询
while((*childs)!=NULL && c<=len){
BtreeNode* chid = childs[c];
show_node_kyes(chid);
c++;
}
printf("\n");
c=0;
while((*childs)!=NULL && c<=len){
BtreeNode* chid = childs[c];
BtreeNode** subChids = chid->childNodes;
int n = 0;
int subLen = chid->curKeysNum;
while ((*subChids)!=NULL && n<=subLen ) {
BtreeNode* tmp = subChids[n];
show_node_kyes(tmp);
n++;
}
c++;
}
printf("\n");
}
3、测试
#include <stdio.h>
#include "btree.h"
int main()
{
printf("Hello World Btree come !\n");
BtreeData arr[] = {20,10,11,12,30,31,32,33,15,17,18,5,8,26,35,6,36,37};
//-----------------添加元素 分裂--------------------------
// 18
// 8 12 -> 31 35
//5 6 -> 10 11 -> 15 17 -> 20 26 30 -> 32 33 -> 36 37
BtreeNode* root = NULL;
int i ;
int len = sizeof(arr)/sizeof(BtreeData);
for(i=0;i<len;i++)
{
insert_btreenode(&root,arr[i]);
}
printf("\nstart ------show tree--------------add\n");
show_btreenode(root);
//-----------------删除元素 合并---------------------------
// 8 12 18 31
//5 6 -> 10 11 -> 15 17 -> 20 26 30 -> 32 33 35 36
printf("\nstart ------show tree--------------del\n");
delete_btreenode(&root,37);
show_btreenode(root);
return 0;
}