排序分为内排序和外排序,内排序是在内存中的排序。外排序指在磁盘中文件的排序,因为在磁盘中,不能进行下标访问,归并排序经常用于磁盘中文件的排序
假如有10亿个整形数据在磁盘中,要对它排序,内存中只有1G空间,10亿需要4G。可以将这个文件划分成好几块1G大小的排序,再对这些1G文件两两归并,得到4G的排序后文件
方便演示外排序,手动生成100的数据量,命名为data.txt文件每10个数据读到内存中一排序,可以用快速排序,排完序写到新文件,sub+编号.txt,不断循环对这些小文件归并汇总并生成新文件
#define _CRT_SECURE_NO_WARNINGS 1
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
void Swap(int* a, int* b)
{
int temp = *a;
*a = *b;
*b = temp;
}
//三数取中
int GetIndex(int ary[], int left, int right)
{
//int mid = (left + right) / 2;
int mid = left + rand() % (right - left);
if (ary[mid] > ary[left])
{
if (ary[mid] < ary[right])
{
return mid;
}
else if (ary[left] > ary[right])
{
return left;
}
else
{
return right;
}
}
//ary[mid] < ary[left]
else
{
if (ary[mid] > ary[right])
{
return mid;
}
else if (ary[left] > ary[right])
{
return right;
}
else
{
return left;
}
}
}
int QSplit(int ary[], int left, int right)
{
//三数取中
int mid = GetIndex(ary, left, right);
Swap(&ary[left], &ary[mid]);
int keyi = left;
while (left < right)
{
//右边找小
while (left < right && ary[right] >= ary[keyi])
{
right--;
}
//左边找大
while (left < right && ary[left] <= ary[keyi])
{
left++;
}
Swap(&ary[left], &ary[right]);
}
Swap(&ary[left], &ary[keyi]);
return left;
}
//升序排列 区间[left,right]
void Qsort(int ary[], int left, int right)
{
//区间错误,返回
if (left >= right)
{
return;
}
int keyi = QSplit(ary, left, right);
//递归左右区间,keyi处除外
Qsort(ary, left, keyi - 1);
Qsort(ary, keyi + 1, right);
}
void MergeFile(const char* file1, const char* file2, const char* mfile)
{
FILE* fout1;
fout1 = fopen(file1, "r");
FILE* fout2;
fout2 = fopen(file2, "r");
FILE* fin;
fin = fopen(mfile, "w");
int num1 = 0;
int num2 = 0;
int ret1 = fscanf(fout1, "%d", &num1);
int ret2 = fscanf(fout2, "%d", &num2);
while (ret1 != EOF && ret2 != EOF)
{
if (num1 <= num2)
{
fprintf(fin, "%d\n", num1);
ret1 = fscanf(fout1, "%d", &num1);
}
else
{
fprintf(fin, "%d\n", num2);
ret2 = fscanf(fout2, "%d", &num2);
}
}
while (ret1 != EOF)
{
fprintf(fin, "%d\n", num1);
ret1 = fscanf(fout1, "%d", &num1);
}
while (ret2 != EOF)
{
fprintf(fin, "%d\n", num2);
ret2 = fscanf(fout2, "%d", &num2);
}
fclose(fout1);
fclose(fout2);
fclose(fin);
}
int main()
{
FILE* fdata;
fdata = fopen("data.txt", "r");
//分割一段段数据,内存排序写到小文件
int ary[10];
int n = 10; //每10个排序
int num = 0; //临时读取内容
char subfile[20]; //小文件名字
int filei = 1; //小文件编号
int i = 0;
while (fscanf(fdata, "%d", &num) != EOF)
{
if (i < n)
{
ary[i++] = num;
}
//排序
if (i == 10)
{
Qsort(ary, 0, n - 1);
sprintf(subfile, "sub%d.txt", filei++);
FILE* fsub;
fsub = fopen(subfile, "w");
for (int i = 0; i < n; i++)
{
fprintf(fsub, "%d\n", ary[i]);
}
fclose(fsub);
i = 0;
}
}
//互相归并到文件,实现整体有序
char file1[100] = "sub1.txt";
char file2[100] = "sub2.txt";
char mfile[100] = "merge.txt";
for (int i = 2; i <= filei - 1; i++)
{
//读取file1和file2,归并出mfile
MergeFile(file1, file2, mfile);
remove(file1);
remove(file2);
strcpy(file1, mfile);
sprintf(file2, "sub%d.txt", i + 1);
sprintf(mfile, "merge%d.txt",i + 1);
}
printf("hello world\r\n");
return 0;
}