1.第一步 统计所有的字符【*】出现的频次并按频次进行从小到大的排序
2.第二步 进行权值的合并
3.第三步 编码 左0 右 1
huffman编码大致分为以下步骤:
- 统计所有字符出现的频次
- 构建huffman树
- huffman树生成huffman编码
- 将源文件压缩成huffman编码结构
- 收到源文件之后利用Huffman树进行Huffman解码
定义huffman树的数据结构
// huffman编码数据结构
class HuffmanNode{
constructor(char,frequency) {
this.char = char
this.frequency = frequency
this.left = null
this.right = null
}
}
统计字频
// 统计频次
const countFrequency = function () {
const data = fs.readFileSync(path.resolve(__dirname, './a.txt'))
const dataTxt = data.toString();
const map = new Map();
// 计算频次
for (let index = 0; index < dataTxt.length; index++) {
const element = dataTxt[index];
if (map.has(element)) {
map.set(element, map.get(element) + 1);
} else {
map.set(element, 1);
}
}
return map;
}
构建huffman树
/**
* 构建huffman树
* @param {Map<char,number>} frequency
*/
const buildHuffmanTree = function (frequency) {
const keyValues = Array.from(frequency);
const nodes = []
for (const [key,value] of keyValues) {
const huffmanNodeItem = new HuffmanNode(key, value)
nodes.push(huffmanNodeItem)
}
while (nodes.length > 1) {
// 从大到小排序
nodes.sort((a, b) => b.frequency - a.frequency)
// 取最小的两个
leftNode = nodes.pop()
rightNode = nodes.pop();
// 最小的node合并成一个新的huffman节点
parentNode = new HuffmanNode(null, leftNode.frequency + rightNode.frequency);
parentNode.left = leftNode
parentNode.right = rightNode
nodes.push(parentNode)
}
return nodes[0]
}
生成huffman编码
利用huffman树生成huffman编码
/**
* 将huffman树生成huffman编码
* @param {HuffmanNode} huffmanTree
* @returns
*/
const huffmanCode = function (huffmanTree) {
const result = new Map();
const dfs = function (huffmanTree, str) {
if (huffmanTree && huffmanTree.left && huffmanTree.right) {
dfs(huffmanTree.left, str + '0')
dfs(huffmanTree.right,str+'1')
} else {
result.set(huffmanTree.char, str)
return;
}
}
dfs(huffmanTree, '');
return result;
}
huffman编码
/**
* 霍夫曼编码
* @returns
*/
const HuffmanEncode = function() {
const data = fs.readFileSync(path.resolve(__dirname, './a.txt'))
const dataTxt = data.toString();
let result = ''
for (let index = 0; index < dataTxt.length; index++) {
const element = dataTxt[index];
result+= codes.get(element)
}
fs.writeFileSync(path.resolve(__dirname, './b.txt'), result)
return result;
}
huffman解码
/**
* 霍夫曼解码
* @param {*} huffmanTree
* @param {*} source
*/
function HUffmanDecode(huffmanTree,source){
let current = huffmanTree
let result = ''
for (let index = 0; index < source.length; index++) {
const element = source[index];
if (element == '0') {
current = current.left
if (current.char != null) {
result += current.char;
current = huffmanTree;
}
} else {
current = current.right;
if (current.char != null) {
result += current.char;
current = huffmanTree;
}
}
}
fs.writeFileSync(path.resolve(__dirname,'./c.txt'),result)
}
完整代码
运行环境 Nodejs
需要将代码保存为 .js类型文件
并在.js文件所在文件夹下放一个a.txt文件方可成功运行
a.txt文件为需要进行编码的源文本,建议为英文
const fs = require('fs')
const path = require('path');
// 统计频次
const countFrequency = function () {
const data = fs.readFileSync(path.resolve(__dirname, './a.txt'))
const dataTxt = data.toString();
const map = new Map();
// 计算频次
for (let index = 0; index < dataTxt.length; index++) {
const element = dataTxt[index];
if (map.has(element)) {
map.set(element, map.get(element) + 1);
} else {
map.set(element, 1);
}
}
return map;
}
// huffman编码数据结构
class HuffmanNode{
constructor(char,frequency) {
this.char = char
this.frequency = frequency
this.left = null
this.right = null
}
}
/**
* 构建huffman树
* @param {Map<char,number>} frequency
*/
const buildHuffmanTree = function (frequency) {
const keyValues = Array.from(frequency);
const nodes = []
for (const [key,value] of keyValues) {
const huffmanNodeItem = new HuffmanNode(key, value)
nodes.push(huffmanNodeItem)
}
while (nodes.length > 1) {
// 从大到小排序
nodes.sort((a, b) => b.frequency - a.frequency)
// 取最小的两个
leftNode = nodes.pop()
rightNode = nodes.pop();
// 最小的node合并成一个新的huffman节点
parentNode = new HuffmanNode(null, leftNode.frequency + rightNode.frequency);
parentNode.left = leftNode
parentNode.right = rightNode
nodes.push(parentNode)
}
return nodes[0]
}
const frequency = countFrequency()
const huffmanTree = buildHuffmanTree(frequency)
fs.writeFileSync(path.resolve(__dirname, './huffmanTree.json'), JSON.stringify(huffmanTree))
/**
* 将huffman树生成huffman编码
* @param {HuffmanNode} huffmanTree
* @returns
*/
const huffmanCode = function (huffmanTree) {
const result = new Map();
const dfs = function (huffmanTree, str) {
if (huffmanTree && huffmanTree.left && huffmanTree.right) {
dfs(huffmanTree.left, str + '0')
dfs(huffmanTree.right,str+'1')
} else {
result.set(huffmanTree.char, str)
return;
}
}
dfs(huffmanTree, '');
return result;
}
const codes = huffmanCode(huffmanTree);
/**
* 霍夫曼编码
* @returns
*/
const HuffmanEncode = function() {
const data = fs.readFileSync(path.resolve(__dirname, './a.txt'))
const dataTxt = data.toString();
let result = ''
for (let index = 0; index < dataTxt.length; index++) {
const element = dataTxt[index];
result+= codes.get(element)
}
fs.writeFileSync(path.resolve(__dirname, './b.txt'), result)
return result;
}
const result = HuffmanEncode()
/**
* 霍夫曼解码
* @param {*} huffmanTree
* @param {*} source
*/
function HuffmanDecode(huffmanTree,source){
let current = huffmanTree
let result = ''
for (let index = 0; index < source.length; index++) {
const element = source[index];
if (element == '0') {
current = current.left
if (current.char != null) {
result += current.char;
current = huffmanTree;
}
} else {
current = current.right;
if (current.char != null) {
result += current.char;
current = huffmanTree;
}
}
}
fs.writeFileSync(path.resolve(__dirname,'./c.txt'),result)
}
HuffmanDecode(huffmanTree,result)