/**
* 方法类
*/
public class CoreUtils {
/**
* 将byte数组转为单个节点
*/
public static List<TreeNode> getNode(byte[] bytes) {
//计算每个byte出现的次数
Map<Byte, Integer> map = new HashMap<>();
for (byte b : bytes) {
Integer num = map.get(b);
map.put(b, num != null ? num + 1 : 1);
}
//将每个byte创建为节点
List<TreeNode> nodeList = new ArrayList<>();
for (Byte b : map.keySet()) {
nodeList.add(new TreeNode(b, map.get(b)));
}
return nodeList;
}
/**
* 根据单个节点,获取哈夫曼树
*/
public static TreeNode getTree(List<TreeNode> nodeList) {
while (true) {
if (nodeList.size() == 1) {
break;
}
//按出现次数降序排序
nodeList = nodeList.stream().sorted(Comparator.comparing(TreeNode::getNum)).collect(Collectors.toList());
//取前两个节点合成新节点
TreeNode left = nodeList.get(0);
TreeNode right = nodeList.get(1);
TreeNode node = new TreeNode(null, left.getNum() + right.getNum());
node.setLeft(left);
node.setRight(right);
//新节点代替旧节点
nodeList.set(1, node);
nodeList.remove(0);
}
return nodeList.get(0);
}
/**
* 根据哈夫曼树,获取哈夫曼编码表
*
* @param rootNode 哈夫曼树根节点
* @return Map<byte数据, 新的编码>
*/
public static Map<Byte, String> getDict(TreeNode rootNode) {
Map<Byte, String> dictMap = new HashMap<>();
//分别递归左子节点和右子节点(左节点编码为0,右节点编码为1)
getDict(dictMap, rootNode.getLeft(), "0");
getDict(dictMap, rootNode.getRight(), "1");
return dictMap;
}
/**
* 递归方法
*/
public static void getDict(Map<Byte, String> dictMap, TreeNode treeNode, String code) {
if (treeNode.getData() == null) {
//左子节点
getDict(dictMap, treeNode.getLeft(), code + "0");
//右子节点
getDict(dictMap, treeNode.getRight(), code + "1");
} else {
dictMap.put(treeNode.getData(), code);
}
}
/**
* 编码
*/
public static byte[] encode(Map<Byte, String> dictMap, byte[] bytes) {
StringBuffer sb = new StringBuffer();
for (byte b : bytes) {
sb.append(dictMap.get(b));
}
int length = sb.length() % 8 == 0 ? sb.length() / 8 : sb.length() / 8 + 1;
//数组长度+1,最后一个存储倒数第二个byte的长度,解码时使用
byte[] result = new byte[length + 1];
int i = 0;
int j = 0;
while (true) {
if (j + 8 >= sb.length()) {
//二进制字符串转为十进制(转byte后)
result[i] = (byte) Integer.parseInt(sb.substring(j), 2);
//因为最后一条数据sb.substring(j)的长度不一定等于8,所以需要记录最后一条数据的长度,用于解码时还原数据
result[i + 1] = (byte) sb.substring(j).length();
break;
} else {
//二进制字符串转为十进制
result[i] = (byte) Integer.parseInt(sb.substring(j, j + 8), 2);
}
j += 8;
i++;
}
return result;
}
/**
* 解码
*
* @param dictMap 编码表
* @param bytes 编码后的byte
*/
public static byte[] decode(Map<Byte, String> dictMap, byte[] bytes) {
//dictMap的key和value反转
Map<String, Byte> map = new HashMap<>();
for (Map.Entry<Byte, String> item : dictMap.entrySet()) {
map.put(item.getValue(), item.getKey());
}
List<Byte> byteList = new ArrayList<>();
StringBuffer sb = new StringBuffer();
for (int i = 0; i < bytes.length - 1; i++) {
//byte转为二进制字符串(补够8位)
String binary = StringUtils.leftPad(Integer.toBinaryString(bytes[i] & 0xff), 8, '0');
//如果是最后一条,则获取最后一条二进制串长度,长度不是8则截取
if (i == bytes.length - 2) {
//在编码时,bytes最后一条记录的是最后一条二进制串长度
int lastDateLength = bytes[bytes.length - 1];
if (lastDateLength < 8) {
binary = binary.substring(8 - lastDateLength);
}
}
for (int j = 0; j < binary.length(); j++) {
sb.append(binary.charAt(j));
Byte data = map.get(sb.toString());
if (data != null) {
byteList.add(data);
sb.delete(0, sb.length());
}
}
}
byte[] result = new byte[byteList.size()];
for (int i = 0; i < byteList.size(); i++) {
result[i] = byteList.get(i);
}
return result;
}
public static void main(String[] args) {
String str = "http://blog.xinpapa.com/";
//将字符串转为单个节点
List<TreeNode> nodeList = getNode(str.getBytes());
//将节点转为哈夫曼树
TreeNode rootNode = getTree(nodeList);
//根据哈夫曼树获取哈夫曼编码表
Map<Byte, String> dictMap = getDict(rootNode);
System.out.println("编码前长度:" + str.length());
//编码
byte[] encode = encode(dictMap, str.getBytes());
System.out.println("编码后长度:" + encode.length);
//解码
byte[] decode = decode(dictMap, encode);
System.out.println("解码后结果:" + new String(decode));
System.out.println("解码后相等:" + new String(decode).equals(str));
}
}