自己动手写个哈夫曼压缩软件

人生难得糊涂

浏览: 114481 次

最近访客更多访客>>

anlinko

jw216

zjy_369

zt1102

博主相关

博客

微博

相册

留言

关于我

文章分类

社区版块

存档分类

本文做的压缩软件其压缩算法是哈夫曼压缩，不了解哈夫曼编码基本思想的同学请自行百度。

哈夫曼压缩软件基本思路：

一.压缩

1.统计文件中各字节出现的次数。

2.根据第一步结果建立哈夫曼树

3.得到每个字节的哈夫曼编码

4.对文件每个字节进行编码，以每8位为一字节写入到压缩后的文件

压缩文件内容：

新的哈夫曼编码表（原哈夫曼表的每对KEY 与VALUE 调换位置）+文件编码+补0个数（占一字节）

二：解压

1.读入哈夫曼编码表

2.每读入一个字节就将其转换为其二进制形式的字符串，判断是否为哈夫曼码

3.第二步的结果如果哈夫曼编码，则译码并写入解压文件，否则继续读入

基本思路如上，下面看看需要注意的问题

1.ObjectInputStream 的available方法并不能得到流中剩余字节数

如： System.out.println("ois ava:   " + ois.available());
            reMap = (HashMap<String, Byte>) ois.readObject();
            System.out.println("ois ava2:   " + ois.available());

结果为 0 和 1024（不同文件结果可能不同）

2.若用BufferedIntputStream 包装ObjectInputStream 则 bis(BufferedIntputStream的对象)的available().方法的最大值也为1024

File file=new File("F:\\JavaTest\\source.txt");
FileInputStream fis=new FileInputStream(file);
BufferedInputStream bis=new BufferedInputStream(fis);
int b;
b=fis.read();
System.out.println(b);
b=bis.read();
System.out.println(b);

source.txt中存的是abcd。

输出的结果是 97 98 。

因此用BufferedInputStream包装FileInputStream以后，BufferedInputStream的读入会随着FileInputStream的读入变化。反过来不成立。如：

File file=new File("F:\\JavaTest\\source.txt");
			FileInputStream fis=new FileInputStream(file);
			BufferedInputStream bis=new BufferedInputStream(fis);
			int b;
			b=bis.read();
			System.out.println(b);
			b=fis.read();
			System.out.println(b);

输出为 97 -1

4.切忌不要用如下形式读入数据

FileInputStream fis=new FileInputStream(file);
BufferedInputStream bis=new BufferedInputStream(fis);
for(int i=0;i<bis.available();i++)//因为没执行一次read方法  available返回的数都会变化
{
int t=bis.read();
System.out.printlen(t);
}

好了。下面看代码

package data_0719哈夫曼树;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Comparator;
import java.util.HashMap;
import java.util.PriorityQueue;
import java.util.Set;

/**
 * 该类实现了哈夫曼压缩功能 1.统计各字符出现频度 2.创建哈夫曼树 3.进行哈夫曼编码 4.根据哈夫曼编码进行压缩
 * 
 * @author ZhangZunC
 * 
 */

public class HuffMan {
	//6810ms
	/**
	 * 压缩文件
	 * @param fileIn
	 *            要压缩的文件
	 * @param fileOut
	 *            压缩文件的存放地址
	 */
	public void toRar(File fileIn, File fileOut) {
		TreeNode huffTree = new TreeNode(0);// 哈夫曼树
		TreeNode[] treeArray = new TreeNode[257];// 保存频度信息的树数组
		// 统计频度
		countFile(fileIn, treeArray);

		// 建立优先队列
		NodeCompare nodecompare = new NodeCompare();
		PriorityQueue<TreeNode> queue = new PriorityQueue<TreeNode>(12,
				nodecompare);
		for (int i = 0; i < treeArray.length; i++) {
			if (treeArray[i].data != 0)
				queue.add(treeArray[i]);
		}

		// 创建树
		huffTree = creatTree1(queue);
		HashMap<Byte, String> map = new HashMap();
		// printTree(huffTree);
		// 编码
		huffCode(map, huffTree, "");

		// 输出编码
		System.out.println("**********   哈夫曼编码表:  *******************");
		Set<Byte> nodes = map.keySet();
		for (Byte node : nodes) {
			String t = map.get(node);
			System.out.println(node + "<>" + t);
		}
		System.out.println("***********************************************");

		// 压缩文件到fileOut
		huffmanrar(fileIn, fileOut, map);
		System.out.println("压缩前文件大小：" + fileIn.length()
				+ "  bytes   压缩后的文件的大小：" + fileOut.length() + "  bytes");

	}// main

	public void unrar(File rarFile, File unRarFile) {
		long startTime=System.currentTimeMillis();
		// 读入压缩文件
		FileInputStream fis;
		try {
			fis = new FileInputStream(rarFile);
			// 得到保存哈弗曼编码的哈希表 (与压缩时的哈希表 映射正好相反)
			HashMap<String, Byte> reMap = new HashMap();
			ObjectInputStream ois = new ObjectInputStream(fis);
			reMap = (HashMap<String, Byte>) ois.readObject();
		//	BufferedInputStream bis = new BufferedInputStream(ois);
			FileOutputStream fos = new FileOutputStream(unRarFile);
			BufferedOutputStream bos = new BufferedOutputStream(fos);
			// 将字符串（哈夫曼编码）翻译
			String  temp = "";
			Byte wByte;
			int bt = ois.read();
			while (bt != -1) {
				int readLen = 8;
				//只剩下最后一个了
				if(fis.available()==1)//这里不能用ois.available()
				{
					readLen=8-ois.read();//0的个数
				}
				for (int j = 0; j < readLen; j++) {
					temp +=byte2String((byte) bt).charAt(j);// 每次取一个字符
					// 如果包含编码 
					if (reMap.containsKey(temp)) {
						//转换成源文件的字节
						wByte = reMap.get(temp);
						bos.write(wByte);
						temp="";
					}
				}
				bt = ois.read();
			}
			long endTime=System.currentTimeMillis();
			System.out.println("解压成功  所花时间:   "+(endTime-startTime)+"ms");
			fis.close();
			 bos.flush();
			 fos.close();

		} catch (Exception e) {
			e.printStackTrace();
		}

	}

	/**
	 * 根据哈夫曼表 压缩文件
	 * 
	 * @param fileIn
	 *            要压缩的文件
	 * @param fileOut
	 *            压缩后的文件
	 * @param map
	 *            哈希表
	 */
	private void huffmanrar(File fileIn, File fileOut,
			HashMap<Byte, String> map) {
		try {
			FileOutputStream fos = new FileOutputStream(fileOut);
			ObjectOutputStream oos = new ObjectOutputStream(fos);
			//将HashMap逆转然后写入到文件
			HashMap<String, Byte> reMap = new HashMap();
			Set<Byte> nodes = map.keySet();
			for (Byte node : nodes) {
				//	System.out.println(node+"<>"+map.get(node));
				reMap.put(map.get(node), node);
			}
			oos.writeObject(reMap);
			// 将文件内容每8位为一字节写到文件
			FileInputStream fis = new FileInputStream(fileIn);
			BufferedInputStream bis = new BufferedInputStream(fis);
			String sTemp = "";
			int num=0;
			int tb = bis.read();
			while (tb != -1) {
				//System.out.println((byte)tb+"         "+map.get((byte)tb));
				sTemp += map.get((byte)tb);
				//大于8位则将前8位写入
				while (sTemp.length() >= 8) {
					//System.out.println("sTemp  "+sTemp);
					String wr = sTemp.substring(0, 8);
					sTemp = sTemp.substring(8, sTemp.length());
					byte wt = string2Int(wr);
					oos.write(wt);
					num++;
				}
				tb = bis.read();
			}
			System.out.println("num:  "+(num+1));
			byte zeroAdd = 0;
			if (sTemp.length() != 0) {
				while (sTemp.length() != 8) {
					sTemp += "0";
					zeroAdd++;
				}
			}
			oos.write(string2Int(sTemp));
			oos.write(zeroAdd);
			fis.close();
			oos.flush();
			fos.close();
		} catch (Exception e) {
			e.printStackTrace();
		}

	}

	/**
	 * 用优先队列创建树
	 * 
	 * @param treeArray
	 * @return
	 */

	private TreeNode creatTree1(PriorityQueue<TreeNode> queue) {
		TreeNode huffTree = new TreeNode(0);
		while (queue.size() > 1) {
			TreeNode t1 = queue.poll();
			TreeNode t2 = queue.poll();
			huffTree = huffTree.Union(t1, t2);
			queue.add(huffTree);
		}

		return huffTree;
	}

	/**
	 * 哈夫曼编码
	 * 
	 * @map 哈希表
	 * @param huffTree
	 *            当前树
	 * @param code
	 *            哈夫曼码
	 */
	private void huffCode(HashMap<Byte, String> map, TreeNode huffTree,
			String code) {
		// 如果是叶子节点
		if (huffTree.left == null && huffTree.right == null) {
			map.put(huffTree.index, code);
			// System.out.println("index: "+huffTree.index+"  code : "+code);
			return;
		} else {
			// 访问左子树
			if (huffTree.left != null)
				huffCode(map, huffTree.left, code + "0");
			// 访问右子树
			if (huffTree.right != null)
				huffCode(map, huffTree.right, code + "1");
		}

	}
	
	
	/**
	 * 统计文件夹的每个字节出现的次数
	 * 
	 * @param fileIn
	 */
	private void countFile(File fileIn, TreeNode[] root) {
		try {
			FileInputStream fis = new FileInputStream(fileIn);
			BufferedInputStream bis = new BufferedInputStream(fis);
			// 初始化
			for (int i = 0; i < root.length; i++)
				root[i] = new TreeNode(0);

			HashMap<Integer, TreeNode> map = new HashMap();
			int inum = bis.available();
			int nodeNum = 0;
			for (int i = 0; i < 257; i++)
				root[i] = new TreeNode();
			for (int i = 0; i < inum; i++) {
				int t = bis.read();
				if (map.containsKey(t)) {
					map.get(t).data += 1;
					// System.out.println("index: "+map.get(t).index+"data "+
					// map.get(t).data);
				} else {
					root[nodeNum].index = (byte)t;
					root[nodeNum].data = 1;
					map.put(t, root[nodeNum]);
					nodeNum++;
				}

			}

			fis.close();
		} catch (Exception e) {
			e.printStackTrace();
		}
	}
	
	/**
	 * 字节转int
	 * @param b
	 * @return
	 */
	private  int byteToint(byte b[]) {
		int t1 = (b[3] & 0xff) << 24;
		int t2 = (b[2] & 0xff) << 16;
		int t3 = (b[1] & 0xff) << 8;
		int t4 = b[0] & 0xff;
		// System.out.println(b[1]&0xff);//输出的是一个整形数据
		// 在java中，int和比int位数来的小的类型b，如byte,char等，都是先把小类型扩展成int再来运算，
		// return( t1<<24)+(t2<<16)+(t3<<8)+t4;//必须加括号
		return t1 + t2 + t3 + t4;
	}

	/**
	 * int 转换byte
	 * @param a
	 * @param len
	 * @return
	 */
	private  byte[] intTobyte(int a, int len) {
		byte[] t = new byte[len];
		t[0] = (byte) ((a & 0xff));
		switch (len) {
		case 4:
			t[3] = (byte) ((a & 0xff000000) >> 24);
		case 3:
			t[2] = (byte) ((a & 0xff0000) >> 16);
		case 2:
			t[1] = (byte) ((a & 0xff00) >> 8);
		case 1:
			break;
		}
		return t;
	}

	/**
	 * 将byte 转成其二进制的字符串
	 * @param b
	 * @return
	 */
	private String byte2String(byte b) {
		String s = "";
		for (int i = 7; i >= 0; i--) {
			int temp = b >> i & 1;
			s += temp;
		}
		return s;
	}

	/**
	 * 输出哈夫曼树
	 * 
	 * @param node
	 */
	private void printTree(TreeNode node) {
		if (node != null) {
			printTree(node.left);
			System.out.println(node.data);
			printTree(node.right);
		}
	}



	/**
	 * 将8位字符串转换为整数
	 * 
	 * @param s
	 * @return
	 */
	private byte string2Int(String s) {

		byte ans = 0;
		for (int i = 0; i < s.length(); i++) {
			int t = s.charAt(i) - 48;
			if (i != s.length() - 1) {
				if ((t == 1))
					t = 2;
				ans += Math.pow(t, s.length() - i - 1);
			} else {
				ans += t;
			}

		}
		if (ans > 127)
			System.out.println("error");
		return ans;

	}
	


}

package data_0719哈夫曼树;

import java.awt.FlowLayout;
import java.awt.event.ActionEvent;
import java.awt.event.ActionListener;
import java.io.File;

import javax.swing.JButton;
import javax.swing.JFileChooser;
import javax.swing.JFrame;
import javax.swing.JOptionPane;

public class HaffManFrame extends JFrame {
	JButton rar;
	JButton unrar;

	ButtonListener bl;

	public HaffManFrame() {
		// 设置窗体相关
		this.setSize(400, 300);
		this.setDefaultCloseOperation(3);
		this.setVisible(true);
		this.setLayout(new FlowLayout());
		// 初始化按钮以及添加监听器
		rar = new JButton("压缩文件");
		this.add(rar);
		unrar = new JButton("解压文件");
		this.add(unrar);
		bl = new ButtonListener();
		rar.addActionListener(bl);
		unrar.addActionListener(bl);

	}

	/**
	 * 按钮监听器类
	 * 
	 * @author ZhangZunC
	 * 
	 */
	class ButtonListener implements ActionListener {
		private JFileChooser fileChooser = null;
		private HuffMan huffMan = null;
		private File chooseFile = null;
		private File saveFile = null;

		public ButtonListener() {
			// 初始化文件选择框
			fileChooser = new JFileChooser();
			// 设置当前目录
			fileChooser.setCurrentDirectory(new File("F:\\JavaTest"));

			huffMan = new HuffMan(); 
		}

		public void actionPerformed(ActionEvent e) {

			if (e.getActionCommand().equals("压缩文件")) {
				yasuo();
			}
			if (e.getActionCommand().equals("解压文件")) {
				jieya();
			}
		}

		public void yasuo() {
			// 如果点击的是确定按钮
			if (fileChooser.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
				chooseFile = fileChooser.getSelectedFile();
				// 选择要保存的目录
				if (fileChooser.showSaveDialog(null) == JFileChooser.APPROVE_OPTION) {
					saveFile = fileChooser.getSelectedFile();
					// 压缩文件
					huffMan.toRar(chooseFile, saveFile);
					System.out.println("压缩成功");
				}
			}
		}

		public void jieya() {
			//解压文件
			if (fileChooser.showOpenDialog(null) == JFileChooser.APPROVE_OPTION) {
				chooseFile = fileChooser.getSelectedFile();
				// 选择要保存的目录
				if (fileChooser.showSaveDialog(null) == JFileChooser.APPROVE_OPTION) {
					saveFile = fileChooser.getSelectedFile();
					huffMan.unrar(chooseFile, saveFile);
					System.out.println("解压成功");
				}
			}
		//	unrar(fileOut,unRarFile,map);
			
		}
	}

	public static void main(String[] args) {
		HaffManFrame hmf = new HaffManFrame();
	}
}

package data_0719哈夫曼树;

import java.util.Comparator;

/**
 * 比较类
 * @author ZhangZunC
 *
 */
class NodeCompare implements Comparator<TreeNode> {

	@Override
	public int compare(TreeNode o1, TreeNode o2) {
		// TODO Auto-generated method stub
		if (o1.data > o2.data)
			return 1;
		if (o1.data < o2.data)
			return -1;
		// if(o1.data==o2.data)
		return 0;
	}

}

软件还存在的问题：
压缩大的文件时，速度会非常慢。

解决方案：

使用消费者模型，构建双缓冲读入写出。现在正在尝试写。。。。。

欢迎指点交流！

5
顶

3
踩

分享到：

poj2239-二分图最大匹配匈牙利算法 | 人工智能原来如此简单——你所见过的最好的 ...

2014-07-22 23:04
浏览 1995
评论(0)
分类:编程语言
查看更多

发表评论

您还没有登录,请您登录后再发表评论

最近访客更多访客>>

博主相关

文章分类

社区版块

存档分类

最新评论