// Test.java 1.94 KB
/**
 * @author Sherlock_yb
 * Created: 2014-09-25
 */
package JTest;

import java.io.BufferedReader;
import java.io.Closeable;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Hashtable;
import java.util.List;
import java.util.regex.Pattern;

import nlp.whu.model.Pair;
import nlp.whu.utils.KeytermExtraction;
import nlp.whu.utils.KeytermExtraction.KeyTermPair;

/**
 * Manual driver for {@link KeytermExtraction}: loads a text file, runs the
 * extraction on its content and prints timing information and the returned
 * pairs to standard output.
 */
public class Test {
	/** Matches one whitespace character; compiled once instead of once per line read. */
	private static final Pattern WHITESPACE = Pattern.compile("\\s");

	/**
	 * Reads a text file into a single string.
	 *
	 * <p>Each line is trimmed, every remaining whitespace character inside it is
	 * replaced by a plain space, and the lines are concatenated with no separator
	 * between them.
	 *
	 * <p>I/O failures (missing file, unsupported charset, read error) are not
	 * propagated: the stack trace is printed and whatever was read up to that
	 * point is returned, which is the empty string if nothing could be read.
	 *
	 * @param filename    path of the file to read
	 * @param charsetName name of the charset used to decode the file
	 * @return the concatenated, whitespace-normalized lines; never {@code null}
	 */
	public String readFromFile(String filename, String charsetName){
		StringBuilder sb = new StringBuilder();
		// The raw stream is tracked separately from the reader: the
		// InputStreamReader constructor throws when charsetName is not usable,
		// and by then the file is already open.
		FileInputStream in = null;
		BufferedReader bf = null;
		try {
			in = new FileInputStream(filename);
			bf = new BufferedReader(new InputStreamReader(in, charsetName));
			String str;
			while((str = bf.readLine()) != null){
				sb.append(WHITESPACE.matcher(str.trim()).replaceAll(" "));
			}
		} catch (IOException e) {
			// Also covers FileNotFoundException and UnsupportedEncodingException,
			// which are IOException subclasses and were handled identically.
			e.printStackTrace();
		} finally {
			// Closing the reader closes the wrapped stream; fall back to the raw
			// stream when the reader was never constructed.
			Closeable resource = bf;
			if(resource == null){
				resource = in;
			}
			closeAndReport(resource);
		}
		return sb.toString();
	}

	/**
	 * Closes the given resource if it is not {@code null}, printing the stack
	 * trace of any failure instead of propagating it.
	 */
	private static void closeAndReport(Closeable resource){
		if(resource == null){
			return;
		}
		try {
			resource.close();
		} catch (IOException e) {
			e.printStackTrace();
		}
	}

	/**
	 * Runs {@link KeytermExtraction#execute} on the content of a fixed, GBK-encoded
	 * text file, then prints the elapsed wall-clock time of that call, the length of
	 * the content, the number of pairs returned by {@code getTopN} and each pair as
	 * {@code first<TAB>second}.
	 */
	public void KeytermExtractionTest(){
		KeytermExtraction ke = new KeytermExtraction();
		String content = readFromFile("16.后宫甄嬛传.txt", "GBK");
		// Only the execute() call is timed.
		long time = System.currentTimeMillis();
		Hashtable<String, Integer> result = ke.execute(content, true);
		time = System.currentTimeMillis() - time;
		System.out.println("time-consuming: " + time + " ms, str length: " + content.length());
		// NOTE(review): the meaning of the 0 passed to getTopN is defined by
		// KeytermExtraction (presumably "no limit") - confirm against that class.
		List<KeyTermPair> pairs = ke.getTopN(0, result);
		System.out.println("pairs size: " + pairs.size());
		for(Pair pair : pairs){
			System.out.println(pair.first+"\t"+pair.second);
		}
	}

	/**
	 * Entry point: runs {@link #KeytermExtractionTest()} once.
	 *
	 * @param args ignored
	 */
	public static void main(String[] args){
		Test t = new Test();
		t.KeytermExtractionTest();
	}
}