/**
 * @author Sherlock_yb
 * Created time: 2014-09-25
 */
package JTest;

import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Hashtable;
import java.util.List;

import nlp.whu.model.Pair;
import nlp.whu.utils.KeytermExtraction;
import nlp.whu.utils.KeytermExtraction.KeyTermPair;

/**
 * Manual driver for {@link KeytermExtraction}: reads a text file into a single
 * string, runs the extractor over it, and prints the elapsed time together
 * with the pairs returned by {@code getTopN}.
 */
public class Test {

    /**
     * Reads a text file and returns its content as one string.
     *
     * <p>Each line is trimmed, every remaining whitespace character in it is
     * replaced by a single space, and the lines are concatenated with no
     * separator between them.
     *
     * <p>I/O and encoding failures are not propagated: the stack trace is
     * printed and whatever was read up to that point (possibly the empty
     * string) is returned.
     *
     * @param filename    path of the file to read
     * @param charsetName name of the charset used to decode the file
     * @return the concatenated, whitespace-normalized content; never {@code null}
     */
    public String readFromFile(String filename, String charsetName) {
        StringBuilder sb = new StringBuilder();
        // The raw stream is tracked separately so it can still be closed when
        // the InputStreamReader constructor rejects the charset name; in that
        // case no reader exists to close it on our behalf.
        FileInputStream fileIn = null;
        BufferedReader reader = null;
        try {
            fileIn = new FileInputStream(filename);
            reader = new BufferedReader(new InputStreamReader(fileIn, charsetName));
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line.trim().replaceAll("\\s", " "));
            }
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    // Closing the reader also closes the wrapped file stream.
                    reader.close();
                } else if (fileIn != null) {
                    fileIn.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return sb.toString();
    }

    /**
     * Runs the extractor on a fixed, GBK-encoded sample file and prints the
     * wall-clock time of {@code execute}, the input length, the number of
     * pairs returned by {@code getTopN}, and each pair as
     * {@code first<TAB>second}.
     */
    public void KeytermExtractionTest() {
        KeytermExtraction ke = new KeytermExtraction();
        String content = readFromFile("16.后宫甄嬛传.txt", "GBK");

        // Only the execute() call is timed.
        long time = System.currentTimeMillis();
        // NOTE(review): the meaning of the boolean flag is defined by
        // KeytermExtraction and is not visible in this file.
        Hashtable result = ke.execute(content, true);
        time = System.currentTimeMillis() - time;
        System.out.println("time-consuming: " + time + " ms, str length: " + content.length());

        // NOTE(review): the meaning of the first argument (0) is defined by
        // KeytermExtraction.getTopN and is not visible in this file.
        List<?> pairs = ke.getTopN(0, result);
        System.out.println("pairs size: " + pairs.size());
        for (Object item : pairs) {
            // The element type is not visible here, so iterate as Object and
            // cast; iterating a raw List directly as Pair does not compile.
            Pair pair = (Pair) item;
            System.out.println(pair.first + "\t" + pair.second);
        }
    }

    /**
     * Entry point: runs {@link #KeytermExtractionTest()} once.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        Test t = new Test();
        t.KeytermExtractionTest();
    }
}