ca4e3e24
tu
first commit
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
|
/**
 * @author Sherlock_yb
 * Created: 2014-09-25
 */
package JTest;
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.Hashtable;
import java.util.List;
import nlp.whu.model.Pair;
import nlp.whu.utils.KeytermExtraction;
import nlp.whu.utils.KeytermExtraction.KeyTermPair;
/**
 * Manual smoke test for {@link KeytermExtraction}: reads a sample text file,
 * runs key-term extraction over its content, and prints the elapsed time and
 * the resulting pairs to standard output.
 */
public class Test {

    /** Sample document read by {@link #KeytermExtractionTest()}. */
    private static final String SAMPLE_FILE = "16.后宫甄嬛传.txt";

    /** Character encoding used to decode {@link #SAMPLE_FILE}. */
    private static final String SAMPLE_CHARSET = "GBK";

    /**
     * Reads a text file and returns its content as one string.
     *
     * <p>Each line is trimmed, every remaining whitespace character inside it is
     * replaced by a single space character (runs are not collapsed), and the
     * lines are then concatenated with no separator between them.
     *
     * <p>Failures are handled on a best-effort basis: the stack trace is printed
     * and whatever was read up to that point (possibly the empty string) is
     * returned. This method never returns {@code null}.
     *
     * @param filename    path of the file to read
     * @param charsetName name of the charset used to decode the file
     * @return the concatenated, whitespace-normalised lines read so far
     */
    public String readFromFile(String filename, String charsetName) {
        StringBuilder sb = new StringBuilder();
        // The raw stream is tracked separately so that it can still be closed
        // when the reader cannot be created (e.g. unsupported charset name);
        // otherwise the already-opened file handle would leak.
        FileInputStream in = null;
        BufferedReader reader = null;
        try {
            in = new FileInputStream(filename);
            reader = new BufferedReader(new InputStreamReader(in, charsetName));
            String line;
            while ((line = reader.readLine()) != null) {
                sb.append(line.trim().replaceAll("\\s", " "));
            }
        } catch (IOException e) {
            // FileNotFoundException and UnsupportedEncodingException are both
            // IOException subclasses and were handled identically, so a single
            // handler covers all three cases.
            e.printStackTrace();
        } finally {
            try {
                if (reader != null) {
                    // Closing the reader also closes the underlying stream.
                    reader.close();
                } else if (in != null) {
                    in.close();
                }
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        return sb.toString();
    }

    /**
     * Runs {@link KeytermExtraction} over the sample file and prints the time
     * taken by the extraction call, the input length, the number of pairs
     * returned by {@code getTopN}, and each pair as {@code first<TAB>second}.
     */
    public void KeytermExtractionTest() {
        KeytermExtraction ke = new KeytermExtraction();
        String content = readFromFile(SAMPLE_FILE, SAMPLE_CHARSET);

        // Only the execute(...) call is timed; file reading is excluded.
        // NOTE(review): the meaning of the boolean flag is defined by
        // KeytermExtraction.execute - confirm against that class.
        long time = System.currentTimeMillis();
        Hashtable<String, Integer> result = ke.execute(content, true);
        time = System.currentTimeMillis() - time;
        System.out.println("time-consuming: " + time + " ms, str length: " + content.length());

        // NOTE(review): 0 is passed as the first getTopN argument; presumably
        // it has a special meaning such as "no limit" - verify in KeytermExtraction.
        List<KeyTermPair> pairs = ke.getTopN(0, result);
        System.out.println("pairs size: " + pairs.size());
        for (Pair pair : pairs) {
            System.out.println(pair.first + "\t" + pair.second);
        }
    }

    /**
     * Entry point: runs {@link #KeytermExtractionTest()} once.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Test t = new Test();
        t.KeytermExtractionTest();
    }
}
|