如题:
统计某个文本文件中出现的单词总数,以及每个单词的出现次数占单词总数的比例。还要找出出现频率最高的前五个单词,求出它们的出现次数之和占单词总数的比例。
今天下午两点左右要用的,还请尽快!感激不尽!
代码如下:
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;
/**
 * Counts whitespace-separated words in a text file and prints, for every
 * distinct word, its count and its share of all words, followed by the
 * combined count and share of the five most frequent words.
 *
 * <p>Usage: {@code java ReadTest [path]}. When no path is given, the
 * original hard-coded file location is used.
 */
public class ReadTest {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PATH =
            "/home/c3736/IdeaProjects/Helloword/src/com/tinno/test1/words.txt";

    /** How many of the most frequent words are summed for the final line. */
    private static final int TOP_N = 5;

    /**
     * Reads the file, echoes each line, and prints the word statistics.
     *
     * @param args optional; {@code args[0]} overrides the default file path
     */
    public static void main(String[] args) {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        Map<String, Integer> counts = new HashMap<>();
        int total;
        // try-with-resources closes the reader on every path, including failures.
        // NOTE(review): FileReader decodes with the platform default charset on
        // older JDKs; confirm this matches the encoding of the input file.
        try (BufferedReader br = new BufferedReader(new FileReader(path))) {
            total = countWords(br, counts);
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            e.printStackTrace();
            return;
        }

        System.out.println("单词总数为:" + total);

        int remaining = TOP_N;
        int topTotal = 0;
        for (Map.Entry<String, Integer> entry : sortByCountDescending(counts)) {
            int value = entry.getValue();
            if (remaining > 0) {
                topTotal += value;
                remaining--;
            }
            // Multiply before dividing so exact ratios print without
            // floating-point noise (e.g. 57.0 rather than 56.99999999999999).
            // total is > 0 here because at least one word was counted.
            double percent = value * 100.0 / total;
            System.out.println("单词 " + entry.getKey() + "\t数量为:" + value
                    + "\t\t占比为:" + percent + "%");
        }

        // An empty file has no words; report 0% instead of NaN from 0/0.
        double topPercent = (total == 0) ? 0.0 : topTotal * 100.0 / total;
        System.out.println("前五的单词数量和为:" + topTotal + "\t比例为:" + topPercent + "%");
    }

    /**
     * Echoes every line of {@code reader} to standard output and tallies its
     * whitespace-separated words into {@code counts}.
     *
     * @return the total number of words read
     * @throws IOException if reading from {@code reader} fails
     */
    private static int countWords(BufferedReader reader, Map<String, Integer> counts)
            throws IOException {
        int total = 0;
        String line;
        while ((line = reader.readLine()) != null) {
            System.out.println(line);
            for (String word : line.split("\\s+")) {
                // split() returns "" for a blank line and a leading "" when the
                // line starts with whitespace; neither is a word.
                if (word.isEmpty()) {
                    continue;
                }
                total++;
                Integer seen = counts.get(word);
                counts.put(word, seen == null ? 1 : seen + 1);
            }
        }
        return total;
    }

    /** Returns the entries of {@code counts} ordered by count, highest first. */
    private static List<Map.Entry<String, Integer>> sortByCountDescending(
            Map<String, Integer> counts) {
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        Collections.sort(entries, new Comparator<Map.Entry<String, Integer>>() {
            @Override
            public int compare(Map.Entry<String, Integer> a, Map.Entry<String, Integer> b) {
                return b.getValue().compareTo(a.getValue());
            }
        });
        return entries;
    }
}
文件内容:
运行结果:
如有帮助,请点采纳
/**
 * Counts how often each whitespace-separated word occurs in a text file and
 * prints the {@code word:count} pairs twice: once by iterating the key set
 * and once by iterating the entry set.
 *
 * <p>Usage: {@code java WordCount [path]}. When no path is given, the
 * original hard-coded file location is used.
 */
public class WordCount {

    /** File that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PATH = "d:/test.txt";

    /**
     * Reads the file, tallies its words, and prints the tallies.
     *
     * @param args optional; {@code args[0]} overrides the default file path
     * @throws Exception if the file cannot be opened or read
     */
    public static void main(String[] args) throws Exception {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PATH;

        // Maps each word to the number of times it has been seen.
        Map<String, Integer> map = new HashMap<>();

        // try-with-resources closes the stream even when readLine() throws.
        // NOTE(review): InputStreamReader without an explicit charset decodes
        // with the platform default on older JDKs; confirm that matches the file.
        try (BufferedReader br = new BufferedReader(
                new InputStreamReader(new FileInputStream(path)))) {
            String line;
            while ((line = br.readLine()) != null) {
                // Split on any run of whitespace so tabs and repeated spaces
                // separate words instead of producing empty "words".
                for (String word : line.split("\\s+")) {
                    // A blank line or leading whitespace still yields one "".
                    if (word.isEmpty()) {
                        continue;
                    }
                    Integer seen = map.get(word);
                    map.put(word, seen == null ? 1 : seen + 1);
                }
            }
        }

        // First listing: look each value up through its key.
        for (String key : map.keySet()) {
            int value = map.get(key);
            System.out.println(key + ":" + value);
        }
        System.out.println("----------------------------");
        // Second listing: iterate the entries directly.
        for (Map.Entry<String, Integer> ent : map.entrySet()) {
            System.out.println(ent.getKey() + ":" + ent.getValue());
        }
    }
}
算出单词总数和每个单词的个数之后,再把各个单词的个数放进一个数组(或列表)中按降序排序,取前五项,再找出它们对应的单词即可。