Java 读取文本数据:对连续的行按某一字段去重,每组连续重复的行只保留首行和末行。
// Shared logger for this class, created once via LoggerFactory for CSVTest.
private static final Logger logger = LoggerFactory.getLogger(CSVTest.class);
/**
 * Reads a text file and collapses every run of consecutive lines containing the marker
 * {@code "lisi"}, keeping only the first and the last line of each run. Lines that do not
 * contain the marker are passed through unchanged.
 *
 * <p>To deduplicate on a different marker, call the two-argument overload.
 *
 * @param inpPath path of the text file to read
 * @return the retained lines in file order; if reading fails the error is logged and the
 *     lines collected up to that point are returned (a run still open at the moment of the
 *     failure is not included)
 */
private static List<String> bufferReaderAndPrintWriter(String inpPath) {
    return bufferReaderAndPrintWriter(inpPath, "lisi");
}

/**
 * Reads a text file and collapses every run of consecutive lines containing {@code keyword},
 * keeping only the first and the last line of each run.
 *
 * @param inpPath path of the text file to read
 * @param keyword substring that marks a line as belonging to a run; must not be null. An
 *     empty string matches every line, so the whole file is treated as a single run.
 * @return the retained lines in file order; if reading fails the error is logged and the
 *     lines collected up to that point are returned (a run still open at the moment of the
 *     failure is not included)
 */
private static List<String> bufferReaderAndPrintWriter(String inpPath, String keyword) {
    List<String> values = Lists.newArrayList();
    // Only the first and the most recent line of the current run are ever emitted, so there
    // is no need to buffer the whole run.
    String runFirst = null;
    String runLast = null;
    // try-with-resources closes the reader on every path, including when readLine() throws.
    try (BufferedReader bufferedReader = new BufferedReader(new FileReader(inpPath))) {
        String str;
        while ((str = bufferedReader.readLine()) != null) {
            if (str.contains(keyword)) {
                if (runFirst == null) {
                    runFirst = str;
                } else {
                    runLast = str;
                }
                continue;
            }
            // A non-matching line ends the current run (if any) and is kept as-is.
            appendRunEnds(values, runFirst, runLast);
            runFirst = null;
            runLast = null;
            values.add(str);
        }
        // The file may end in the middle of a run.
        appendRunEnds(values, runFirst, runLast);
    } catch (IOException e) {
        // Best effort: report the failure and return what has been collected so far.
        logger.error("Failed to read {}", inpPath, e);
    }
    return values;
}

/**
 * Appends the first line of a finished run to {@code out}, followed by its last line when the
 * run had more than one line. Does nothing when {@code first} is null (no open run).
 */
private static void appendRunEnds(List<String> out, String first, String last) {
    if (first == null) {
        return;
    }
    out.add(first);
    if (last != null) {
        out.add(last);
    }
}
/**
 * Demo entry point: runs the consecutive-line de-duplication on a fixed sample file and
 * prints the resulting list to standard output.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    String sampleFile = "C:\\Users\\xionglang\\Desktop\\test.txt";
    List<String> keptLines = CSVTest.bufferReaderAndPrintWriter(sampleFile);
    System.out.println(keptLines);
}

另一种思路:首先循环读取文件,把连续且字段值相同的行装入同一个集合(例如 HashSet);
然后调用 Java 8 的 Collectors.groupingBy,按你需要去重的字段进行分组。