数据量达到上百万级时,用以下方法比较并找出 10000 条记录需要 7 分钟,效率太慢了,跪求高手指点。
/**
 * Scans every record of the first database and looks its key up in the second database.
 * For each key found on both sides, one side's value is collected and appended in batches
 * to that side's "&lt;databaseName&gt;_XT.txt" feedback file.
 *
 * <p>Fixes over the previous version: a batch is cleared once it has been appended (it used
 * to be re-appended in full on every flush), the final partial batch is written, and the
 * cursor/transaction are always released in {@code finally}.
 *
 * <p>NOTE(review): assumes {@code FileUtils.appendContentList} only appends the given list and
 * does not clear it; the explicit {@code clear()} is harmless if it does.
 *
 * @param firstBerkeleyDBinstance  helper used to open the first environment and database
 * @param firstDataBaseName        name of the first database; also the prefix of its output file
 * @param firstFeedbackFilePath    directory passed to FileUtils for the first side's output
 * @param secondBerkeleyDBinstance helper used to open the second environment and database
 * @param secondDataBaseName       name of the second database; also the prefix of its output file
 * @param secondFeedbackFilePath   directory passed to FileUtils for the second side's output
 * @param filePath                 prefix prepended to each database name to locate its environment
 * @param isStandard               true to write the first side's values for matching keys,
 *                                 false to write the second side's values
 */
public void compareTwoSideOneToOne(BerkeleyDB firstBerkeleyDBinstance, String firstDataBaseName, String firstFeedbackFilePath,
        BerkeleyDB secondBerkeleyDBinstance, String secondDataBaseName, String secondFeedbackFilePath,
        String filePath, boolean isStandard) {
    // Obtain the database environments.
    Environment firstEnvironment = firstBerkeleyDBinstance.getDatabaseEnvironment(filePath + firstDataBaseName);
    Environment secondEnvironment = secondBerkeleyDBinstance.getDatabaseEnvironment(filePath + secondDataBaseName);
    // Obtain the database configurations; both sides are only read here.
    DatabaseConfig firstDataBaseConfig = firstBerkeleyDBinstance.getDatabaseConfig();
    firstDataBaseConfig.setReadOnly(true);
    DatabaseConfig secondDataBaseConfig = secondBerkeleyDBinstance.getDatabaseConfig();
    secondDataBaseConfig.setReadOnly(true);
    // Obtain the database handles.
    Database firstDatabase = firstBerkeleyDBinstance.getDataBase(firstEnvironment, firstDataBaseName, firstDataBaseConfig);
    Database secondDatabase = secondBerkeleyDBinstance.getDataBase(secondEnvironment, secondDataBaseName, secondDataBaseConfig);

    // Only the first side is iterated with a cursor. The second side is queried through
    // secondDatabase.get(null, ...), so it needs neither a cursor nor a transaction.
    Cursor firstCursor = null;
    Transaction firstTransaction = null;
    try {
        firstTransaction = firstEnvironment.beginTransaction(null, null);
        firstCursor = firstDatabase.openCursor(firstTransaction, new CursorConfig());

        DatabaseEntry firstFoundKey = new DatabaseEntry();
        DatabaseEntry firstFoundData = new DatabaseEntry();
        DatabaseEntry secondFoundData = new DatabaseEntry();
        List<String> firstXtList = new ArrayList<String>();
        List<String> secondXtList = new ArrayList<String>();
        // Output file names do not change during the scan.
        String firstXtFileName = firstDataBaseName + "_XT.txt";
        String secondXtFileName = secondDataBaseName + "_XT.txt";

        logger.info("-------------遍历第一方游标获取数据 与第二方比----------------");
        while (firstCursor.getNext(firstFoundKey, firstFoundData, LockMode.DEFAULT) == OperationStatus.SUCCESS) {
            // Look the first side's key up in the second database.
            OperationStatus status = secondDatabase.get(null, firstFoundKey, secondFoundData, LockMode.DEFAULT);
            if (status != OperationStatus.SUCCESS) {
                continue;
            }
            if (isStandard) {
                // First side is the reference: record its value. Decode only the value that is used.
                firstXtList.add(new String(firstFoundData.getData(), EncodingUtils.UTF_8));
                if (firstXtList.size() >= 1000) {
                    FileUtils.appendContentList(firstFeedbackFilePath, firstXtFileName, firstXtList);
                    // Drop what was just written so it is not appended again by the next flush.
                    firstXtList.clear();
                }
            } else {
                // Otherwise record the second side's value.
                secondXtList.add(new String(secondFoundData.getData(), EncodingUtils.UTF_8));
                if (secondXtList.size() >= 10000) {
                    logger.info("-------------10000条写入文件开始时间----------------" + new Date());
                    FileUtils.appendContentList(secondFeedbackFilePath, secondXtFileName, secondXtList);
                    logger.info("-------------10000条写入文件结束时间----------------" + new Date());
                    secondXtList.clear();
                }
            }
        }

        // Write whatever is left over from the last, incomplete batch.
        if (!firstXtList.isEmpty()) {
            FileUtils.appendContentList(firstFeedbackFilePath, firstXtFileName, firstXtList);
            firstXtList.clear();
        }
        if (!secondXtList.isEmpty()) {
            FileUtils.appendContentList(secondFeedbackFilePath, secondXtFileName, secondXtList);
            secondXtList.clear();
        }

        // Success path: close the cursor before committing its transaction, then mark both as
        // released so the finally block does not touch them again.
        firstCursor.close();
        firstCursor = null;
        firstTransaction.commit();
        firstTransaction = null;
    } catch (Exception e) {
        // Same best-effort policy as before: report the failure and return normally.
        e.printStackTrace();
    } finally {
        // Failure path: the cursor has to be closed before its transaction is aborted.
        closeCursorQuietly(firstCursor);
        abortTransactionQuietly(firstTransaction);
    }
}

/** Closes the cursor if it is non-null; a failure is reported but not propagated. */
private static void closeCursorQuietly(Cursor cursor) {
    if (cursor == null) {
        return;
    }
    try {
        cursor.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

/** Aborts the transaction if it is non-null; a failure is reported but not propagated. */
private static void abortTransactionQuietly(Transaction transaction) {
    if (transaction == null) {
        return;
    }
    try {
        transaction.abort();
    } catch (Exception e) {
        e.printStackTrace();
    }
}
将其中一个数据库的数据放入另一个,然后做连接查询。你现在的写法相当于两重循环,算法复杂度是 O(N^2);连接查询可以把每次查找降低到 O(log N)。
关键是 Berkeley DB 是文件型数据库,不能像关系数据库那样对两个表做连接查询。