比如有如下数据:
_id | time | value |
---|---|---|
1 | 2023-06-20 00:00:00 | 24 |
2 | 2023-06-21 23:00:00 | 12 |
3 | 2023-06-22 12:43:13 | 345 |
4 | 2023-06-23 13:35:34 | 35 |
5 | 2023-06-24 17:34:12 | 46 |
6 | 2023-06-25 00:00:00 | 68 |
7 | 2023-06-26 12:44:34 | 464 |
8 | 2023-06-27 00:00:00 | 24 |
9 | 2023-06-28 00:00:00 | 67 |
10 | 2023-06-29 00:00:00 | 24 |
我有一个List数组作为查询条件,里面有一组时间段,比如:
TimeRange
Date startTime;
Date endTime;
现在我需要查询出多个时间段的平均值,并能将平均值与时间段相匹配,比如:
时间段1:2023-06-19 00:00:00 ~ 2023-06-21 23:59:59
时间段2:2023-06-23 00:00:00 ~ 2023-06-25 23:59:59
时间段2:2023-06-27 00:00:00 ~ 2023-06-29 23:59:59
若得到的最终结果存入map,则最终结果应该是
{
"2023-06-19 00:00:00~2023-06-21 23:59:59":18,
"2023-06-23 00:00:00~2023-06-25 23:59:59":49.67,
"2023-06-27 00:00:00~2023-06-29 23:59:59":38.33
}
我该如何使用mongoTemplate实现这种聚合?
目前我是使用循环查询,但是实在影响性能,有什么一次性就查出的方法吗?
在网上找了很多资料,包括chatGPT问了很多,都没有可行的手段,但从逻辑上看来是个简单问题,希望得到帮助,谢谢各位!
1、mongo插入数据:
db.getCollection("artists").insertMany([
{ "_id" : 1, "time":new Date("2023-06-20 00:00:00") , "value":NumberInt(24)},
{ "_id" : 2, "time":new Date("2023-06-21 23:00:00") , "value":NumberInt(12)},
{ "_id" : 3, "time":new Date("2023-06-22 12:43:13") , "value":NumberInt(345)},
{ "_id" : 4, "time":new Date("2023-06-23 13:35:34") , "value":NumberInt(35)},
{ "_id" : 5, "time":new Date("2023-06-24 17:34:12") , "value":NumberInt(46)},
{ "_id" : 6, "time":new Date("2023-06-25 00:00:00") , "value":NumberInt(68)},
{ "_id" : 7, "time":new Date("2023-06-26 12:44:34") , "value":NumberInt(464)},
{ "_id" : 8, "time":new Date("2023-06-27 00:00:00") , "value":NumberInt(24)},
{ "_id" : 9, "time":new Date("2023-06-28 00:00:00") , "value":NumberInt(67)},
{ "_id" : 10, "time":new Date("2023-06-29 00:00:00") , "value":NumberInt(24)},
])
2、库中查询,由于存储UTC时间,比实际少8小时
3、求多个时间段的平均值
db.getCollection("artists").aggregate([
{'$bucket':{
groupBy: "$time",
boundaries: [ new Date("2023-06-19 00:00:00"), new Date("2023-06-21 23:59:59"), new ISODate("2023-06-23 00:00:00"), new ISODate("2023-06-25 23:59:59"), new Date("2023-06-27 00:00:00"), new Date("2023-06-29 23:59:59") ], // Boundaries for the buckets
default: "other",
output: {
"count": { $sum: 1 },
"values":{'$push':"$value"},
"total_value": {'$sum':"$value"},
"artists":{
'$push':{
"vakue": "$value",
"time": "$time"
}
}
}
}},
{'$project':{
"values": 1,
"average": {'$divide':["$total_value", "$count"]},
}}
])
结果:
import org.springframework.data.mongodb.core.aggregation.Aggregation;
import org.springframework.data.mongodb.core.aggregation.AggregationResults;
import org.springframework.data.mongodb.core.aggregation.GroupOperation;
import org.springframework.data.mongodb.core.aggregation.MatchOperation;
import org.springframework.data.mongodb.core.aggregation.ProjectionOperation;
import org.springframework.data.mongodb.core.aggregation.TypedAggregation;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;
import java.text.SimpleDateFormat;
import java.util.*;
public class MongoAggregationExample {
public static void main(String[] args) {
// 定义时间段列表
List<TimeRange> timeRanges = new ArrayList<>();
timeRanges.add(new TimeRange(parseDate("2023-06-19 00:00:00"), parseDate("2023-06-21 23:59:59")));
timeRanges.add(new TimeRange(parseDate("2023-06-23 00:00:00"), parseDate("2023-06-25 23:59:59")));
timeRanges.add(new TimeRange(parseDate("2023-06-27 00:00:00"), parseDate("2023-06-29 23:59:59")));
// 构建聚合管道
List<Map<String, Object>> result = aggregateTimeRanges(timeRanges);
// 输出结果
for (Map<String, Object> entry : result) {
String timeRange = (String) entry.get("_id");
double average = (double) entry.get("average");
System.out.println(timeRange + ": " + average);
}
}
private static List<Map<String, Object>> aggregateTimeRanges(List<TimeRange> timeRanges) {
List<AggregationOperation> operations = new ArrayList<>();
for (TimeRange timeRange : timeRanges) {
Criteria criteria = Criteria.where("time").gte(timeRange.getStartTime()).lte(timeRange.getEndTime());
MatchOperation matchOperation = Aggregation.match(criteria);
operations.add(matchOperation);
GroupOperation groupOperation = Aggregation.group().avg("value").as("average");
operations.add(groupOperation);
ProjectionOperation projectionOperation = Aggregation.project("_id", "average")
.andExpression("concat(formatDate('%Y-%m-%d %H:%M:%S', _id.startTime), '~', formatDate('%Y-%m-%d %H:%M:%S', _id.endTime))")
.as("timeRange");
operations.add(projectionOperation);
}
TypedAggregation<YourDocumentClass> aggregation = Aggregation.newAggregation(YourDocumentClass.class, operations);
AggregationResults<Map<String, Object>> aggregationResults = mongoTemplate.aggregate(aggregation, "yourCollectionName", Map.class);
return aggregationResults.getMappedResults();
}
private static Date parseDate(String dateString) {
try {
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
return format.parse(dateString);
} catch (Exception e) {
e.printStackTrace();
return null;
}
}
private static class TimeRange {
private Date startTime;
private Date endTime;
public TimeRange(Date startTime, Date endTime) {
this.startTime = startTime;
this.endTime = endTime;
}
public Date getStartTime() {
return startTime;
}
public Date getEndTime() {
return endTime;
}
}
}
可以使用 MongoDB 的聚合框架 Aggregation Pipeline
来实现这种聚合查询。聚合框架提供了一系列操作符,可以用于对文档进行分组、筛选、排序、投影、计算等操作,从而实现复杂的聚合查询。
以下是使用聚合框架查询多个时间段平均值的示例代码:
import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.aggregation.Aggregation;
import org.springframework.data.mongodb.core.aggregation.AggregationResults;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static org.springframework.data.mongodb.core.aggregation.Aggregation.*;
public class Test {
private MongoTemplate mongoTemplate;
public Map<String, Double> aggregate(List<TimeRange> timeRanges) {
Aggregation aggregation = newAggregation(
match(Criteria.where("time").gte(timeRanges.get(0).getStartTime()).lte(timeRanges.get(timeRanges.size() - 1).getEndTime())),
project("value").and("time").dateAsFormattedString("%Y-%m-%d %H:%M:%S").as("time"),
group("time").avg("value").as("avg"),
project("_id").and("avg").as("value").and("time").concat(" ").and("time").substring(0, 19).concat("~").and("time").substring(21).as("timeRange"),
match(Criteria.where("timeRange").in(getTimeRangesAsString(timeRanges))),
sort(ascending("timeRange"))
);
AggregationResults<Map> results = mongoTemplate.aggregate(aggregation, "collectionName", Map.class);
List<Map> list = results.getMappedResults();
Map<String, Double> result = new HashMap<>();
for (Map map : list) {
result.put((String) map.get("timeRange"), (Double) map.get("value"));
}
return result;
}
private List<String> getTimeRangesAsString(List<TimeRange> timeRanges) {
List<String> list = new ArrayList<>();
for (TimeRange timeRange : timeRanges) {
String startTime = formatDate(timeRange.getStartTime());
String endTime = formatDate(timeRange.getEndTime());
list.add(startTime + "~" + endTime);
}
return list;
}
private String formatDate(Date date) {
SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
return formatter.format(date);
}
}
在上面的代码中,aggregate()
方法接受一个 TimeRange
列表作为参数,其中包含多个时间段。首先使用 match()
操作筛选出所有在这些时间段内的文档,然后使用 project()
操作将时间字段转换为字符串格式,并提取出 value
字段作为计算平均值的依据。
接下来使用 group()
操作将文档按照时间分组,并计算每个时间段内 value
字段的平均值。然后使用 project()
操作将 _id
字段去掉,并将时间段格式化为字符串,例如 "2023-06-19 00:00:00~2023-06-21 23:59:59"
,以便后续筛选。
使用 match()
操作筛选出在指定时间段内的文档,并使用 sort()
操作按照时间段排序。最后将结果转换为 Map<String, Double>
格式,并返回。
需要注意的是,在使用聚合框架进行查询时,要注意查询语句的效率和可读性。可以使用多个操作符组合来实现复杂的查询,但是要注意不要使用过多的操作符,以免影响查询性能。
使用MongoDB的聚合框架来实现这个功能
伪代码
List<TimeRange> timeRanges = ...; // 时间段列表
List<Document> pipeline = new ArrayList<>();
for (TimeRange timeRange : timeRanges) {
Document match = new Document("$match", new Document("time",
new Document("$gte", timeRange.startTime).append("$lte", timeRange.endTime)));
pipeline.add(match);
}
Document groupFields = new Document("_id", new Document("start", new Document("$dateToString",
new Document("format", "%Y-%m-%d %H:%M:%S").append("date", "$time"))).append("end", new Document("$dateToString",
new Document("format", "%Y-%m-%d %H:%M:%S").append("date", "$time"))));
groupFields.append("avgValue", new Document("$avg", "$value"));
Document group = new Document("$group", groupFields);
pipeline.add(group);
Document projectFields = new Document("_id", 0);
for (TimeRange timeRange : timeRanges) {
String key = timeRange.startTime.toString() + "~" + timeRange.endTime.toString();
projectFields.append(key, new Document("$cond", Arrays.asList(new Document("$and", Arrays.asList(
new Document("$gte", Arrays.asList("$_id.start", timeRange.startTime.toString())),
new Document("$lte", Arrays.asList("$_id.end", timeRange.endTime.toString()))
)), "$avgValue", null)));
}
Document project = new Document("$project", projectFields);
pipeline.add(project);
AggregateIterable<Document> result = mongoTemplate.getCollection("your_collection").aggregate(pipeline);
// 定义查询条件
List<TimeRange> timeRanges = new ArrayList<>();
timeRanges.add(new TimeRange(parseDate("2023-06-19 00:00:00"), parseDate("2023-06-21 23:59:59")));
timeRanges.add(new TimeRange(parseDate("2023-06-23 00:00:00"), parseDate("2023-06-25 23:59:59")));
timeRanges.add(new TimeRange(parseDate("2023-06-27 00:00:00"), parseDate("2023-06-29 23:59:59")));
// 构建聚合管道
List<Document> pipeline = new ArrayList<>();
for (TimeRange timeRange : timeRanges) {
Document match = new Document("$match", new Document("time", new Document("$gte", timeRange.getStartTime()).append("$lte", timeRange.getEndTime())));
pipeline.add(match);
Document group = new Document("$group", new Document("_id", timeRange.toString()).append("avgValue", new Document("$avg", "$value")));
pipeline.add(group);
}
Document project = new Document("$project", new Document("_id", 0).append("timeRange", "$_id").append("avgValue", 1));
pipeline.add(project);
// 执行聚合查询
AggregationResults<Document> results = mongoTemplate.aggregate(Aggregation.newAggregation(pipeline), "collectionName", Document.class);
// 将结果转换为Map
Map<String, Double> resultMap = new HashMap<>();
for (Document document : results) {
String timeRange = document.getString("timeRange");
Double avgValue = document.getDouble("avgValue");
resultMap.put(timeRange, avgValue);
}