mongoTemplate如何查询多个时间段平均值

比如有如下数据:

_idtimevalue
12023-06-20 00:00:0024
22023-06-21 23:00:0012
32023-06-22 12:43:13345
42023-06-23 13:35:3435
52023-06-24 17:34:1246
62023-06-25 00:00:0068
72023-06-26 12:44:34464
82023-06-27 00:00:0024
92023-06-28 00:00:0067
102023-06-29 00:00:0024

我有一个List数组作为查询条件,里面有一组时间段,比如:
TimeRange

Date startTime;
Date endTime;

现在我需要查询出多个时间段的平均值,并能将平均值与时间段相匹配,比如:
时间段1:2023-06-19 00:00:00 ~ 2023-06-21 23:59:59
时间段2:2023-06-23 00:00:00 ~ 2023-06-25 23:59:59
时间段2:2023-06-27 00:00:00 ~ 2023-06-29 23:59:59

若得到的最终结果存入map,则最终结果应该是

{
    "2023-06-19 00:00:00~2023-06-21 23:59:59":18,
    "2023-06-23 00:00:00~2023-06-25 23:59:59":49.67,
    "2023-06-27 00:00:00~2023-06-29 23:59:59":38.33
}

我该如何使用mongoTemplate实现这种聚合?
目前我是使用循环查询,但是实在影响性能,有什么一次性就查出的方法吗?

在网上找了很多资料,包括chatGPT问了很多,都没有可行的手段,但从逻辑上看来是个简单问题,希望得到帮助,谢谢各位!

1、mongo插入数据:

db.getCollection("artists").insertMany([
  { "_id" : 1, "time":new Date("2023-06-20 00:00:00") , "value":NumberInt(24)},
  { "_id" : 2, "time":new Date("2023-06-21 23:00:00") , "value":NumberInt(12)},
  { "_id" : 3, "time":new Date("2023-06-22 12:43:13") , "value":NumberInt(345)},
  { "_id" : 4, "time":new Date("2023-06-23 13:35:34") , "value":NumberInt(35)},
  { "_id" : 5, "time":new Date("2023-06-24 17:34:12") , "value":NumberInt(46)},
  { "_id" : 6, "time":new Date("2023-06-25 00:00:00") , "value":NumberInt(68)}, 
  { "_id" : 7, "time":new Date("2023-06-26 12:44:34") , "value":NumberInt(464)},
  { "_id" : 8, "time":new Date("2023-06-27 00:00:00") , "value":NumberInt(24)}, 
  { "_id" : 9, "time":new Date("2023-06-28 00:00:00") , "value":NumberInt(67)},
  { "_id" : 10, "time":new Date("2023-06-29 00:00:00") , "value":NumberInt(24)}, 
])

2、库中查询,由于存储UTC时间,比实际少8小时

img

3、求多个时间段的平均值

db.getCollection("artists").aggregate([
       {'$bucket':{
           groupBy: "$time",
           boundaries: [ new Date("2023-06-19 00:00:00"), new Date("2023-06-21 23:59:59"), new ISODate("2023-06-23 00:00:00"), new ISODate("2023-06-25 23:59:59"), new Date("2023-06-27 00:00:00"), new Date("2023-06-29 23:59:59") ],   // Boundaries for the buckets
           default: "other",
           output: {   
              "count": { $sum: 1 },
              "values":{'$push':"$value"},
              "total_value": {'$sum':"$value"},
              "artists":{
                  '$push':{
                      "vakue": "$value",
                      "time": "$time"
                  }
              }
          }
       }},
       
       {'$project':{
           "values": 1,
           "average": {'$divide':["$total_value", "$count"]},
       }}
])

结果:

img

import org.springframework.data.mongodb.core.aggregation.Aggregation;
import org.springframework.data.mongodb.core.aggregation.AggregationResults;
import org.springframework.data.mongodb.core.aggregation.GroupOperation;
import org.springframework.data.mongodb.core.aggregation.MatchOperation;
import org.springframework.data.mongodb.core.aggregation.ProjectionOperation;
import org.springframework.data.mongodb.core.aggregation.TypedAggregation;
import org.springframework.data.mongodb.core.query.Criteria;
import org.springframework.data.mongodb.core.query.Query;

import java.text.SimpleDateFormat;
import java.util.*;

public class MongoAggregationExample {
    public static void main(String[] args) {
        // 定义时间段列表
        List<TimeRange> timeRanges = new ArrayList<>();
        timeRanges.add(new TimeRange(parseDate("2023-06-19 00:00:00"), parseDate("2023-06-21 23:59:59")));
        timeRanges.add(new TimeRange(parseDate("2023-06-23 00:00:00"), parseDate("2023-06-25 23:59:59")));
        timeRanges.add(new TimeRange(parseDate("2023-06-27 00:00:00"), parseDate("2023-06-29 23:59:59")));

        // 构建聚合管道
        List<Map<String, Object>> result = aggregateTimeRanges(timeRanges);

        // 输出结果
        for (Map<String, Object> entry : result) {
            String timeRange = (String) entry.get("_id");
            double average = (double) entry.get("average");
            System.out.println(timeRange + ": " + average);
        }
    }

    private static List<Map<String, Object>> aggregateTimeRanges(List<TimeRange> timeRanges) {
        List<AggregationOperation> operations = new ArrayList<>();

        for (TimeRange timeRange : timeRanges) {
            Criteria criteria = Criteria.where("time").gte(timeRange.getStartTime()).lte(timeRange.getEndTime());
            MatchOperation matchOperation = Aggregation.match(criteria);
            operations.add(matchOperation);

            GroupOperation groupOperation = Aggregation.group().avg("value").as("average");
            operations.add(groupOperation);

            ProjectionOperation projectionOperation = Aggregation.project("_id", "average")
                    .andExpression("concat(formatDate('%Y-%m-%d %H:%M:%S', _id.startTime), '~', formatDate('%Y-%m-%d %H:%M:%S', _id.endTime))")
                    .as("timeRange");
            operations.add(projectionOperation);
        }

        TypedAggregation<YourDocumentClass> aggregation = Aggregation.newAggregation(YourDocumentClass.class, operations);

        AggregationResults<Map<String, Object>> aggregationResults = mongoTemplate.aggregate(aggregation, "yourCollectionName", Map.class);
        return aggregationResults.getMappedResults();
    }

    private static Date parseDate(String dateString) {
        try {
            SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
            return format.parse(dateString);
        } catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    private static class TimeRange {
        private Date startTime;
        private Date endTime;

        public TimeRange(Date startTime, Date endTime) {
            this.startTime = startTime;
            this.endTime = endTime;
        }

        public Date getStartTime() {
            return startTime;
        }

        public Date getEndTime() {
            return endTime;
        }
    }
}

可以使用 MongoDB 的聚合框架 Aggregation Pipeline 来实现这种聚合查询。聚合框架提供了一系列操作符,可以用于对文档进行分组、筛选、排序、投影、计算等操作,从而实现复杂的聚合查询。

以下是使用聚合框架查询多个时间段平均值的示例代码:

import org.springframework.data.mongodb.core.MongoTemplate;
import org.springframework.data.mongodb.core.aggregation.Aggregation;
import org.springframework.data.mongodb.core.aggregation.AggregationResults;

import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.springframework.data.mongodb.core.aggregation.Aggregation.*;

public class Test {

    private MongoTemplate mongoTemplate;

    public Map<String, Double> aggregate(List<TimeRange> timeRanges) {
        Aggregation aggregation = newAggregation(
                match(Criteria.where("time").gte(timeRanges.get(0).getStartTime()).lte(timeRanges.get(timeRanges.size() - 1).getEndTime())),
                project("value").and("time").dateAsFormattedString("%Y-%m-%d %H:%M:%S").as("time"),
                group("time").avg("value").as("avg"),
                project("_id").and("avg").as("value").and("time").concat(" ").and("time").substring(0, 19).concat("~").and("time").substring(21).as("timeRange"),
                match(Criteria.where("timeRange").in(getTimeRangesAsString(timeRanges))),
                sort(ascending("timeRange"))
        );
        AggregationResults<Map> results = mongoTemplate.aggregate(aggregation, "collectionName", Map.class);
        List<Map> list = results.getMappedResults();
        Map<String, Double> result = new HashMap<>();
        for (Map map : list) {
            result.put((String) map.get("timeRange"), (Double) map.get("value"));
        }
        return result;
    }

    private List<String> getTimeRangesAsString(List<TimeRange> timeRanges) {
        List<String> list = new ArrayList<>();
        for (TimeRange timeRange : timeRanges) {
            String startTime = formatDate(timeRange.getStartTime());
            String endTime = formatDate(timeRange.getEndTime());
            list.add(startTime + "~" + endTime);
        }
        return list;
    }

    private String formatDate(Date date) {
        SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        return formatter.format(date);
    }
}

在上面的代码中,aggregate() 方法接受一个 TimeRange 列表作为参数,其中包含多个时间段。首先使用 match() 操作筛选出所有在这些时间段内的文档,然后使用 project() 操作将时间字段转换为字符串格式,并提取出 value 字段作为计算平均值的依据。

接下来使用 group() 操作将文档按照时间分组,并计算每个时间段内 value 字段的平均值。然后使用 project() 操作将 _id 字段去掉,并将时间段格式化为字符串,例如 "2023-06-19 00:00:00~2023-06-21 23:59:59",以便后续筛选。

使用 match() 操作筛选出在指定时间段内的文档,并使用 sort() 操作按照时间段排序。最后将结果转换为 Map<String, Double> 格式,并返回。

需要注意的是,在使用聚合框架进行查询时,要注意查询语句的效率和可读性。可以使用多个操作符组合来实现复杂的查询,但是要注意不要使用过多的操作符,以免影响查询性能。

使用MongoDB的聚合框架来实现这个功能

  1. $match:用于筛选出符合时间段条件的文档。
  2. $group:用于按时间段对文档进行分组,并计算每个时间段的平均值。
  3. $project:用于将结果重新格式化,将时间段和平均值合并成一个字符串键值对。

伪代码

List<TimeRange> timeRanges = ...; // 时间段列表

List<Document> pipeline = new ArrayList<>();
for (TimeRange timeRange : timeRanges) {
    Document match = new Document("$match", new Document("time", 
        new Document("$gte", timeRange.startTime).append("$lte", timeRange.endTime)));
    pipeline.add(match);
}

Document groupFields = new Document("_id", new Document("start", new Document("$dateToString", 
    new Document("format", "%Y-%m-%d %H:%M:%S").append("date", "$time"))).append("end", new Document("$dateToString", 
    new Document("format", "%Y-%m-%d %H:%M:%S").append("date", "$time"))));
groupFields.append("avgValue", new Document("$avg", "$value"));
Document group = new Document("$group", groupFields);
pipeline.add(group);

Document projectFields = new Document("_id", 0);
for (TimeRange timeRange : timeRanges) {
    String key = timeRange.startTime.toString() + "~" + timeRange.endTime.toString();
    projectFields.append(key, new Document("$cond", Arrays.asList(new Document("$and", Arrays.asList(
        new Document("$gte", Arrays.asList("$_id.start", timeRange.startTime.toString())),
        new Document("$lte", Arrays.asList("$_id.end", timeRange.endTime.toString()))
    )), "$avgValue", null)));
}
Document project = new Document("$project", projectFields);
pipeline.add(project);

AggregateIterable<Document> result = mongoTemplate.getCollection("your_collection").aggregate(pipeline);
// 定义查询条件
List<TimeRange> timeRanges = new ArrayList<>();
timeRanges.add(new TimeRange(parseDate("2023-06-19 00:00:00"), parseDate("2023-06-21 23:59:59")));
timeRanges.add(new TimeRange(parseDate("2023-06-23 00:00:00"), parseDate("2023-06-25 23:59:59")));
timeRanges.add(new TimeRange(parseDate("2023-06-27 00:00:00"), parseDate("2023-06-29 23:59:59")));

// 构建聚合管道
List<Document> pipeline = new ArrayList<>();
for (TimeRange timeRange : timeRanges) {
    Document match = new Document("$match", new Document("time", new Document("$gte", timeRange.getStartTime()).append("$lte", timeRange.getEndTime())));
    pipeline.add(match);
    Document group = new Document("$group", new Document("_id", timeRange.toString()).append("avgValue", new Document("$avg", "$value")));
    pipeline.add(group);
}
Document project = new Document("$project", new Document("_id", 0).append("timeRange", "$_id").append("avgValue", 1));
pipeline.add(project);

// 执行聚合查询
AggregationResults<Document> results = mongoTemplate.aggregate(Aggregation.newAggregation(pipeline), "collectionName", Document.class);

// 将结果转换为Map
Map<String, Double> resultMap = new HashMap<>();
for (Document document : results) {
    String timeRange = document.getString("timeRange");
    Double avgValue = document.getDouble("avgValue");
    resultMap.put(timeRange, avgValue);
}