scripted_metric dedup via HashMap and the cardinality aggregation return inconsistent results

Elasticsearch 7.10.0  

GET repair/_search
{
  "size": 0,
  "query": {
    "bool": {
      "must": [
        {
          "term": {
            "typeName": {
              "value": "书面",
              "boost": 1
            }
          }
        },
        {
          "range": {
            "processStatus": {
              "from": null,
              "to": 12,
              "include_lower": true,
              "include_upper": false,
              "boost": 1
            }
          }
        },
        {
          "terms": {
            "projectId": [
              "499"
            ],
            "boost": 1
          }
        }
      ],
      "adjust_pure_negative": true,
      "boost": 1
    }
  },
  "aggregations": {
    "countOrder": {
      "terms": {
        "field": "projectId",
        "size": 732,
        "min_doc_count": 1,
        "shard_min_doc_count": 0,
        "show_term_doc_count_error": false,
        "order": [
          {
            "_count": "desc"
          },
          {
            "_key": "asc"
          }
        ]
      },
      "aggregations": {
        "houseClose": {
          "scripted_metric": {
            "init_script": {
              "source": "state.numas=new HashMap();",
              "lang": "painless"
            },
            "map_script": {
              "source": """
              if(doc.houseId.size()>0){
                String houseKey = doc.houseId.value;
                state.numas.put(houseKey,1);
              }
            """,
              "lang": "painless"
            },
            "combine_script": {
              "source": """
              double item_finish_count=0;
              for(key in state.numas.keySet()){
                item_finish_count+=1;
              }
              return item_finish_count;""",
              "lang": "painless"
            },
            "reduce_script": {
              "source": """double result=0;
              for(e in states){
                result+=e;
              }
              return result;""",
                "lang": "painless"
              },
            "params": {
              "close_sum_key": "close_sum3",
              "house_sum_key": "house_sum3"
            }
          }
        },
        "houseCount": {
          "cardinality": {
            "field": "houseId"
          }
        }
      }
    }
  }
}

Result:

{
  "took" : 392,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 10000,
      "relation" : "gte"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "countOrder" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "499",
          "doc_count" : 17573,
          "houseCount" : {
            "value" : 1256
          },
          "houseClose" : {
            "value" : 4102.0
          }
        }
      ]
    }
  }
}

Problem: houseClose and houseCount should be identical, but the results differ widely (4102.0 vs 1256).

The cause: this index has five primary shards (see _shards in the response; five was the default before Elasticsearch 7.0). combine_script runs once per shard, so each shard returns its own distinct-houseId count, and the reduce_script above simply sums those per-shard counts. Why do the map keys need to be re-collected and deduplicated? Because documents with the same house key can land on different shards: each shard sees a different slice of the data, so the per-shard results must first be gathered and the keys deduplicated across shards. The fix is to return the keys themselves from combine_script and merge and deduplicate them in reduce_script; after that the result is correct.
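A minimal sketch of a corrected houseClose aggregation along those lines (it keeps the field and aggregation names from the query above and drops the unused params block; the exact merge strategy is one of several workable choices): combine_script hands the per-shard map back instead of a count, and reduce_script merges all keys into a single HashSet before counting.

"houseClose": {
  "scripted_metric": {
    "init_script": {
      "source": "state.numas = new HashMap();",
      "lang": "painless"
    },
    "map_script": {
      "source": """
      // Record every houseId seen on this shard
      if (doc.houseId.size() > 0) {
        state.numas.put(doc.houseId.value, 1);
      }
      """,
      "lang": "painless"
    },
    "combine_script": {
      "source": "return state.numas;",
      "lang": "painless"
    },
    "reduce_script": {
      "source": """
      // Merge the per-shard maps and count each distinct key exactly once
      Set merged = new HashSet();
      for (shardMap in states) {
        if (shardMap != null) {
          merged.addAll(shardMap.keySet());
        }
      }
      return merged.size();
      """,
      "lang": "painless"
    }
  }
}

With this change houseClose should match houseCount. Note that cardinality is itself an approximate aggregation (HyperLogLog++), but below its default precision_threshold of 3000 it is effectively exact, so the ~1256 distinct houseId values here are counted accurately.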