Elasticsearch: top_hits aggregation on nested objects

I have an index containing CustomerProfile documents. Each document has a CustomerInsightTargets field, a nested object with CustomerInsightSource and CustomerInsightValue properties, which can be an array with any number of elements. What I'm trying to achieve is autocomplete (returning the top 5 suggestions) on CustomerInsightTargets.CustomerInsightValue, grouped by CustomerInsightTargets.CustomerInsightSource.

It would be helpful if someone could explain how to select only the matching subset of nested objects from each document and how to use those nested objects in aggregations.

    {
  "customerinsights": {
    "aliases": {},
    "mappings": {
      "customerprofile": {
        "properties": {
          "CreatedById": {
            "type": "long"
          },
          "CreatedDateTime": {
            "type": "date"
          },
          "CustomerInsightTargets": {
            "type": "nested",
            "properties": {
              "CustomerInsightSource": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "CustomerInsightValue": {
                "type": "text",
                "term_vector": "yes",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                },
                "analyzer": "ngram_tokenizer_analyzer"
              },
              "CustomerProfileId": {
                "type": "long"
              },
              "Guid": {
                "type": "text",
                "fields": {
                  "keyword": {
                    "type": "keyword",
                    "ignore_above": 256
                  }
                }
              },
              "Id": {
                "type": "long"
              }
            }
          },
          "DisplayName": {
            "type": "text",
            "term_vector": "yes",
            "analyzer": "ngram_tokenizer_analyzer"
          },
          "Email": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          },
          "Id": {
            "type": "long"
          },
          "ImageUrl": {
            "type": "text",
            "fields": {
              "keyword": {
                "type": "keyword",
                "ignore_above": 256
              }
            }
          }
        }
      }
    },
    "settings": {
      "index": {
        "number_of_shards": "1",
        "provided_name": "customerinsights",
        "creation_date": "1484860145041",
        "analysis": {
          "analyzer": {
            "ngram_tokenizer_analyzer": {
              "type": "custom",
              "tokenizer": "ngram_tokenizer"
            }
          },
          "tokenizer": {
            "ngram_tokenizer": {
              "type": "nGram",
              "min_gram": "1",
              "max_gram": "10"
            }
          }
        },
        "number_of_replicas": "2",
        "uuid": "nOyI0O2cTO2JOFvqIoE8JQ",
        "version": {
          "created": "5010199"
        }
      }
    }
  }
}

Here is an example document:

    {
      "Id": 9072856,

      "CreatedDateTime": "2017-01-12T11:26:58.413Z",
      "CreatedById": 9108469,
      "DisplayName": "valentinos",
      "Email": "valentinos@mail.com",
      "CustomerInsightTargets": [
        {
          "Id": 160,
          "CustomerProfileId": 9072856,
          "CustomerInsightSource": "Tags",
          "CustomerInsightValue": "Tag1",
          "Guid": "00000000-0000-0000-0000-000000000000"
        },
        {
          "Id": 160,
          "CustomerProfileId": 9072856,
          "CustomerInsightSource": "ProfileName",
          "CustomerInsightValue": "valentinos",
          "Guid": "00000000-0000-0000-0000-000000000000"
        },
        {
          "Id": 160,
          "CustomerProfileId": 9072856,
          "CustomerInsightSource": "Playground",
          "CustomerInsightValue": "Wiki",
          "Guid": "00000000-0000-0000-0000-000000000000"
        }
      ]
    }

If I run a top_hits aggregation, the result includes all targets from a document, even if only one of them matches my search text. Example:

GET customerinsights/_search
    {
  "query": {
    "bool": {
      "must": [
        {
          "nested": {
            "path": "CustomerInsightTargets",
            "query": {
              "bool": {
                "must": [
                  {
                    "match": {
                      "CustomerInsightTargets.CustomerInsightValue": {
                        "query": "2017",
                        "operator": "AND",
                        "fuzziness": 2
                      }
                    }
                  }
                ]
              }
            }

          }
        }
      ]
    }
  } ,
  "aggs": {
    "root": {
      "nested": {
        "path": "CustomerInsightTargets"
      },
      "aggs": {
        "top_tags": {
          "terms": {
            "field": "CustomerInsightTargets.CustomerInsightSource.keyword"
          },
          "aggs": {
            "top_tag_hits": {
              "top_hits": {
                "sort": [
                  {
                    "_score": {
                      "order": "desc"
                    }
                  }
                ],
                "size": 5,
                "_source": "CustomerInsightTargets"
              }
            }
          }
        }
      }
    }
  },
  "size": 0,
  "_source": "CustomerInsightTargets"
}

, , "", _score. significant_terms, , aggs ( _count), .

+4

Source: https://habr.com/ru/post/1667558/


All Articles