Elasticsearch query and filter give different document counts when using the Lucene fuzzy operator

Using Elasticsearch v1.7.2 and a rather large index, I get different document counts for the following two searches, both of which use a fuzzy search in query_string.

Query:

{
  "query": {
     "query_string": {
        "query": "rapt~4"
     }
  }
}

Filter:

{
 "filter": {
    "query": {
       "query_string": {
          "query": "rapt~4"
       }
    }
 }
}

The filter returns 5% more results than the query. Why would the document counts differ? Are there options that I can specify to make them consistent?

, , . (< 10) , , , . , .

explain = true, , . boost, queryNorm, idf . , boost queryNorm , idf .

. , , , ( "" ). . , , . .

Query:

curl -XPOST "http://localhost:9200/index-name/example-type/_search" -H "Content-Type: application/json" -d'{"query":{"query_string":{"query":"rapt~"}},"explain":true}'

Response:

{
"_source": {
  "type": "example",
  "content": "to the fact that"
},
"_explanation": {
  "value": 0.10740301,
  "description": "sum of:",
  "details": [
    {
      "value": 0.10740301,
      "description": "weight(_all:fact^0.5 in 465) [PerFieldSimilarity], result of:",
      "details": [
        {
          "value": 0.10740301,
          "description": "score(doc=465,freq=2.0), product of:",
          "details": [
            {
              "value": 0.11091774,
              "description": "queryWeight, product of:",
              "details": [
                {
                  "value": 0.5,
                  "description": "boost"
                },
                {
                  "value": 7.303468,
                  "description": "idf(docFreq=68, maxDocs=37706)"
                },
                {
                  "value": 0.03037399,
                  "description": "queryNorm"
                }
              ]
            },
            {
              "value": 0.96831226,
              "description": "fieldWeight in 465, product of:",
              "details": [
                {
                  "value": 1.4142135,
                  "description": "tf(freq=2.0), with freq of:",
                  "details": [
                    {
                      "value": 2,
                      "description": "termFreq=2.0"
                    }
                  ]
                },
                {
                  "value": 7.303468,
                  "description": "idf(docFreq=68, maxDocs=37706)"
                },
                {
                  "value": 0.09375,
                  "description": "fieldNorm(doc=465)"
                }
              ]
            }
          ]
        }
      ]
    }
  ]
}
}

Filter:

curl -XPOST "http://localhost:9200/index-name/example-type/_search" -H "Content-Type: application/json" -d'{"query":{"filtered":{"filter":{"fquery":{"query":{"query_string":{"query":"rapt~"}}}}}},"explain":true}'

Response:

{
"_source": {
  "type": "example",
  "content": "to the fact that"
},
"_explanation": {
  "value": 1,
  "description": "ConstantScore(cache(+_type:example-type +org.elasticsearch.index.search.nested.NonNestedDocsFilter@737a6633)), product of:",
  "details": [
    {
      "value": 1,
      "description": "boost"
    },
    {
      "value": 1,
      "description": "queryNorm"
    }
  ]
}
}

, , ( , , ), :

, :

curl -XPOST "http://localhost:9200/index-name/example-type/_search" -H "Content-Type: application/json" -d'{"query":{"constant_score":{"filter":{"query":{"query_string":{"query":"rapt~"}}}}},"explain":true}'

Response:

{
"_source": {
  "type": "example",
  "content": "to the fact that"
},
"_explanation": {
  "value": 1,
  "description": "ConstantScore(QueryWrapperFilter(_all:rapt~2)), product of:",
  "details": [
    {
      "value": 1,
      "description": "boost"
    },
    {
      "value": 1,
      "description": "queryNorm"
    }
  ]
}
}

, , , , , 0. , "" , .

: 238 ( , ngram, ). cluster, json Dropbox. , , id = 138:

{
 "explain": true,
 "query": {
    "bool": {
       "must_not": [
          {
             "query_string": {
                "query": "rap~",
                "fields": [
                   "body"
                ]
             }
          }
       ],
       "must": [
          {
             "constant_score": {
                "filter": {
                   "query": {
                      "query_string": {
                         "query": "rap~",
                         "fields": [
                            "body"
                         ]
                      }
                   }
                }
             }
          }
       ]
    }
 }
}
+4
1

Elasticsearch Elasticsearch 5.x, filter post_filter. .

Elasticsearch 5.0 ( ), post_filter, .

, , . , , .

... , ...

, (). ; , "" /.

, Elasticsearch 1.x( 2.x !), fquery. , :

Query:

{
  "query": {
     "query_string": {
        "query": "rapt~"
     }
  }
}

Filter:

{
  "query": {
    "filtered": {
      "filter": {
        "fquery": {
          "query": {
            "query_string": {
              "query": "rapt~"
            }
          }
        }
      }
    }
  }
}

ES 2.x ( ):

{
  "query": {
    "bool": {
      "filter": {
        "query_string": {
          "query": "rapt~"
        }
      }
    }
  }
}
0

Source: https://habr.com/ru/post/1623107/


All Articles