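Aggregating URLs by domain:
To aggregate URLs by domain, define a custom analyzer that uses the keyword tokenizer (it emits the whole value as a single token, just as not_analyzed would), and then apply a pattern_capture token filter that keeps only the domain part. If you also need the full url as a token, set preserve_original to true.
Index settings and mapping: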
{
"settings": {
"analysis": {
"filter": {
"capture_domain_filter": {
"type": "pattern_capture",
"preserve_original": false,
"flags": "CASE_INSENSITIVE",
"patterns": [
"https?:\/\/([^/]+)"
]
}
},
"analyzer": {
"domain_analyzer": {
"type": "custom",
"tokenizer": "keyword",
"filter": [
"capture_domain_filter"
]
}
}
}
},
"mappings": {
"weblink": {
"properties": {
"url": {
"type": "string",
"analyzer": "domain_analyzer"
}
}
}
}
}
With these settings, the analyzer reduces a URL to its domain:
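curl -sXGET "http://localhost:9200/url_analyzer/_analyze?analyzer=domain_analyzer&pretty" -d 'http://en.wikipedia.org/wiki/Wikipedia'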
"tokens" : [ {
"token" : "en.wikipedia.org",
Now a terms aggregation on the url field buckets the documents by domain (rather than by full URL).
curl -XGET "http://localhost:9200/url_analyzer/_search?pretty" -d'
{
"aggregations": {
"tokens": {
"terms": {
"field": "url"
}
}
}
}'
Response:
"aggregations" : {
"tokens" : {
"buckets" : [ {
"key" : "en.wikipedia.org",
"doc_count" : 2
}, {
"key" : "www.elasticsearch.org",
"doc_count" : 1
} ]
}