I have a two-node Elasticsearch setup where the same search query on one node yields different results than on the other node, and I would like to find out why that is the case. Details:
_cluster/state:
{
"cluster_name": "elasticsearch.abc",
"version": 330,
"master_node": "HexGKOoHSxqRaMmwduCVIA",
"blocks": {},
"nodes": {
"rUZDrUfMR1-RWcy4t0YQNw": {
"name": "Owl",
"transport_address": "inet[/10.123.123.123:9303]",
"attributes": {}
},
"HexGKOoHSxqRaMmwduCVIA": {
"name": "Bloodlust II",
"transport_address": "inet[/10.123.123.124:9303]",
"attributes": {}
}
},
"metadata": {
"templates": {},
"indices": {
"abc": {
"state": "open",
"settings": {
"index": {
"creation_date": "1432297566361",
"uuid": "LKx6Ro9CRXq6JZ9a29jWeA",
"analysis": {
"filter": {
"substring": {
"type": "nGram",
"min_gram": "1",
"max_gram": "50"
}
},
"analyzer": {
"str_index_analyzer": {
"filter": [
"lowercase",
"substring"
],
"tokenizer": "keyword"
},
"str_search_analyzer": {
"filter": [
"lowercase"
],
"tokenizer": "keyword"
}
}
},
"number_of_replicas": "1",
"number_of_shards": "5",
"version": {
"created": "1050099"
}
}
},
"mappings": {
"some_mapping": {
...
}
...
},
"aliases": []
}
}
},
"routing_table": {
"indices": {
"abc": {
"shards": {
"0": [
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 0,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 0,
"index": "abc"
}
],
"1": [
{
"state": "STARTED",
"primary": false,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 1,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 1,
"index": "abc"
}
],
"2": [
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 2,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 2,
"index": "abc"
}
],
"3": [
{
"state": "STARTED",
"primary": false,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 3,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 3,
"index": "abc"
}
],
"4": [
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 4,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 4,
"index": "abc"
}
]
}
}
}
},
"routing_nodes": {
"unassigned": [],
"nodes": {
"HexGKOoHSxqRaMmwduCVIA": [
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 4,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 0,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 3,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 1,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "HexGKOoHSxqRaMmwduCVIA",
"relocating_node": null,
"shard": 2,
"index": "abc"
}
],
"rUZDrUfMR1-RWcy4t0YQNw": [
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 4,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 0,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 3,
"index": "abc"
},
{
"state": "STARTED",
"primary": true,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 1,
"index": "abc"
},
{
"state": "STARTED",
"primary": false,
"node": "rUZDrUfMR1-RWcy4t0YQNw",
"relocating_node": null,
"shard": 2,
"index": "abc"
}
]
}
},
"allocations": []
}
_cluster/health
{
"cluster_name": "elasticsearch.abc",
"status": "green",
"timed_out": false,
"number_of_nodes": 2,
"number_of_data_nodes": 2,
"active_primary_shards": 5,
"active_shards": 10,
"relocating_shards": 0,
"initializing_shards": 0,
"unassigned_shards": 0,
"number_of_pending_tasks": 0
}
_cluster/stats
{
"timestamp": 1432312770877,
"cluster_name": "elasticsearch.abc",
"status": "green",
"indices": {
"count": 1,
"shards": {
"total": 10,
"primaries": 5,
"replication": 1,
"index": {
"shards": {
"min": 10,
"max": 10,
"avg": 10
},
"primaries": {
"min": 5,
"max": 5,
"avg": 5
},
"replication": {
"min": 1,
"max": 1,
"avg": 1
}
}
},
"docs": {
"count": 19965,
"deleted": 4
},
"store": {
"size_in_bytes": 399318082,
"throttle_time_in_millis": 0
},
"fielddata": {
"memory_size_in_bytes": 60772,
"evictions": 0
},
"filter_cache": {
"memory_size_in_bytes": 15284,
"evictions": 0
},
"id_cache": {
"memory_size_in_bytes": 0
},
"completion": {
"size_in_bytes": 0
},
"segments": {
"count": 68,
"memory_in_bytes": 10079288,
"index_writer_memory_in_bytes": 0,
"index_writer_max_memory_in_bytes": 5120000,
"version_map_memory_in_bytes": 0,
"fixed_bit_set_memory_in_bytes": 0
},
"percolate": {
"total": 0,
"time_in_millis": 0,
"current": 0,
"memory_size_in_bytes": -1,
"memory_size": "-1b",
"queries": 0
}
},
"nodes": {
"count": {
"total": 2,
"master_only": 0,
"data_only": 0,
"master_data": 2,
"client": 0
},
"versions": [
"1.5.0"
],
"os": {
"available_processors": 8,
"mem": {
"total_in_bytes": 0
},
"cpu": []
},
"process": {
"cpu": {
"percent": 0
},
"open_file_descriptors": {
"min": 649,
"max": 654,
"avg": 651
}
},
"jvm": {
"max_uptime_in_millis": 2718272183,
"versions": [
{
"version": "1.7.0_40",
"vm_name": "Java HotSpot(TM) 64-Bit Server VM",
"vm_version": "24.0-b56",
"vm_vendor": "Oracle Corporation",
"count": 2
}
],
"mem": {
"heap_used_in_bytes": 2665186528,
"heap_max_in_bytes": 4060086272
},
"threads": 670
},
"fs": {
"total_in_bytes": 631353901056,
"free_in_bytes": 209591468032,
"available_in_bytes": 209591468032
},
"plugins": []
}
}
Example query:
/_search?from=22&size=1
{
"query": {
"bool": {
"should": [{
"match": {
"address.city": {
"query": "Bremen",
"boost": 2
}
}
}],
"must": [{
"match": {
"type": "L"
}
}]
}
}
}
Response for the first request
{
"took": 30,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 19543,
"max_score": 6.407021,
"hits": [{
"_index": "abc",
"_type": "xyz",
"_id": "ABC123",
"_score": 5.8341036,
"_source": {
...
}
}]
}
}
Response for the second request
{
"took": 27,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 19543,
"max_score": 6.407021,
"hits": [
{
"_index": "abc",
"_type": "xyz",
"_id": "FGH12343",
"_score": 5.8341036,
"_source": {
...
}
}
]
}
}
What could be the cause for this and how can I ensure the same results for different nodes?
Explained query as requested: search/abc/mytype/_search?from=0&size=1&search_type=dfs_query_then_fetch&explain=
{
"query": {
"bool": {
"should": [{
"match": {
"address.city": {
"query": "Karlsruhe",
"boost": 2
}
}
}]
}
}
}
Response for the first request
{
"took": 5,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 41,
"max_score": 7.211497,
"hits": [
{
"_shard": 0,
"_node": "rUZDrUfMR1-RWcy4t0YQNw",
"_index": "abc",
"_type": "mytype",
"_id": "abc123",
"_score": 7.211497,
"_source": {...
},
"_explanation": {
"value": 7.211497,
"description": "weight(address.city:karlsruhe^2.0 in 1598) [PerFieldSimilarity], result of:",
"details": [
{
"value": 7.211497,
"description": "fieldWeight in 1598, product of:",
"details": [
{
"value": 1,
"description": "tf(freq=1.0), with freq of:",
"details": [
{
"value": 1,
"description": "termFreq=1.0"
}
]
},
{
"value": 7.211497,
"description": "idf(docFreq=46, maxDocs=23427)"
},
{
"value": 1,
"description": "fieldNorm(doc=1598)"
}
]
}
]
}
}
]
}
}
Response for the second request
{
"took": 6,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"failed": 0
},
"hits": {
"total": 41,
"max_score": 7.194322,
"hits": [
{
"_shard": 0,
"_node": "rUZDrUfMR1-RWcy4t0YQNw",
"_index": "abc",
"_type": "mytype",
"_id": "abc123",
"_score": 7.194322,
"_source": {...
},
"_explanation": {
"value": 7.194322,
"description": "weight(address.city:karlsruhe^2.0 in 1598) [PerFieldSimilarity], result of:",
"details": [
{
"value": 7.194322,
"description": "fieldWeight in 1598, product of:",
"details": [
{
"value": 1,
"description": "tf(freq=1.0), with freq of:",
"details": [
{
"value": 1,
"description": "termFreq=1.0"
}
]
},
{
"value": 7.194322,
"description": "idf(docFreq=48, maxDocs=24008)"
},
{
"value": 1,
"description": "fieldNorm(doc=1598)"
}
]
}
]
}
}
]
}
}
The mismatch in hits is most probably caused by the primary shards and their replicas being out of sync. This can happen if a node left the cluster (for whatever reason) while changes were still being made to documents (indexing, deleting, updating).
The scoring part is a different story, and can be explained by "Relevancy Scoring" section from this blog post:
Elasticsearch faces an interesting dilemma when you execute a search. Your query needs to find all the relevant documents...but these documents are scattered around any number of shards in your cluster. Each shard is basically a Lucene index, which maintains its own TF and DF statistics. A shard only knows how many times "pineapple" appears within the shard, not the entire cluster.
When searching, I would try "DFS Query Then Fetch", i.e. _search?search_type=dfs_query_then_fetch ...;
that should help with the accuracy of scoring.
Also, the differing document counts caused by document changes during the node disconnect can affect score calculation even after deleting and rebuilding the index. This might be because changes to documents were applied differently on the replica and on the primary shards — more specifically, documents may have been deleted. A deleted document is only permanently removed from the index at segment-merging time, and segment merging doesn't happen unless certain conditions are met in the underlying Lucene instance.
A forced merging can be initiated by a POST to /_optimize?max_num_segments=1
. Warning: This takes a really long time (depending on the size of the index), requires significant IO and CPU resources, and should not be run on an index that is actively receiving changes. Documentation: Optimize, Segments Merging
이 기사는 인터넷에서 수집됩니다. 재 인쇄 할 때 출처를 알려주십시오.
침해가 발생한 경우 연락 주시기 바랍니다[email protected] 삭제
몇 마디 만하겠습니다