12/03/2016 - ELASTICSEARCH
Although the logic of elasticsearch and SQL queries below are the same, results can slightly differ which is a normal behaviour. It especially applies to full-text search. You can read Filtered Query, Bool Query, Combining Filters, String Sorting and Multifields, Finding Multiple Exact Values, Multiple Query Strings and Combining Queries pages for more information.
If you have a field which is used for full-text search and sorting then you must define it as "Multifield" as shown below. The not_analyzed
version must be used when sorting and the analyzed
version is for full-text search. The fields title
and description
are defined as "Multifield" in this example.
$ curl -X GET 127.0.0.1:9203/_mapping/post?pretty
{
"post_dev" : {
"mappings" : {
"post" : {
"_meta" : {
"model" : "Application\\SearchBundle\\Entity\\Post"
},
"properties" : {
"created_at" : {
"type" : "date",
"format" : "dateOptionalTime"
},
"description" : {
"type" : "string",
"analyzer" : "english",
"fields" : {
"raw" : {
"type" : "string",
"index" : "not_analyzed"
}
}
},
"id" : {
"type" : "integer"
},
"is_published" : {
"type" : "boolean"
},
"price" : {
"type" : "double"
},
"title" : {
"type" : "string",
"analyzer" : "english",
"fields" : {
"raw" : {
"type" : "string",
"index" : "not_analyzed"
}
}
},
"year" : {
"type" : "integer"
}
}
}
}
}
}
Since listing all or a portion of records doesn't involve searching a keyword in a field, you should ignore sorting by elasticsearch default which is "sort":[{"_score":"desc"}]
(relevance). Instead of that, you should order by a field of your choice.
List 10 records in "id" order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"bool": {
"must": [
{
"match_all": [
]
}
]
}
},
"sort": [
{
"id": {
"order": "asc"
}
}
],
"from": 0,
"size": "10"
}'
# SQL
SELECT * FROM post
ORDER BY id ASC
LIMIT 10
OFFSET 0
List 10 records in "title", "year" and "id" order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"bool": {
"must": [
{
"match_all": [
]
}
]
}
},
"sort": [
{
"title.raw": {
"order": "asc"
}
},
{
"year": {
"order": "desc"
}
},
{
"id": {
"order": "asc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT * FROM post
ORDER BY title ASC, year DESC, id ASC
LIMIT 10
OFFSET 0
Do not ignore ordering by elasticsearch default which is "sort":[{"_score":"desc"}]
because searching is all about full-text search and relevance. This would enable us to show the most relevant record on top.
Scoring works differently and produces different results in elasticsearch and database queries so although the logic is same, result could be different.
When using multiple keywords for searching, elasticsearch splits the words and scores them differently. E.g. "Title 1 Eltit". If there is a record for "Title" and "Title 1" in index, elasticsearch scores "Title 1" higher because it contains an integer value. You need to study results yourself to see what exactly happens.
Full-text search 10 records that contain a single "Title" keyword in "title" field and list them in "_score" (relevance) order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"match": {
"title": {
"query": "Title"
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`) AGAINST ('Title' IN BOOLEAN MODE) AS _score
FROM post
WHERE MATCH(`title`) AGAINST ('Title' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that contain a multiple "Title 1 Eltit" keyword in "title" field and list them in "_score" (relevance) order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"match": {
"title": {
"query": "Title 1 Eltit"
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`) AGAINST ('Title 1 Eltit' IN BOOLEAN MODE) AS _score
FROM post
WHERE MATCH(`title`) AGAINST ('Title 1 Eltit' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that contain a single "Title" keyword in "title" and "description" fields then list them in "_score" (relevance) order. Note: If the keyword appears in both fields then "type": "most_fields"
scores it higher. This is its priority.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"multi_match": {
"type": "most_fields",
"query": "Title",
"fields": [
"title",
"description"
]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`, `description`) AGAINST ('Title' IN BOOLEAN MODE) AS _score
FROM post
WHERE MATCH(`title`, `description`) AGAINST ('Title' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that contain a multiple "Title 1 Eltit" keyword in "title" and "description" fields then list them in "_score" (relevance) order.
Note: If the keywords appear in both fields then "type": "most_fields"
scores it higher. This is its priority.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"multi_match": {
"type": "most_fields",
"query": "Title 1 Eltit",
"fields": [
"title",
"description"
]
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`, `description`) AGAINST ('Title 1 Eltit' IN BOOLEAN MODE) AS _score
FROM post
WHERE MATCH(`title`, `description`) AGAINST ('Title 1 Eltit' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that contain a single "Title" keyword in "title" field, contains "2005" and above in "year" field and list them in "_score" (relevance) order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"filtered": {
"query": {
"match": {
"title": "Title"
}
},
"filter": {
"range": {
"year": {
"gte": "2005"
}
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`) AGAINST ('Title' IN BOOLEAN MODE) AS _score
FROM post
WHERE year >= 2005 AND MATCH(`title`) AGAINST ('Title' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that contain a multiple "Title 1 Eltit" keyword in "title" and "description" fields, contains "x>2005 and x<2015" in "year" field then list them in "_score" (relevance) order. Note: If the keywords appear in both fields then "type": "most_fields"
scores it higher. This is its priority.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"filtered": {
"query": {
"multi_match": {
"type": "most_fields",
"query": "Title 1 Eltit",
"fields": [
"title",
"description"
]
}
},
"filter": {
"range": {
"year": {
"gt": "2005",
"lt": "2015"
}
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`) AGAINST ('Title' IN BOOLEAN MODE) AS _score
FROM post
WHERE (year > 2005 AND year < 2015) AND MATCH(`title`) AGAINST ('Title' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that should contain either "2015" in "year" field or "5.55" in "price" field and must not contain "false" in "is_published" field then list them in "_score" (relevance) order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"filtered": {
"filter": {
"bool": {
"should": [
{
"term": {
"year": "2015"
}
},
{
"term": {
"price": "5.55"
}
}
],
"must_not": {
"term": {
"is_published": "false"
}
}
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *
FROM post
WHERE (year = 2015 OR price = '5.55') AND is_published <> false
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that should contain a single "Title" keyword in "title" field, either "2015" in "year" field or "5.55" in "price" field and must not contain "false" in "is_published" field then list them in "_score" (relevance) order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"filtered": {
"query": {
"match": {
"title": "Title"
}
},
"filter": {
"bool": {
"should": [
{
"term": {
"year": "2015"
}
},
{
"term": {
"price": "5.55"
}
}
],
"must_not": {
"term": {
"is_published": "false"
}
}
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`) AGAINST ('Title' IN BOOLEAN MODE) AS _score
FROM post
WHERE (year = 2015 OR price = '5.55') AND is_published <> false AND MATCH(`title`) AGAINST ('Title' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that should contain a multiple "Title 1 Eltit" keyword in "title" and "description" fields, either "2015" in "year" field or "5.55" in "price" field and must not contain "false" in "is_published" field then list them in "_score" (relevance) order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"filtered": {
"query": {
"multi_match": {
"type": "most_fields",
"query": "Title 1 Eltit",
"fields": [
"title",
"description"
]
}
},
"filter": {
"bool": {
"should": [
{
"term": {
"year": "2015"
}
},
{
"term": {
"price": "5.55"
}
}
],
"must_not": {
"term": {
"is_published": "false"
}
}
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`, `description`) AGAINST ('Title 1 Eltit' IN BOOLEAN MODE) AS _score
FROM post
WHERE (year = 2015 OR price = '5.55') AND is_published <> false AND MATCH(`title`, `description`) AGAINST ('Title 1 Eltit' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Exact value search 10 records that contain a multiple "2000" and "2010" keywords in "year" field then list them in "id ASC" order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"filtered": {
"filter": {
"terms": {
"year": [
2000,
2010
]
}
}
}
},
"sort": [
{
"id": {
"order": "asc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *
FROM post
WHERE year IN (2000, 2010)
ORDER BY id ASC
LIMIT 10
OFFSET 0;
Full-text search 10 records that must contain a multiple "Title" in "title" field and "Cript" in "description" fields then list them in "id ASC" and "_score" (relevance) order.
# Elasticsearch
curl -XGET "http://127.0.0.1:9203/_search?post_dev" -d'
{
"query": {
"bool": {
"must": [
{
"match": {
"title": "Title"
}
},
{
"match": {
"description": "Cript"
}
}
]
}
},
"sort": [
{
"id": {
"order": "asc"
}
},
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`, `description`) AGAINST ('+Title +Cript' IN BOOLEAN MODE) AS _score
FROM post
WHERE MATCH(`title`, `description`) AGAINST ('+Title +Cript' IN BOOLEAN MODE)
ORDER BY id ASC, _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that must contain either "one", "two" or "three" in "title" field then list them in "_score" (relevance) order.
# Elasticsearch
curl -XPOST "http://127.0.0.1:9200/_search?post_dev" -d'
{
"query": {
"bool": {
"must": {
"match": {
"title": {
"query": "one two three",
"operator": "or" /* if you remove this line, it would still run OR query because is it default behaviour */
}
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`) AGAINST ('one two three' IN BOOLEAN MODE) AS _score
FROM post
WHERE MATCH(`title`) AGAINST ('one two three' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;
Full-text search 10 records that must contain all three "one", "two" and "three" in "title" field then list them in "_score" (relevance) order.
# Elasticsearch
curl -XPOST "http://127.0.0.1:9200/_search?post_dev" -d'
{
"query": {
"bool": {
"must": {
"match": {
"title": {
"query": "one two three",
"operator": "and"
}
}
}
}
},
"sort": [
{
"_score": {
"order": "desc"
}
}
],
"from": "0",
"size": "10"
}'
# SQL
SELECT *, MATCH(`title`) AGAINST ('+one +two +three' IN BOOLEAN MODE) AS _score
FROM post
WHERE MATCH(`title`) AGAINST ('+one +two +three' IN BOOLEAN MODE)
ORDER BY _score DESC
LIMIT 10
OFFSET 0;