Friday 13 November 2020

Elasticsearch/Kubernetes/Logstash(ELK) - Part 3

Elastic Search Query & Practice

Everything in Elasticsearch is HTTP and the body uses the JSON format to query the Elasticsearch. This language to query elasticsearch is DSL which will match your search criteria and provide some kind of relevancy score, more occurrences of the words in the document is considered to be more relevant(relevancy score).

Syntax for DSL components

Query: Query context which is used for full text searches and that's supposed to match documents that contain the search criteria as well to specify how well the document matched that particular search criteria by providing that relevancy score.

Filter: Filter context is mostly used for filtering structured data  

But both the query context and the filter context can be combined together to form one large query.

Index Creation and Query using DSL

Create these below 10 records and get hands on how to use query Elasticsearch. This is mainly for practice, create it one by one in the kibal console which you can practice for querying the searches.

PUT /courses/classroom/1
{
    "name": "Accounting 101",
    "room": "E3",
    "professor": {
        "name": "Thomas Baszo",
        "department": "finance",
        "facutly_type": "part-time",
        "email": "baszot@onuni.com"
        },
    "students_enrolled": 27,
    "course_publish_date": "2015-01-19",
    "course_description": "Act 101 is a course from the business school on the introduction to accounting that teaches students how to read and compose basic financial statements"
}

PUT /courses/classroom/2
{
    "name": "Marketing 101",
    "room": "E4",
    "professor": {
        "name": "William Smith",
        "department": "finance",
        "facutly_type": "part-time",
        "email": "wills@onuni.com"
        },
    "students_enrolled": 18,
    "course_publish_date": "2015-06-21",
    "course_description": "Mkt 101 is a course from the business school on the introduction to marketing that teaches students the fundamentals of market analysis, customer retention and online advertisements"
}

PUT /courses/classroom/3
{
    "name": "Anthropology 230",
    "room": "G11",
    "professor": {
        "name": "Devin Cranford",
        "department": "history",
        "facutly_type": "full-time",
        "email": "devinc@onuni.com"
        },
    "students_enrolled": 22,
    "course_publish_date": "2013-08-27",
    "course_description": "Ant 230 is an intermediate course on human societies and cultures and their development. A focus on the Mayans civilization is rooted in this course"
}

PUT /courses/classroom/4
{
    "name": "Computer Science 101",
    "room": "C12",
    "professor": {
        "name": "Gregg Payne",
        "department": "engineering",
        "facutly_type": "full-time",
        "email": "payneg@onuni.com"
        },
    "students_enrolled": 33,
    "course_publish_date": "2013-08-27",
    "course_description": "CS 101 is a first year computer science introduction teaching fundamental data structures and alogirthms using python. "
}

PUT /courses/classroom/5
{
    "name": "Theatre 410",
    "room": "T18",
    "professor": {
        "name": "Sebastian Hern",
        "department": "art",
        "facutly_type": "part-time",
        "email": ""
        },
    "students_enrolled": 47,
    "course_publish_date": "2013-01-27",
    "course_description": "Tht 410 is an advanced elective course disecting the various plays written by shakespere during the 16th century"
}

PUT /courses/classroom/6
{
    "name": "Cost Accounting 400",
    "room": "E7",
    "professor": {
        "name": "Bill Cage",
        "department": "accounting",
        "facutly_type": "full-time",
        "email": "cageb@onuni.com"
        },
    "students_enrolled": 31,
    "course_publish_date": "2014-12-31",
    "course_description": "Cst Act 400 is an advanced course from the business school taken by final year accounting majors that covers the subject of business incurred costs and how to record them in financial statements"
}

PUT /courses/classroom/7
{
    "name": "Computer Internals 250",
    "room": "C8",
    "professor": {
        "name": "Gregg Payne",
        "department": "engineering",
        "facutly_type": "part-time",
        "email": "payneg@onuni.com"
        },
    "students_enrolled": 33,
    "course_publish_date": "2012-08-20",
    "course_description": "cpt Int 250 gives students an integrated and rigorous picture of applied computer science, as it comes to play in the construction of a simple yet powerful computer system. "
}

PUT /courses/classroom/8
{
    "name": "Accounting Info Systems 350",
    "room": "E3",
    "professor": {
        "name": "Bill Cage",
        "department": "accounting",
        "facutly_type": "full-time",
        "email": "cageb@onuni.com"
        },
    "students_enrolled": 19,
    "course_publish_date": "2014-05-15",
    "course_description": "Act Sys 350 is an advanced course providing students a practical understanding of an accounting system in database technology. Students will use MS Access to build a transaction ledger system"
}

PUT /courses/classroom/9
{
    "name": "Tax Accounting 200",
    "room": "E7",
    "professor": {
        "name": "Thomas Baszo",
        "department": "finance",
        "facutly_type": "part-time",
        "email": "baszot@onuni.com"
        },
    "students_enrolled": 17,
    "course_publish_date": "2016-06-15",
    "course_description": "Tax Act 200 is an intermediate course covering various aspects of tax law"
}

PUT /courses/classroom/10
{
    "name": "Capital Markets 350",
    "room": "E3",
    "professor": {
        "name": "Thomas Baszo",
        "department": "finance",
        "facutly_type": "part-time",
        "email": "baszot@onuni.com"
        },
    "students_enrolled": 13,
    "course_publish_date": "2016-01-11",
    "course_description": "This is an advanced course teaching crucial topics related to raising capital and bonds, shares and other long-term equity and debt financial instrucments"
}


The most basic kind of query in a classic search is the match or query. You can see the _score to find the relevance of the document.
There are more examples in this section for practice purposes.

GET /courses/_search
{
  "query":{
    "match_all": {}
  }
}

GET /courses/_search
{
  "query":{
    "match": {"name":"computer"}
  }
}

PUT /courses/classroom/5
{
    "name": "Theatre 410",
    "room": "T18",
    "professor": {
        "name": "Sebastian Hern",
        "department": "art",
        "facutly_type": "part-time"
        },
    "students_enrolled": 47,
    "course_publish_date": "2013-01-27",
    "course_description": "Tht 410 is an advanced elective course disecting the various plays written by shakespere during the 16th century"
}

displays all the records which exists in the field

GET /courses/_search
{
  "query":{
    "exists": {"field":"professor.email"}
  }
}

Match multiple criteria

GET /courses/_search
{
  "query":{
    "bool": {
      "must": [
        {"match": {"name":"computer"}},
        {"match": {"room": "c8"}}
      ]
    }
  }
}


Multi match criteria

GET /courses/_search
{
  "query":{
    "multi_match":{
      "fields": ["name","professor.name"],
      "query": "accounting"
    }
  }
}


Searches for the string with exact words

GET /courses/_search
{
  "query":{
    "match_phrase":{
      "course_description": "financial statements"
    }
  }
}


Searches for the words with partial contexts also.

GET /courses/_search
{
  "query":{
    "match_phrase_prefix":{
      "course_description": "financial statements"
    }
  }
}


Search in range

GET courses/_search
{
  "query": {
    "range": {
      "students_enrolled": {
        "gte": 10,
        "lte": 15
      }
    }
  }
}

Search for date

GET courses/_search
{
  "query": {
    "range": {
      "course_publish_date": {
        "gte": 2013
      }
    }
  }
}


Combine two or more queries.

GET courses/_search
{
  "query": {
    "bool": {
      "must": [
        {"match": {
          "name": "accounting"
        }}
      ]
      , "must_not": [
        {"match": {
          "room": "e7"
        }}
      ]
      , "should": [
        {"range": {
          "students_enrolled": {
            "gte": 10,
            "lte": 20
          }
        }}
      ]
    }
  }
}


Filters

GET courses/_search
{
  "query": {
    "bool": {
      "filter": {
        "bool": {
          "must":[
            {
              "range":{
                "students_enrolled":{
                  "gte": 30
                }
              }
            }
          ]
        }
      }
    }
  }
}


You can create a new set of index, to practice aggregations and filter.
Bulk Indexing

POST /vehicles/cars/_bulk
{ "index": {}}
{ "price" : 10000, "color" : "white", "make" : "honda", "sold" : "2016-10-28", "condition": "okay"}
{ "index": {}}
{ "price" : 20000, "color" : "white", "make" : "honda", "sold" : "2016-11-05", "condition": "new" }
{ "index": {}}
{ "price" : 30000, "color" : "green", "make" : "ford", "sold" : "2016-05-18", "condition": "new" }
{ "index": {}}
{ "price" : 15000, "color" : "blue", "make" : "toyota", "sold" : "2016-07-02", "condition": "good" }
{ "index": {}}
{ "price" : 12000, "color" : "green", "make" : "toyota", "sold" : "2016-08-19" , "condition": "good"}
{ "index": {}}
{ "price" : 18000, "color" : "red", "make" : "dodge", "sold" : "2016-11-05", "condition": "good"  }
{ "index": {}}
{ "price" : 80000, "color" : "red", "make" : "bmw", "sold" : "2016-01-01", "condition": "new"  }
{ "index": {}}
{ "price" : 25000, "color" : "blue", "make" : "ford", "sold" : "2016-08-22", "condition": "new"  }
{ "index": {}}
{ "price" : 10000, "color" : "gray", "make" : "dodge", "sold" : "2016-02-12", "condition": "okay" }
{ "index": {}}
{ "price" : 19000, "color" : "red", "make" : "dodge", "sold" : "2016-02-12", "condition": "good" }
{ "index": {}}
{ "price" : 20000, "color" : "red", "make" : "chevrolet", "sold" : "2016-08-15", "condition": "good" }
{ "index": {}}
{ "price" : 13000, "color" : "gray", "make" : "chevrolet", "sold" : "2016-11-20", "condition": "okay" }
{ "index": {}}
{ "price" : 12500, "color" : "gray", "make" : "dodge", "sold" : "2016-03-09", "condition": "okay" }
{ "index": {}}
{ "price" : 35000, "color" : "red", "make" : "dodge", "sold" : "2016-04-10", "condition": "new" }
{ "index": {}}
{ "price" : 28000, "color" : "blue", "make" : "chevrolet", "sold" : "2016-08-15", "condition": "new" }
{ "index": {}}
{ "price" : 30000, "color" : "gray", "make" : "bmw", "sold" : "2016-11-20", "condition": "good" }

Aggregation
till now we were only searching, now we would do data analytics by insights into data.

Search all data with price in descending order.

GET /vehicles/cars/_search
{
  "from": 0,
  "size": 5

  , "query": {
      "match_all": {}
  }
  , "sort": [
    {
      "price": {
        "order": "desc"
      }
    }
  ]
}


Creating the Aggregation which searches on all the cars which are in color 'red' with price details.

GET /vehicles/cars/_search
{
  "size": 1,
  "query": {
    "match": {
      "color": "red"
    }
  },

  "aggs": {
    "popular_cars": {
      "terms": {
        "field": "make.keyword",
        "size": 10
      }
      , "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        },
          "max_price": {
          "max": {
            "field": "price"
          }
        },
          "min_price": {
          "min": {
            "field": "price"
          }
        }
      }
    }
  }
}

Creating the buckets for the sold date.

GET /vehicles/cars/_search
{
  "aggs": {
    "popular_cars": {
      "terms": {
        "field": "make.keyword",
        "size": 10
      }
      , "aggs": {
        "sold_date_range": {
          "range": {
            "field": "sold",
            "ranges": [
              {
                "from": "2016-01-01",
                "to": "2016-05-18"
              },
              {
                "from": "2016-05-18",
                "to": "2017-01-01"
              }
            ]
          },
          "aggs": {
            "avg_price": {
              "avg": {
                "field": "price"
              }
            }
          }
        }
      }
    }
  }
}


Find the conditions of the cars, their max, min price as an aggregate scoping.

GET /vehicles/cars/_search
{
  "aggs": {
    "car_condition": {
      "terms": {
        "field": "condition.keyword",
        "size": 10
      }
      , "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        },
        "make":{
          "terms": {
            "field": "make.keyword",
            "size": 10
          },
           "aggs":{
            "min_price": { "min": { "field": "price" }},
            "max_price": { "max": { "field": "price" }}
          }
        }
      }
    }
  }
}



No comments:

Post a Comment