Bootstrap

Elasticsearch 基于脚本进行 partial update

文字内容整理自 B 站中华石杉的 Elasticsearch 顶尖高手系列课程核心知识篇,教程原本是基于 Groovy 脚本,现在的 Elasticsearch 似乎已经不怎么支持了,我把脚本改为了 painless 的,过程一点都不 painless。

Elasticsearch 有个内置的脚本支持,可以基于 groovy 或 painless 脚本实现各种各样的复杂操作。

先生成一条测试数据。

PUT test_index/_doc/12
{
  "num": 0,
  "tags": []
}

{
  "_index" : "test_index",
  "_type" : "_doc",
  "_id" : "12",
  "_version" : 1,
  "result" : "created",
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "failed" : 0
  },
  "_seq_no" : 4,
  "_primary_term" : 1
}
内置脚本 inline script

POST test_index/_update/11
{
  "script": "ctx._source.num+=1"
}

{
  "error" : {
    "root_cause" : [
      {
        "type" : "illegal_argument_exception",
        "reason" : "failed to execute script"
      }
    ],
    "type" : "illegal_argument_exception",
    "reason" : "failed to execute script",
    "caused_by" : {
      "type" : "script_exception",
      "reason" : "runtime error",
      "script_stack" : [
        "ctx._source.num+=1",
        "                 ^---- HERE"
      ],
      "script" : "ctx._source.num+=1",
      "lang" : "painless",
      "position" : {
        "offset" : 17,
        "start" : 0,
        "end" : 18
      },
      "caused_by" : {
        "type" : "null_pointer_exception",
        "reason" : null
      }
    }
  },
  "status" : 400
}

POST test_index/_update/11
{
  "script": "ctx._source.num+=1",
  "lang": "groovy"
}

{
  "error" : {
    "root_cause" : [
      {
        "type" : "x_content_parse_exception",
        "reason" : "[3:3] [UpdateRequest] unknown field [lang]"
      }
    ],
    "type" : "x_content_parse_exception",
    "reason" : "[3:3] [UpdateRequest] unknown field [lang]"
  },
  "status" : 400
}

Elasticsearch 似乎已经不支持 groovy 了,从官方文档看,大概是从 5.0 版本开始,默认的脚本语言从 groovy 改为了 painless,groovy 则在 6.0 版本中被彻底移除。

POST test_index/_update/11
{
  "script": {
    "lang": "painless",
    "source": "ctx._source.num+=1"
  }
}

{
  "_index" : "test_index",
  "_type" : "_doc",
  "_id" : "11",
  "_version" : 7,
  "result" : "updated",
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "failed" : 0
  },
  "_seq_no" : 8,
  "_primary_term" : 1
}
外部脚本

POST _scripts/test-add-tags
{
  "script": {
    "lang": "groovy",
    "source": "ctx._source.tags+=new_tag"
  }
}

{
  "error" : {
    "root_cause" : [
      {
        "type" : "illegal_argument_exception",
        "reason" : "unable to put stored script with unsupported lang [groovy]"
      }
    ],
    "type" : "illegal_argument_exception",
    "reason" : "unable to put stored script with unsupported lang [groovy]"
  },
  "status" : 400
}

POST _scripts/test-add-tags
{
  "script": {
    "lang": "painless",
    "source": "ctx._source.tags+=new_tag"
  }
}

{
  "acknowledged" : true
}

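虽然 painless 的脚本写进去了,但是后来执行的时候却发生了编译错误,似乎是不支持 "+=" 操作(更可能的原因是脚本中的 new_tag 是未定义的变量:painless 里的参数必须通过 params.new_tag 访问,向数组追加元素可以写成 ctx._source.tags.add(params.new_tag))。于是我把脚本改得简单了一点。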

POST _scripts/test-add-tags
{
  "script": {
    "lang": "painless",
    "source": "ctx._source.num += params.my_val"
  }
}

POST test_index/_update/12
{
  "script": {
    "id": "test-add-tags",
    "params": {
      "my_val": 100
    }
  }
}

用脚本删除文档

PUT test_index/_doc/1
{
  "num": 1
}

POST _scripts/test-delete-document
{
  "script": {
    "lang": "painless",
    "source": "ctx.op = ctx._source.num == params.count ? 'delete':'none'"
  }
}

POST test_index/_update/1
{
  "script": {
    "id": "test-delete-document",
    "params": {
      "count": 12
    }
  }
}

{
  "_index" : "test_index",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 3,
  "result" : "updated",
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "failed" : 0
  },
  "_seq_no" : 24,
  "_primary_term" : 1
}

POST test_index/_update/1
{
  "script": {
    "id": "test-delete-document",
    "params": {
      "count": 1
    }
  }
}

{
  "_index" : "test_index",
  "_type" : "_doc",
  "_id" : "1",
  "_version" : 4,
  "result" : "updated",
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "failed" : 0
  },
  "_seq_no" : 25,
  "_primary_term" : 1
}

# Elasticsearch 5.2
# groovy
# ctx.op = ctx._source.num == count ? 'delete' : 'none' 

POST /test_index/test_type/11/_update
{
  "script": {
    "lang": "groovy",
    "file": "test-delete-document",
    "params": {
      "count": 1
    }
  }
}

upsert 操作

POST test_index/_update/13
{
  "doc": {
    "num": 13
  }
}

{
  "error" : {
    "root_cause" : [
      {
        "type" : "document_missing_exception",
        "reason" : "[_doc][13]: document missing",
        "index_uuid" : "itFrdXbPQ-ukBl1sp5EbOA",
        "shard" : "0",
        "index" : "test_index"
      }
    ],
    "type" : "document_missing_exception",
    "reason" : "[_doc][13]: document missing",
    "index_uuid" : "itFrdXbPQ-ukBl1sp5EbOA",
    "shard" : "0",
    "index" : "test_index"
  },
  "status" : 404
}

如果指定的 document 不存在,就执行 upsert 中的初始化操作;如果指定的 document 存在,就执行 doc 或者 script 指定的 partial update 操作。

POST test_index/_update/13
{
  "script": "ctx._source.num+=1",
  "upsert": {
    "num": 13,
    "tags": []
  }
}

{
  "_index" : "test_index",
  "_type" : "_doc",
  "_id" : "13",
  "_version" : 1,
  "result" : "created",
  "_shards" : {
    "total" : 2,
    "successful" : 2,
    "failed" : 0
  },
  "_seq_no" : 35,
  "_primary_term" : 1
}

如果再次执行,就可以看到执行了内置脚本中的 +=1 操作。