smartcn_stop token filter
The smartcn_stop token filter filters out the stopwords defined by the smartcn analyzer (_smartcn_), as well as any other custom stopwords specified by the user. This filter only supports the predefined _smartcn_ stopwords list. If you want to use a different predefined list, use the stop token filter instead.
PUT smartcn_example
{
  "settings": {
    "index": {
      "analysis": {
        "analyzer": {
          "smartcn_with_stop": {
            "tokenizer": "smartcn_tokenizer",
            "filter": [
              "porter_stem",
              "my_smartcn_stop"
            ]
          }
        },
        "filter": {
          "my_smartcn_stop": {
            "type": "smartcn_stop",
            "stopwords": [
              "_smartcn_",
              "stack",
              "的"
            ]
          }
        }
      }
    }
  }
}
GET smartcn_example/_analyze
{
  "analyzer": "smartcn_with_stop",
  "text": "哈喽,我们是 Elastic 我们是 Elastic Stack(Elasticsearch、Kibana、Beats 和 Logstash)的开发公司。从股票行情到 Twitter 消息流,从 Apache 日志到 WordPress 博文,我们可以帮助人们体验搜索的强大力量,帮助他们以截然不同的方式探索和分析数据"
}
The above request returns:
"tokens": [
"token": "哈",
"start_offset": 0,
"end_offset": 1,
"type": "word",
"position": 0
"token": "喽",
"start_offset": 1,
"end_offset": 2,
"type": "word",
"position": 1
"token": "我们",
"start_offset": 3,
"end_offset": 5,
"type": "word",
"position": 3
"token": "是",
"start_offset": 5,
"end_offset": 6,
"type": "word",
"position": 4
"token": "elast",
"start_offset": 7,
"end_offset": 14,
"type": "word",
"position": 5
"token": "我们",
"start_offset": 17,
"end_offset": 19,
"type": "word",
"position": 6
"token": "是",
"start_offset": 19,
"end_offset": 20,
"type": "word",
"position": 7
"token": "elast",
"start_offset": 21,
"end_offset": 28,
"type": "word",
"position": 8
"token": "elasticsearch",
"start_offset": 35,
"end_offset": 48,
"type": "word",
"position": 11
"token": "kibana",
"start_offset": 49,
"end_offset": 55,
"type": "word",
"position": 13
"token": "beat",
"start_offset": 56,
"end_offset": 61,
"type": "word",
"position": 15
"token": "和",
"start_offset": 62,
"end_offset": 63,
"type": "word",
"position": 16
"token": "logstash",
"start_offset": 64,
"end_offset": 72,
"type": "word",
"position": 17
"token": "开发",
"start_offset": 74,
"end_offset": 76,
"type": "word",
"position": 20
"token": "公司",
"start_offset": 76,
"end_offset": 78,
"type": "word",
"position": 21
"token": "从",
"start_offset": 79,
"end_offset": 80,
"type": "word",
"position": 23
"token": "股票",
"start_offset": 80,
"end_offset": 82,
"type": "word",
"position": 24
"token": "行情",
"start_offset": 82,
"end_offset": 84,
"type": "word",
"position": 25
"token": "到",
"start_offset": 84,
"end_offset": 85,
"type": "word",
"position": 26
"token": "twitter",
"start_offset": 86,
"end_offset": 93,
"type": "word",
"position": 27
"token": "消息",
"start_offset": 94,
"end_offset": 96,
"type": "word",
"position": 28
"token": "流",
"start_offset": 96,
"end_offset": 97,
"type": "word",
"position": 29
"token": "从",
"start_offset": 98,
"end_offset": 99,
"type": "word",
"position": 31
"token": "apach",
"start_offset": 100,
"end_offset": 106,
"type": "word",
"position": 32
"token": "日志",
"start_offset": 107,
"end_offset": 109,
"type": "word",
"position": 33
"token": "到",
"start_offset": 109,
"end_offset": 110,
"type": "word",
"position": 34
"token": "wordpress",
"start_offset": 111,
"end_offset": 120,
"type": "word",
"position": 35
"token": "博",
"start_offset": 121,
"end_offset": 122,
"type": "word",
"position": 36
"token": "文",
"start_offset": 122,
"end_offset": 123,
"type": "word",
"position": 37
"token": "我们",
"start_offset": 124,
"end_offset": 126,
"type": "word",
"position": 39
"token": "可以",
"start_offset": 126,
"end_offset": 128,
"type": "word",
"position": 40
"token": "帮助",
"start_offset": 128,
"end_offset": 130,
"type": "word",
"position": 41
"token": "人们",
"start_offset": 130,
"end_offset": 132,
"type": "word",
"position": 42
"token": "体验",
"start_offset": 132,
"end_offset": 134,
"type": "word",
"position": 43
"token": "搜索",
"start_offset": 134,
"end_offset": 136,
"type": "word",
"position": 44
"token": "强大",
"start_offset": 137,
"end_offset": 139,
"type": "word",
"position": 46
"token": "力量",
"start_offset": 139,
"end_offset": 141,
"type": "word",
"position": 47
"token": "帮助",
"start_offset": 142,
"end_offset": 144,
"type": "word",
"position": 49
"token": "他们",
"start_offset": 144,
"end_offset": 146,
"type": "word",
"position": 50
"token": "以",
"start_offset": 146,
"end_offset": 147,
"type": "word",
"position": 51
"token": "截然不同",
"start_offset": 147,
"end_offset": 151,
"type": "word",
"position": 52
"token": "方式",
"start_offset": 152,
"end_offset": 154,
"type": "word",
"position": 54
"token": "探索",
"start_offset": 154,
"end_offset": 156,
"type": "word",
"position": 55
"token": "和",
"start_offset": 156,
"end_offset": 157,
"type": "word",
"position": 56
"token": "分析",
"start_offset": 157,
"end_offset": 159,
"type": "word",
"position": 57
"token": "数据",
"start_offset": 159,
"end_offset": 161,
"type": "word",
"position": 58