smartcn_stop token filter
The `smartcn_stop` token filter filters out stopwords defined by the `smartcn` analyzer (`_smartcn_`), and any other custom stopwords specified by the user. This filter only supports the predefined `_smartcn_` stopwords list. If you want to use a different predefined list, then use the `stop` token filter instead.
PUT smartcn_example
{
"settings": {
"index": {
"analysis": {
"analyzer": {
"smartcn_with_stop": {
"tokenizer": "smartcn_tokenizer",
"filter": [
"porter_stem",
"my_smartcn_stop"
]
}
},
"filter": {
"my_smartcn_stop": {
"type": "smartcn_stop",
"stopwords": [
"_smartcn_",
"stack",
"的"
]
}
}
}
}
}
}
GET smartcn_example/_analyze
{
"analyzer": "smartcn_with_stop",
"text": "哈喽,我们是 Elastic   我们是 Elastic Stack(Elasticsearch、Kibana、Beats 和 Logstash)的开发公司。从股票行情到 Twitter 消息流,从 Apache 日志到 WordPress 博文,我们可以帮助人们体验搜索的强大力量,帮助他们以截然不同的方式探索和分析数据"
}
The above request returns:
{
"tokens": [
{
"token": "哈",
"start_offset": 0,
"end_offset": 1,
"type": "word",
"position": 0
},
{
"token": "喽",
"start_offset": 1,
"end_offset": 2,
"type": "word",
"position": 1
},
{
"token": "我们",
"start_offset": 3,
"end_offset": 5,
"type": "word",
"position": 3
},
{
"token": "是",
"start_offset": 5,
"end_offset": 6,
"type": "word",
"position": 4
},
{
"token": "elast",
"start_offset": 7,
"end_offset": 14,
"type": "word",
"position": 5
},
{
"token": "我们",
"start_offset": 17,
"end_offset": 19,
"type": "word",
"position": 6
},
{
"token": "是",
"start_offset": 19,
"end_offset": 20,
"type": "word",
"position": 7
},
{
"token": "elast",
"start_offset": 21,
"end_offset": 28,
"type": "word",
"position": 8
},
{
"token": "elasticsearch",
"start_offset": 35,
"end_offset": 48,
"type": "word",
"position": 11
},
{
"token": "kibana",
"start_offset": 49,
"end_offset": 55,
"type": "word",
"position": 13
},
{
"token": "beat",
"start_offset": 56,
"end_offset": 61,
"type": "word",
"position": 15
},
{
"token": "和",
"start_offset": 62,
"end_offset": 63,
"type": "word",
"position": 16
},
{
"token": "logstash",
"start_offset": 64,
"end_offset": 72,
"type": "word",
"position": 17
},
{
"token": "开发",
"start_offset": 74,
"end_offset": 76,
"type": "word",
"position": 20
},
{
"token": "公司",
"start_offset": 76,
"end_offset": 78,
"type": "word",
"position": 21
},
{
"token": "从",
"start_offset": 79,
"end_offset": 80,
"type": "word",
"position": 23
},
{
"token": "股票",
"start_offset": 80,
"end_offset": 82,
"type": "word",
"position": 24
},
{
"token": "行情",
"start_offset": 82,
"end_offset": 84,
"type": "word",
"position": 25
},
{
"token": "到",
"start_offset": 84,
"end_offset": 85,
"type": "word",
"position": 26
},
{
"token": "twitter",
"start_offset": 86,
"end_offset": 93,
"type": "word",
"position": 27
},
{
"token": "消息",
"start_offset": 94,
"end_offset": 96,
"type": "word",
"position": 28
},
{
"token": "流",
"start_offset": 96,
"end_offset": 97,
"type": "word",
"position": 29
},
{
"token": "从",
"start_offset": 98,
"end_offset": 99,
"type": "word",
"position": 31
},
{
"token": "apach",
"start_offset": 100,
"end_offset": 106,
"type": "word",
"position": 32
},
{
"token": "日志",
"start_offset": 107,
"end_offset": 109,
"type": "word",
"position": 33
},
{
"token": "到",
"start_offset": 109,
"end_offset": 110,
"type": "word",
"position": 34
},
{
"token": "wordpress",
"start_offset": 111,
"end_offset": 120,
"type": "word",
"position": 35
},
{
"token": "博",
"start_offset": 121,
"end_offset": 122,
"type": "word",
"position": 36
},
{
"token": "文",
"start_offset": 122,
"end_offset": 123,
"type": "word",
"position": 37
},
{
"token": "我们",
"start_offset": 124,
"end_offset": 126,
"type": "word",
"position": 39
},
{
"token": "可以",
"start_offset": 126,
"end_offset": 128,
"type": "word",
"position": 40
},
{
"token": "帮助",
"start_offset": 128,
"end_offset": 130,
"type": "word",
"position": 41
},
{
"token": "人们",
"start_offset": 130,
"end_offset": 132,
"type": "word",
"position": 42
},
{
"token": "体验",
"start_offset": 132,
"end_offset": 134,
"type": "word",
"position": 43
},
{
"token": "搜索",
"start_offset": 134,
"end_offset": 136,
"type": "word",
"position": 44
},
{
"token": "强大",
"start_offset": 137,
"end_offset": 139,
"type": "word",
"position": 46
},
{
"token": "力量",
"start_offset": 139,
"end_offset": 141,
"type": "word",
"position": 47
},
{
"token": "帮助",
"start_offset": 142,
"end_offset": 144,
"type": "word",
"position": 49
},
{
"token": "他们",
"start_offset": 144,
"end_offset": 146,
"type": "word",
"position": 50
},
{
"token": "以",
"start_offset": 146,
"end_offset": 147,
"type": "word",
"position": 51
},
{
"token": "截然不同",
"start_offset": 147,
"end_offset": 151,
"type": "word",
"position": 52
},
{
"token": "方式",
"start_offset": 152,
"end_offset": 154,
"type": "word",
"position": 54
},
{
"token": "探索",
"start_offset": 154,
"end_offset": 156,
"type": "word",
"position": 55
},
{
"token": "和",
"start_offset": 156,
"end_offset": 157,
"type": "word",
"position": 56
},
{
"token": "分析",
"start_offset": 157,
"end_offset": 159,
"type": "word",
"position": 57
},
{
"token": "数据",
"start_offset": 159,
"end_offset": 161,
"type": "word",
"position": 58
}
]
}