elasticsearch - elasticsearch - 単一フィールドの用語頻度を返す

Question

フィールドの用語頻度を取得するためにファセットを使用しようとしています。私のクエリは 1 つのヒットしか返さないので、ファセットが特定のフィールドで最も頻度の高い用語を返すようにしたいと考えています。

私のマッピング:

{
"mappings":{
    "document":{
        "properties":{
            "tags":{
                "type":"object",
                "properties":{
                    "title":{
                        "fields":{
                            "partial":{
                                "search_analyzer":"main",
                                "index_analyzer":"partial",
                                "type":"string",
                                "index" : "analyzed"
                            }
                            "title":{
                                "type":"string",
                                "analyzer":"main",
                                "index" : "analyzed"
                            }
                        },
                        "type":"multi_field"
                    }
                }
            }
        }
    }
},

"settings":{
    "analysis":{
        "filter":{
            "name_ngrams":{
                "side":"front",
                "max_gram":50,
                "min_gram":2,
                "type":"edgeNGram"
            }
        },

        "analyzer":{
            "main":{
                "filter": ["standard", "lowercase", "asciifolding"],
                "type": "custom",
                "tokenizer": "standard"
            },
            "partial":{
                "filter":["standard","lowercase","asciifolding","name_ngrams"],
                "type": "custom",
                "tokenizer": "standard"
            }
        }
    }
}

}

テストデータ：

 curl -XPUT localhost:9200/testindex/document -d '{"tags": {"title": "people also kill people"}}'

クエリ:

 curl -XGET 'localhost:9200/testindex/document/_search?pretty=1' -d '
{
    "query":
    {
       "term": { "tags.title": "people" }
    },
    "facets": {
       "popular_tags": { "terms": {"field": "tags.title"}}
    }
}'

この結果

"hits" : {
   "total" : 1,
    "max_score" : 0.99381393,
    "hits" : [ {
    "_index" : "testindex",
    "_type" : "document",
    "_id" : "uI5k0wggR9KAvG9o7S7L2g",
    "_score" : 0.99381393, "_source" : {"tags": {"title": "people also kill people"}}
 } ]
},
"facets" : {
  "popular_tags" : {
  "_type" : "terms",
  "missing" : 0,
  "total" : 3,
  "other" : 0,
  "terms" : [ {
    "term" : "people",
    "count" : 1            // I expect this to be 2
   }, {
    "term" : "kill",
    "count" : 1
  }, {
    "term" : "also",
    "count" : 1
  } ]
}

}

上記の結果は私が望むものではありません。度数を2にしたい

"hits" : {
   "total" : 1,
   "max_score" : 0.99381393,
   "hits" : [ {
   "_index" : "testindex",
   "_type" : "document",
   "_id" : "uI5k0wggR9KAvG9o7S7L2g",
   "_score" : 0.99381393, "_source" : {"tags": {"title": "people also kill people"}}
} ]
},
"facets" : {
"popular_tags" : {
  "_type" : "terms",
  "missing" : 0,
  "total" : 3,
  "other" : 0,
  "terms" : [ {
    "term" : "people",
    "count" : 2            
  }, {
    "term" : "kill",
    "count" : 1
  }, {
    "term" : "also",
    "count" : 1
  } ]
 }
}

どうすればこれを達成できますか? ファセットは間違った方法ですか？

score 0 · Accepted Answer

残念ながら、フィールドの用語頻度は Elastic では利用できません。GitHub プロジェクトのIndex TermListは Lucene の Terms と連携しており、すべてのドキュメントの合計発生数を計算します。それを確認して、必要に応じて切り替えることができます。

elasticsearch - elasticsearch - 単一フィールドの用語頻度を返す

2 に答える 2

Related

Reference