19

同じ内容のドキュメントがいくつかありますが、これらのドキュメントを検索すると、検索対象のフィールドに同じテキストが含まれているにもかかわらず、異なるスコアが返されます。explain を有効にしてスコアの内訳を確認しましたが、スコアが異なる理由を分析しても突き止めることができません。

私のクエリは次のとおりです。

 curl 'localhost:9200/acqindex/_search?pretty=1' -d '{
    "explain" : true,
    "query" : {           
        "query_string" : {         
            "query" : "text:shimla"
        }
    }     
  }'

検索応答:

{
  "took" : 8,
  "timed_out" : false,
  "_shards" : {
    "total" : 5,
    "successful" : 5,
    "failed" : 0
  },
  "hits" : {
    "total" : 31208,
    "max_score" : 268.85962,
    "hits" : [ {
      "_shard" : 0,
      "_node" : "KOebAnGhSJKUHLPNxndcpQ",
      "_index" : "acqindex",
      "_type" : "autocomplete_questions",
      "_id" : "50efec6c38cc6fdabd8653a3",
      "_score" : 268.85962, "_source" : {"_class":"com.ixigo.next.cms.model.AutoCompleteObject","_id":"50efec6c38cc6fdabd8653a3","ad":"rajasthan,IN","category":["Destination"],"ctype":"destination","eid":"503b2a65e4b032e338f0d24b","po":8.772307692307692,"text":"shimla","url":"/travel-guide/shimla"},
      "_explanation" : {
        "value" : 268.85962,
        "description" : "sum of:",
        "details" : [ {
          "value" : 38.438133,
          "description" : "weight(text:shi in 5860), product of:",
          "details" : [ {
            "value" : 0.37811017,
            "description" : "queryWeight(text:shi), product of:",
            "details" : [ {
              "value" : 5.0829277,
              "description" : "idf(docFreq=7503, maxDocs=445129)"
            }, {
              "value" : 0.074388266,
              "description" : "queryNorm"
            } ]
          }, {
            "value" : 101.658554,
            "description" : "fieldWeight(text:shi in 5860), product of:",
            "details" : [ {
              "value" : 1.0,
              "description" : "tf(termFreq(text:shi)=1)"
            }, {
              "value" : 5.0829277,
              "description" : "idf(docFreq=7503, maxDocs=445129)"
            }, {
              "value" : 20.0,
              "description" : "fieldNorm(field=text, doc=5860)"
            } ]
          } ]
        }, {
          "value" : 66.8446,
          "description" : "weight(text:shim in 5860), product of:",
          "details" : [ {
            "value" : 0.49862078,
            "description" : "queryWeight(text:shim), product of:",
            "details" : [ {
              "value" : 6.7029495,
              "description" : "idf(docFreq=1484, maxDocs=445129)"
            }, {
              "value" : 0.074388266,
              "description" : "queryNorm"
            } ]
          }, {
            "value" : 134.05899,
            "description" : "fieldWeight(text:shim in 5860), product of:",
            "details" : [ {
              "value" : 1.0,
              "description" : "tf(termFreq(text:shim)=1)"
            }, {
              "value" : 6.7029495,
              "description" : "idf(docFreq=1484, maxDocs=445129)"
            }, {
              "value" : 20.0,
              "description" : "fieldNorm(field=text, doc=5860)"
            } ]
          } ]
        }, {
          "value" : 81.75818,
          "description" : "weight(text:shiml in 5860), product of:",
          "details" : [ {
            "value" : 0.5514458,
            "description" : "queryWeight(text:shiml), product of:",
            "details" : [ {
              "value" : 7.413075,
              "description" : "idf(docFreq=729, maxDocs=445129)"
            }, {
              "value" : 0.074388266,
              "description" : "queryNorm"
            } ]
          }, {
            "value" : 148.2615,
            "description" : "fieldWeight(text:shiml in 5860), product of:",
            "details" : [ {
              "value" : 1.0,
              "description" : "tf(termFreq(text:shiml)=1)"
            }, {
              "value" : 7.413075,
              "description" : "idf(docFreq=729, maxDocs=445129)"
            }, {
              "value" : 20.0,
              "description" : "fieldNorm(field=text, doc=5860)"
            } ]
          } ]
        }, {
          "value" : 81.8187,
          "description" : "weight(text:shimla in 5860), product of:",
          "details" : [ {
            "value" : 0.55164987,
            "description" : "queryWeight(text:shimla), product of:",
            "details" : [ {
              "value" : 7.415818,
              "description" : "idf(docFreq=727, maxDocs=445129)"
            }, {
              "value" : 0.074388266,
              "description" : "queryNorm"
            } ]
          }, {
            "value" : 148.31636,
            "description" : "fieldWeight(text:shimla in 5860), product of:",
            "details" : [ {
              "value" : 1.0,
              "description" : "tf(termFreq(text:shimla)=1)"
            }, {
              "value" : 7.415818,
              "description" : "idf(docFreq=727, maxDocs=445129)"
            }, {
              "value" : 20.0,
              "description" : "fieldNorm(field=text, doc=5860)"
            } ]
          } ]
        } ]
      }
    }, {
      "_shard" : 1,
      "_node" : "KOebAnGhSJKUHLPNxndcpQ",
      "_index" : "acqindex",
      "_type" : "autocomplete_questions",
      "_id" : "50efed1c38cc6fdabd8b8d2f",
      "_score" : 268.29953, "_source" : {"_id":"50efed1c38cc6fdabd8b8d2f","ad":"himachal pradesh,IN","category":["Hill","See and Do","Destination","Mountain","Nature and Wildlife"],"ctype":"destination","eid":"503b2a64e4b032e338f0d0af","po":8.781970310391364,"text":"shimla","url":"/travel-guide/shimla"},
      "_explanation" : {
        "value" : 268.29953,
        "description" : "sum of:",
        "details" : [ {
          "value" : 38.52957,
          "description" : "weight(text:shi in 14769), product of:",
          "details" : [ {
            "value" : 0.37895453,
            "description" : "queryWeight(text:shi), product of:",
            "details" : [ {
              "value" : 5.083667,
              "description" : "idf(docFreq=7263, maxDocs=431211)"
            }, {
              "value" : 0.07454354,
              "description" : "queryNorm"
            } ]
          }, {
            "value" : 101.67334,
            "description" : "fieldWeight(text:shi in 14769), product of:",
            "details" : [ {
              "value" : 1.0,
              "description" : "tf(termFreq(text:shi)=1)"
            }, {
              "value" : 5.083667,
              "description" : "idf(docFreq=7263, maxDocs=431211)"
            }, {
              "value" : 20.0,
              "description" : "fieldNorm(field=text, doc=14769)"
            } ]
          } ]
        }, {
          "value" : 66.67524,
          "description" : "weight(text:shim in 14769), product of:",
          "details" : [ {
            "value" : 0.49850821,
            "description" : "queryWeight(text:shim), product of:",
            "details" : [ {
              "value" : 6.6874766,
              "description" : "idf(docFreq=1460, maxDocs=431211)"
            }, {
              "value" : 0.07454354,
              "description" : "queryNorm"
            } ]
          }, {
            "value" : 133.74953,
            "description" : "fieldWeight(text:shim in 14769), product of:",
            "details" : [ {
              "value" : 1.0,
              "description" : "tf(termFreq(text:shim)=1)"
            }, {
              "value" : 6.6874766,
              "description" : "idf(docFreq=1460, maxDocs=431211)"
            }, {
              "value" : 20.0,
              "description" : "fieldNorm(field=text, doc=14769)"
            } ]
          } ]
        }, {
          "value" : 81.53204,
          "description" : "weight(text:shiml in 14769), product of:",
          "details" : [ {
            "value" : 0.5512571,
            "description" : "queryWeight(text:shiml), product of:",
            "details" : [ {
              "value" : 7.3951015,
              "description" : "idf(docFreq=719, maxDocs=431211)"
            }, {
              "value" : 0.07454354,
              "description" : "queryNorm"
            } ]
          }, {
            "value" : 147.90204,
            "description" : "fieldWeight(text:shiml in 14769), product of:",
            "details" : [ {
              "value" : 1.0,
              "description" : "tf(termFreq(text:shiml)=1)"
            }, {
              "value" : 7.3951015,
              "description" : "idf(docFreq=719, maxDocs=431211)"
            }, {
              "value" : 20.0,
              "description" : "fieldNorm(field=text, doc=14769)"
            } ]
          } ]
        }, {
          "value" : 81.56268,
          "description" : "weight(text:shimla in 14769), product of:",
          "details" : [ {
            "value" : 0.55136067,
            "description" : "queryWeight(text:shimla), product of:",
            "details" : [ {
              "value" : 7.3964915,
              "description" : "idf(docFreq=718, maxDocs=431211)"
            }, {
              "value" : 0.07454354,
              "description" : "queryNorm"
            } ]
          }, {
            "value" : 147.92982,
            "description" : "fieldWeight(text:shimla in 14769), product of:",
            "details" : [ {
              "value" : 1.0,
              "description" : "tf(termFreq(text:shimla)=1)"
            }, {
              "value" : 7.3964915,
              "description" : "idf(docFreq=718, maxDocs=431211)"
            }, {
              "value" : 20.0,
              "description" : "fieldNorm(field=text, doc=14769)"
            } ]
          } ]
        } ]
      }
    } ]
  }
}

ドキュメントは次のとおりです。

{"_class":"com.ixigo.next.cms.model.AutoCompleteObject","_id":"50efec6c38cc6fdabd8653a3","ad":"rajasthan,IN","category":["Destination"],"ctype":"destination","eid":"503b2a65e4b032e338f0d24b","po":8.772307692307692,"text":"shimla","url":"/travel-guide/shimla"}

{"_id":"50efed1c38cc6fdabd8b8d2f","ad":"himachal pradesh,IN","category":["Hill","See and Do","Destination","Mountain","Nature and Wildlife"],"ctype":"destination","eid":"503b2a64e4b032e338f0d0af","po":8.781970310391364,"text":"shimla","url":"/travel-guide/shimla"}

スコアが異なる理由を理解できるよう、ご教示ください。

4

回答 2 件

33

Lucene のスコアは、さまざまな要因によって決まります。TF-IDF 類似度 (デフォルト) を使用する場合、スコアは主に次の要素に依存します。

  1. 用語頻度 (TF): 見つかった用語がそのドキュメント内でどの程度頻繁に出現するか
  2. 逆文書頻度 (IDF): 見つかった用語がインデックス内のドキュメント全体でどの程度頻繁に出現するか
  3. フィールド ノルム (インデックス時のブーストを含む): 短いフィールドは、長いフィールドよりも高いスコアになります。

あなたの場合、2 つのドキュメントが異なるシャードからのものであることを考慮する必要があります。したがって、すべてのシャードは実際には個別の lucene インデックスであるため、スコアはそれぞれのシャードで個別に計算されます。

より正確なスコアリングが必要な場合は、コストは高くなりますが、Elasticsearch の検索タイプ DFS Query Then Fetch (dfs_query_then_fetch) を確認することをお勧めします。デフォルトの検索タイプは、単純な Query Then Fetch (query_then_fetch) です。

2013-01-29T16:00:56.263 に回答
0

javanaは、スコアリングが複数のシャードで発生するという事実からスコアの違いが生じることを示す問題を明確に指摘しました。これらのシャードには、異なる数のドキュメントが含まれる場合があります。これは、スコアリング アルゴリズムに影響します。

ただし、『Elasticsearch: The Definitive Guide』の著者は次のように述べています。

ローカル IDF とグローバル IDF [逆文書頻度] の違いは、インデックスに追加するドキュメントが増えるほど小さくなります。実運用規模のデータ量では、ローカル IDF はすぐに均されます。問題は、関連性の計算が壊れていることではなく、データが少なすぎることです。

本番環境では dfs_query_then_fetch を使用しないでください。テスト目的であれば、インデックスを 1 つのプライマリ シャードに配置するか、?search_type=dfs_query_then_fetch を指定してください。

2020-04-16T15:04:42.647 に回答