django - インデックスの構築に Ngram と Edgengram を使用しているにもかかわらず、Elasticsearch+Haystack で部分検索が機能しない

Question

私は次のようなインデックスを構築しています:

class BookIndex(indexes.SearchIndex,indexes.Indexable):

text= indexes.EdgeNgramField(document=True,use_template=True)
content_auto = indexes.EdgeNgramField(model_attr='title')
isbn_13 = indexes.CharField(model_attr='isbn_13')
validate = indexes.IntegerField(model_attr='validate')
price = indexes.IntegerField(model_attr='price')
authors = indexes.EdgeNgramField()
reviews = indexes.CharField()
publishers = indexes.EdgeNgramField()
institutes = indexes.EdgeNgramField()
sellers = indexes.CharField()
category = indexes.CharField()
sub_category = indexes.CharField()

Ngram を使用してみましたが、部分検索が機能しません。

SearchQuerySet().all().filter(content=query)私も試したようにクエリしていSearchQuerySet().filter(content__contains=query)ますが、部分一致の結果は表示されません。

誰かが私を助けてくれますか？

score 1 · Accepted Answer

Haystack は ElasticSearch にあまり適していません。適切なインデックス値を使用できないため、カスタム ElasticSearchBackEnd を提供して有効にする必要があります。

#in a search_backends.py file
from django.conf import settings
from haystack.backends.elasticsearch_backend import (
    ElasticsearchSearchBackend,
    ElasticsearchSearchEngine
)
from haystack.fields import EdgeNgramField as BaseEdgeNgramField, NgramField as BaseNgramField
from haystack.indexes import CharField

#just an example of which degree of configuration could be possible
CUSTOM_FIELD_TYPE = {
    'completion': {
        'type': 'completion',
        'payloads': True,
        'analyzer': 'suggest_analyzer',
        'preserve_separators': True,
        'preserve_position_increments': False
    },
}

# Custom Backend
class CustomElasticBackend(ElasticsearchSearchBackend):

    DEFAULT_ANALYZER = None

    def __init__(self, connection_alias, **connection_options):
        super(CustomElasticBackend, self).__init__(
                                connection_alias, **connection_options)
        user_settings = getattr(settings, 'ELASTICSEARCH_INDEX_SETTINGS', None)
        self.DEFAULT_ANALYZER = getattr(settings, 'ELASTICSEARCH_DEFAULT_ANALYZER', "snowball")
        if user_settings:
            setattr(self, 'DEFAULT_SETTINGS', user_settings)

    def build_schema(self, fields):
        content_field_name, mapping = super(CustomElasticBackend,
                                              self).build_schema(fields)

        for field_name, field_class in fields.items():
            field_mapping = mapping[field_class.index_fieldname]

            index_analyzer = getattr(field_class, 'index_analyzer', None)
            search_analyzer = getattr(field_class, 'search_analyzer', None)
            field_analyzer = getattr(field_class, 'analyzer', self.DEFAULT_ANALYZER)

            if field_mapping['type'] == 'string' and field_class.indexed:
                field_mapping["term_vector"] = "with_positions_offsets"
                if not hasattr(field_class, 'facet_for') and not field_class.field_type in('ngram', 'edge_ngram'):
                    field_mapping['analyzer'] = field_analyzer

            if field_class.field_type in CUSTOM_FIELD_TYPE:
                field_mapping = CUSTOM_FIELD_TYPE.get(field_class.field_type).copy()

            if index_analyzer and search_analyzer:
                field_mapping['index_analyzer'] = index_analyzer
                field_mapping['search_analyzer'] = search_analyzer
                if 'analyzer' in field_mapping:
                    del(field_mapping['analyzer'])

            mapping.update({field_class.index_fieldname: field_mapping})
        return (content_field_name, mapping)


class CustomElasticSearchEngine(ElasticsearchSearchEngine):
    backend = CustomElasticBackend


# Custom fields, just use the ones you need or create yours
class CustomFieldMixin(object):

    def __init__(self, **kwargs):
        self.analyzer = kwargs.pop('analyzer', None)
        self.index_analyzer = kwargs.pop('index_analyzer', None)
        self.search_analyzer = kwargs.pop('search_analyzer', None)
        super(CustomFieldMixin, self).__init__(**kwargs)

class CustomCharField(CustomFieldMixin, CharField):
    pass


class CustomCompletionField(CustomFieldMixin, CharField):
    field_type = 'completion'


class CustomEdgeNgramField(CustomFieldMixin, BaseEdgeNgramField):
    pass


class CustomNgramField(CustomFieldMixin, BaseNgramField):
    pass




#settings.py
ELASTICSEARCH_INDEX_SETTINGS = {
    'settings': {
        "analysis": {
            "analyzer": {
                "custom_analyzer": {
                    "type": "custom",
                    "tokenizer": "standard",
                    "filter":  [ "lowercase", "asciifolding" ]
                },
                "str_index_analyzer" : {
                    "type": "custom",
                    "tokenizer" : "haystack_ngram_tokenizer",
                    "filter" : ["stopwords", "asciifolding", "lowercase", "snowball", "elision", "worddelimiter"]
                },
                "str_search_analyzer" : {
                    "type": "custom",
                    "tokenizer" : "standard",
                    "filter" : ["stopwords", "asciifolding", "lowercase", "snowball", "elision", "worddelimiter"]
                },
                "suggest_analyzer": {
                    "type":"custom",
                    "tokenizer":"standard",
                    "filter":[
                        "stopwords",
                        "standard",
                        "lowercase",
                        "asciifolding"
                    ]
                },
            },
            "tokenizer": {
                "haystack_ngram_tokenizer": {
                    "type": "nGram",
                    "min_gram": 2,
                    "max_gram": 20,
                },
            },
            "filter": {
                "elision": {
                    "type": "elision",
                    "articles": ["l", "m", "t", "qu", "n", "s", "j", "d"]
                },
                "stopwords": {
                    "type": "stop",
                    "stopwords": ["_french_", "_english_"],
                    "ignore_case": True
                },
                "worddelimiter": {
                    "type": "word_delimiter"
                }
            }
        }
    }
}

#Haystack settings
HAYSTACK_CONNECTIONS = {
    'default': {
        ...
        'ENGINE': 'path.to.search_backends.CustomElasticSearchEngine',
        ...
    },
}

score 0 · Accepted Answer

バージョンで使用elasticsearch-2.xすると、この問題が発生します。バージョンがこれらと一致するかどうかを確認してください。django-haystack<2.5

elasticsearch-2.xそれ以降は、 boosthaystack が渡すサポートメタデータではなくなりました。( https://stackoverflow.com/a/36847352/5108155の回答を参照してください)
この問題は2.5haystack のバージョンで修正されました。

インデックスの構築 (または更新) 中elasticsearchに、フィールドに適用する予定の ngram アナライザーを取得できませんでした。手動で実行することでこれを検証できます。これによりcurl 'http://<elasticsearch_address>/<index_name>/?pretty' 、フィールドの型のみが表示され、アナライザープロパティは表示されません。

興味深いのは、クラスの内部silently_failプロパティが原因で、haystack がこの例外をスローしないことです。ElasticSearchBackend

django - インデックスの構築に Ngram と Edgengram を使用しているにもかかわらず、Elasticsearch+Haystack で部分検索が機能しない

2 に答える 2

Related

Reference