solr - Solr (Open Solr) のサジェスターの結果に句読点が含まれている

Question

私はサジェスター (Suggester) の設定に取り組んでいますが、返される結果に句読点が含まれてしまいます。たとえば、「Volcan」と入力すると、次のような候補が返されます。

"volcanoes", "volcanic", "volcano", "volcano," <- コンマ付き, "volcanoes." <- ピリオド付き

solrconfig.xml ファイルのコードは次のとおりです。

<!-- Spell-check search component registered under the name "suggest";
     the /suggest request handler lists it as its only component. -->
<searchComponent class="solr.SpellCheckComponent" name="suggest">
  <lst name="spellchecker">
    <!-- Dictionary name; the /suggest handler selects it via spellcheck.dictionary. -->
    <str name="name">suggest</str>
    <!-- Suggester implementation, using the TSTLookup class for lookups. -->
    <str name="classname">org.apache.solr.spelling.suggest.Suggester</str>
    <str name="lookupImpl">org.apache.solr.spelling.suggest.tst.TSTLookup</str>
    <!-- Source field for suggestions.
         NOTE(review): the definition of the "text" field is not shown here.
         Presumably the suggestions are the indexed terms of this field, so
         punctuation in the results would come from its index analyzer. Confirm
         which fieldType "text" uses. -->
    <str name="field">text</str>
    <!-- NOTE(review): presumably a minimum document-frequency ratio a term must
         reach to be offered as a suggestion. Confirm against the Solr docs. -->
    <float name="threshold">0.005</float>
    <!-- NOTE(review): presumably rebuilds the suggestion dictionary on every
         commit. Confirm the cost is acceptable for the index size. -->
    <str name="buildOnCommit">true</str>
  </lst>
</searchComponent>
<!-- Request handler exposed at /suggest. It runs only the "suggest" search
     component listed in the "components" array at the bottom. -->
<requestHandler class="org.apache.solr.handler.component.SearchHandler" name="/suggest">
  <!-- Default request parameters. -->
  <lst name="defaults">
    <str name="echoParams">explicit</str>
    <!-- Also set in the invariants list below, which makes this entry redundant. -->
    <str name="spellcheck">true</str>
    <!-- Selects the spellchecker whose name is "suggest". -->
    <str name="spellcheck.dictionary">suggest</str>
    <str name="spellcheck.onlyMorePopular">true</str>
    <!-- Maximum number of suggestions; the sample response reports numFound 5. -->
    <str name="spellcheck.count">5</str>
    <!-- NOTE(review): set to true here but to false in the invariants list
         below. The two values contradict each other; presumably the invariant
         takes precedence. Confirm, then remove one of the entries. -->
    <str name="spellcheck.collate">true</str>
  </lst>
  <!-- Invariant parameters. NOTE(review): presumably these cannot be overridden
       by the request or by the defaults above. Confirm against the Solr docs. -->
  <lst name="invariants">
      <!-- always run the Suggester for queries to this handler -->
      <str name="spellcheck">true</str>
      <!-- collate not needed: the query is tokenized as a keyword, and only suggestions for that term are needed -->
      <str name="spellcheck.collate">false</str>
  </lst>
  <!-- Replaces the component chain with the single "suggest" component. -->
  <arr name="components">
    <str>suggest</str>
  </arr>
</requestHandler>

schema.xml ファイルには、次のものがあります。

<!-- Text field type named "spell" with separate index-time and query-time analyzers.
     NOTE(review): the suggester above reads the field "text"; whether that field
     is declared with this type is not visible here. Verify, since the index
     analyzer of the suggester's source field determines which terms exist. -->
<fieldType name="spell" class="solr.TextField" positionIncrementGap="100" indexed="true" stored="false" multiValued="true" termVectors="true" termPositions="true" termOffsets="true">
  <!-- Index-time chain: standard tokenizer, stop-word removal, standard filter,
       duplicate removal, shingling, then lowercasing. -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Drops the words listed in stopwords.txt, ignoring case. -->
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <!-- Emits word n-grams (shingles) of 2 to 4 tokens and keeps the single
         tokens as well (outputUnigrams is true). -->
    <filter class="solr.ShingleFilterFactory"
                    minShingleSize="2"
                    maxShingleSize="4"
                    outputUnigrams="true"
                    outputUnigramsIfNoShingles="true"/>
    <!-- Lowercasing runs last, after the shingles have been built. -->
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
  <!-- Query-time chain: the keyword tokenizer is used instead of the standard
       tokenizer, followed by synonym expansion, trimming, stop-word removal,
       standard filter, duplicate removal, and lowercasing. -->
  <analyzer type="query">
    <tokenizer class="solr.KeywordTokenizerFactory"/>
    <!-- Expands synonyms from synonyms.txt, ignoring case. -->
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <filter class="solr.TrimFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
  </analyzer>
</fieldType>

結果は次のとおりです。

{
    "responseHeader": {
        "status": 0,
        "QTime": 0,
        "params": {
            "wt": "json",
            "q": "volcan"
        }
    },
    "spellcheck": {
        "suggestions": [
            "volcan",
            {
                "numFound": 5,
                "startOffset": 0,
                "endOffset": 6,
                "suggestion": [
                    "volcanoes",
                    "volcanic",
                    "volcano",
                    "volcano,",
                    "volcanoes."
                ]
            }
        ]
    }
}

solr - Solr (Open Solr) のサジェスターの結果に句読点が含まれている

1 に答える 1

Related

Reference