ご挨拶の友人、
要点にまっすぐ。Mysql DB に多くの BLOB を保存しました。これらは主に PDF (80%) と .doc です。DBにもテキストがあります。今までインデックスを作成してテキストをクエリできましたが、BLOB にインデックスを付けることができませんでした。単一のコレクション (ドキュメント) を作成しようとしていますが、うまくいきません。そのようなことを行う方法に関するレシピはありますか?
data-config.xml の一部:
<?xml version="1.0" encoding="utf-8"?>
<dataConfig>
  <!--
    DataImportHandler configuration for the "ktimatologio" MySQL database.
    Two root entities read the same table, aitiologikes_ektheseis:
      * aitiologikes_ektheseis     : rows with type = 'text' (plain text body)
      * aitiologikes_ektheseis_bin : rows with type = 'bin'  (BLOB parsed by Tika)
  -->

  <!--
    JDBC connection used by both root entities.
    NOTE(review): per the DIH FAQ, batchSize="-1" asks the MySQL driver to
    stream rows instead of buffering the whole result set; confirm this against
    the Solr / Connector/J versions in use.
  -->
  <dataSource name="db"
              type="JdbcDataSource"
              driver="com.mysql.jdbc.Driver"
              url="jdbc:mysql://127.0.0.1:3306/ktimatologio"
              user="root"
              password="********"
              autoCommit="true"
              batchSize="-1"
              convertType="false"/>

  <!-- Exposes a column of the parent row as a stream for the Tika sub-entity. -->
  <dataSource name="fieldReader" type="FieldStreamDataSource"/>

  <document>
    <!--
      Text rows. pk="id" is declared because the DIH documentation requires a
      primary key for delta imports. deltaQuery returns only that key; the
      full row is then fetched by deltaImportQuery through
      ${dataimporter.delta.id}.
    -->
    <entity name="aitiologikes_ektheseis"
            dataSource="db"
            pk="id"
            transformer="HTMLStripTransformer"
            query="select id, title, title AS grid_title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT( body,' ',title) AS content from aitiologikes_ektheseis where type = 'text'"
            deltaQuery="select id from aitiologikes_ektheseis where type = 'text' and last_modified &gt; '${dataimporter.last_index_time}'"
            deltaImportQuery="select id, title, title AS grid_title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, CONCAT( body,' ',title) AS content from aitiologikes_ektheseis where type = 'text' and id='${dataimporter.delta.id}'">
      <field column="id" name="ida"/>
      <field column="solr_id" name="solr_id"/>
      <field column="title" name="title" stripHTML="true"/>
      <field column="grid_title" name="grid_title" stripHTML="true"/>
      <field column="model" name="model" stripHTML="true"/>
      <field column="type" name="type" stripHTML="true"/>
      <field column="url" name="url" stripHTML="true"/>
      <field column="last_modified" name="last_modified" stripHTML="true"/>
      <field column="search_tag" name="search_tag" stripHTML="true"/>
      <field column="content" name="content" stripHTML="true"/>
    </entity>

    <!--
      Binary rows (PDF / .doc stored in the bin_con BLOB column).
      * pk="id" and a key-only deltaQuery, as for the text entity. The previous
        deltaQuery also selected the BLOB, which made the change-detection
        query read every modified document body for no benefit.
      * The BLOB is selected under its own column name instead of the alias
        "text", because "text" is also the column emitted by
        TikaEntityProcessor. Keeping the names distinct avoids confusion and
        any implicit mapping of the raw bytes to a schema field named "text"
        (NOTE(review): whether such a field exists is not visible here).
      NOTE(review): stripHTML is an HTMLStripTransformer attribute; this entity
      only configures TemplateTransformer, so the stripHTML flags below appear
      to have no effect. They are left untouched so that the stored values do
      not change. Confirm the intent before adding HTMLStripTransformer.
    -->
    <entity name="aitiologikes_ektheseis_bin"
            dataSource="db"
            pk="id"
            transformer="TemplateTransformer"
            query="select id, title, title AS grid_title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con from aitiologikes_ektheseis where type = 'bin'"
            deltaQuery="select id from aitiologikes_ektheseis where type = 'bin' and last_modified &gt; '${dataimporter.last_index_time}'"
            deltaImportQuery="select id, title, title AS grid_title, model, type, url, last_modified, CONCAT_WS('_',id,model) AS solr_id, search_tag, bin_con from aitiologikes_ektheseis where type = 'bin' and id='${dataimporter.delta.id}'">
      <field column="id" name="ida"/>
      <field column="solr_id" name="solr_id"/>
      <field column="title" name="title" stripHTML="true"/>
      <field column="grid_title" name="grid_title" stripHTML="true"/>
      <field column="model" name="model" stripHTML="true"/>
      <field column="type" name="type" stripHTML="true"/>
      <field column="url" name="url" stripHTML="true"/>
      <field column="last_modified" name="last_modified" stripHTML="true"/>
      <field column="search_tag" name="search_tag" stripHTML="true"/>

      <!--
        Tika reads the BLOB of the current parent row through the fieldReader
        data source (dataField = parentEntityName.columnName) and emits the
        extracted plain text in its "text" column, indexed as contentbin.
        The entity is given an explicit name, which the DIH documentation
        lists as a required attribute.
      -->
      <entity name="tika_bin"
              dataSource="fieldReader"
              processor="TikaEntityProcessor"
              dataField="aitiologikes_ektheseis_bin.bin_con"
              format="text">
        <field column="text" name="contentbin" stripHTML="true"/>
      </entity>
    </entity>
...
...
  </document>
</dataConfig>
schema.xml の一部 (fieldTypes とフィールド定義):
<fieldType name="text_ktimatologio" class="solr.TextField" positionIncrementGap="100">
  <!--
    Mixed English / Greek text. The index and query analyzers run the same
    filter chain in the same order (the query side additionally expands
    synonyms first), so both sides produce comparable terms.

    Ordering changes compared with the previous definition:
      * GreekLowerCaseFilterFactory now runs BEFORE the Greek stop filter, as
        in Solr's stock Greek analyzer. Stop words are then matched on the
        normalized token form instead of the raw accented / final-sigma form.
      * KeywordMarkerFilterFactory now runs BEFORE both stemmers, so words in
        protwords.txt are protected from the Greek stemmer as well as Porter.
    Documents must be re-indexed after changing an index-time analyzer.
  -->
  <analyzer type="index">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- English normalization: stop words, lowercase, strip possessive 's. -->
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <!-- Greek normalization first, then Greek stop words. -->
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
    <!-- Mark protected words before any stemmer sees them. -->
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.GreekStemFilterFactory"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
  <analyzer type="query">
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <!-- Query-time only: synonym expansion on the raw tokens. -->
    <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
    <!-- From here on, identical to the index analyzer. -->
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt" enablePositionIncrements="true"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.EnglishPossessiveFilterFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
    <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
    <filter class="solr.GreekStemFilterFactory"/>
    <filter class="solr.PorterStemFilterFactory"/>
  </analyzer>
</fieldType>
<fieldType name="text" class="solr.TextField" positionIncrementGap="100">
  <!--
    Greek text with HTML markup stripped before tokenization. The index and
    query analyzers are identical.

    Changes compared with the previous definition:
      * The query analyzer listed LowerCaseFilterFactory twice; the redundant
        second instance is removed.
      * GreekLowerCaseFilterFactory now runs BEFORE the Greek stop filter, as
        in Solr's stock Greek analyzer, so stop words are matched on the
        normalized token form.
    NOTE(review): HunspellStemFilterFactory runs after GreekStemFilterFactory,
    so it only sees tokens the Greek stemmer has already reduced. Dictionary
    lookups on such stems may rarely match; consider keeping only one of the
    two stemmers. Left unchanged here.
    Documents must be re-indexed after changing an index-time analyzer.
  -->
  <analyzer type="index">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
    <filter class="solr.HunspellStemFilterFactory" dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff" ignoreCase="true"/>
  </analyzer>
  <analyzer type="query">
    <charFilter class="solr.HTMLStripCharFilterFactory"/>
    <tokenizer class="solr.StandardTokenizerFactory"/>
    <filter class="solr.StandardFilterFactory"/>
    <filter class="solr.LowerCaseFilterFactory"/>
    <filter class="solr.GreekLowerCaseFilterFactory"/>
    <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_el.txt" enablePositionIncrements="true"/>
    <filter class="solr.GreekStemFilterFactory"/>
    <filter class="solr.HunspellStemFilterFactory" dictionary="dictionaries/el_GR.dic" affix="dictionaries/el_GR.aff" ignoreCase="true"/>
  </analyzer>
</fieldType>
<fields>
  <!-- Identifiers: "ida" receives the table's id column, "solr_id" the
       CONCAT_WS('_',id,model) value built in the data-config.xml queries. -->
  <field name="ida"
         type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="solr_id"
         type="string" indexed="true" stored="true" multiValued="false"/>

  <!-- Titles, analyzed with the mixed English/Greek field type. -->
  <field name="title"
         type="text_ktimatologio" indexed="true" stored="true"/>
  <field name="grid_title"
         type="text_ktimatologio" indexed="true" stored="true"/>

  <!-- Row metadata, kept as unanalyzed strings. -->
  <field name="model"
         type="string" indexed="true" stored="true" multiValued="false"/>
  <field name="type"
         type="string" indexed="true" stored="true"/>
  <field name="url"
         type="string" indexed="true" stored="true"/>
  <field name="last_modified"
         type="string" indexed="true" stored="true"/>
  <field name="search_tag"
         type="string" indexed="true" stored="true"/>

  <!-- Document bodies: "contentbin" holds the text extracted by the Tika
       sub-entity, "content" the CONCAT(body,' ',title) value of text rows. -->
  <field name="contentbin"
         type="text" indexed="true" stored="true" multiValued="true"/>
  <field name="content"
         type="text_ktimatologio" indexed="true" stored="true" multiValued="true"/>
</fields>
これについて本当に助けが必要です!
敬意を表して、
トム
ギリシャ