2

いくつかの XML ファイルにインデックスを付ける必要があります。ファイルの構造は次のとおりです。

Solr 構成ファイルを編集しました。

data-config.xml

<!--
  DataImportHandler configuration.
  Reads a single PMC article-set XML file from local disk and emits one Solr
  document per <article> element (see forEach below).
-->
<dataConfig>
    <dataSource type="FileDataSource" encoding="UTF-8" />
    <document>
        <!--
          forEach selects the record element; each field xpath is written as an
          absolute path from the document root.
          pk="pmc" matches the schema's uniqueKey.
        -->
        <entity name="PubmedArticle"
                processor="XPathEntityProcessor"
                stream="true"
                pk="pmc"
                forEach="/pmc-articleset/article"
                url="C:\Users\hp\Desktop\idpSOlr - Copy\apache-solr-4.0.0\example\solr\collection1\conf\pmcsampleDownloaded.xml"
                transformer="RegexTransformer,DateFormatTransformer">
            <field column="journal-title" xpath="/pmc-articleset/article/front/journal-meta/journal-title-group/journal-title"/>
            <!-- Article identifiers: one <article-id> per id type, selected by the pub-id-type attribute. -->
            <field column="pmc"   xpath="/pmc-articleset/article/front/article-meta/article-id[@pub-id-type='pmc']"/>
            <field column="pmid"  xpath="/pmc-articleset/article/front/article-meta/article-id[@pub-id-type='pmid']"/>
            <field column="other" xpath="/pmc-articleset/article/front/article-meta/article-id[@pub-id-type='other']"/>
            <field column="doi"   xpath="/pmc-articleset/article/front/article-meta/article-id[@pub-id-type='doi']"/>
            <!-- The article title is indexed into the schema's "text" field. -->
            <field column="text" xpath="/pmc-articleset/article/front/article-meta/title-group/article-title"/>
        </entity>
    </document>
</dataConfig>

および schema.xml

<!--
  Solr schema for the "pubmed" core.
  Declares the field types, the indexed fields, and "pmc" as the unique key.
-->
<schema name="pubmed" version="1.1">
    <types>
        <!-- Plain (un-analyzed) text and primitive numeric/date types. -->
        <fieldType name="nametext" class="solr.TextField" sortMissingLast="true" omitNorms="true"/>
        <fieldType name="integer" class="solr.IntField" omitNorms="true"/>
        <fieldType name="long" class="solr.LongField" omitNorms="true"/>
        <fieldType name="float" class="solr.FloatField" omitNorms="true"/>
        <fieldType name="double" class="solr.DoubleField" omitNorms="true"/>
        <fieldType name="sint" class="solr.SortableIntField" sortMissingLast="true" omitNorms="true"/>
        <fieldType name="slong" class="solr.SortableLongField" sortMissingLast="true" omitNorms="true"/>
        <fieldType name="sfloat" class="solr.SortableFloatField" sortMissingLast="true" omitNorms="true"/>
        <fieldType name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true" omitNorms="true"/>
        <fieldType name="date" class="solr.DateField" sortMissingLast="true" omitNorms="true"/>
        <fieldType name="random" class="solr.RandomSortField" indexed="true"/>

        <!-- A text field that only splits on whitespace for exact matching of words -->
        <fieldType name="text_ws" class="solr.TextField" positionIncrementGap="100">
            <analyzer>
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
            </analyzer>
        </fieldType>

        <!--
          Whitespace-tokenized text with stop words, word-delimiter splitting,
          lowercasing and Porter stemming. The index-time chain catenates word and
          number parts; the query-time chain does not, and adds synonym expansion.
        -->
        <fieldType name="text" class="solr.TextField" positionIncrementGap="100">
            <analyzer type="index">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
                <filter class="solr.PorterStemFilterFactory"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
            <analyzer type="query">
                <tokenizer class="solr.WhitespaceTokenizerFactory"/>
                <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
                <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
                <filter class="solr.PorterStemFilterFactory"/>
                <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
            </analyzer>
        </fieldType>

        <!-- Standard-tokenized, lowercased text with stop words; synonyms are applied at query time only. -->
        <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
            <analyzer type="index">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
                <filter class="solr.LowerCaseFilterFactory"/>
            </analyzer>
            <analyzer type="query">
                <tokenizer class="solr.StandardTokenizerFactory"/>
                <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true"/>
                <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
                <filter class="solr.LowerCaseFilterFactory"/>
            </analyzer>
        </fieldType>

        <!--
          Sort key: the whole value is kept as one token, lowercased and trimmed,
          then every character outside a-z is removed.
        -->
        <fieldType name="alphaOnlySort" class="solr.TextField" sortMissingLast="true" omitNorms="true">
            <analyzer>
                <tokenizer class="solr.KeywordTokenizerFactory"/>
                <filter class="solr.LowerCaseFilterFactory"/>
                <filter class="solr.TrimFilterFactory"/>
                <filter class="solr.PatternReplaceFilterFactory"
                        pattern="([^a-z])" replacement="" replace="all"/>
            </analyzer>
        </fieldType>

        <!-- Neither stored nor indexed: values sent to a field of this type are dropped. -->
        <fieldType name="ignored" stored="false" indexed="false" class="solr.StrField"/>
        <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
    </types>

    <fields>
        <!-- journal-title and text are declared required="true". -->
        <field name="journal-title" type="text" indexed="true" stored="true" required="true" multiValued="false"/>
        <!-- Article identifiers, stored verbatim as strings. "pmc" is the uniqueKey below. -->
        <field name="pmc" type="string" indexed="true" stored="true" required="false" multiValued="false"/>
        <field name="pmid" type="string" indexed="true" stored="true" required="false" multiValued="false"/>
        <field name="other" type="string" indexed="true" stored="true" required="false" multiValued="false"/>
        <field name="doi" type="string" indexed="true" stored="true" required="false" multiValued="false"/>
        <field name="article-title" type="text" indexed="true" stored="true" required="false" multiValued="true"/>
        <!--<field name="surname" type="string" indexed="true" stored="true" required="true" multiValued="true" />
        <field name="given-name" type="string" indexed="true" stored="true" required="true" multiValued="true" />
        <field name="pub-date_day" type="long" indexed="true" stored="true" required="true" multiValued="true" />-->
        <field name="text" type="text_general" indexed="true" stored="true" required="true" multiValued="true"/>
        <field name="_version_" type="long" indexed="true" stored="true"/>
    </fields>

    <uniqueKey>pmc</uniqueKey>
    <solrQueryParser defaultOperator="OR"/>
</schema>

そして、リクエストハンドラーを次のように構成しました。

<!-- Registers the DataImportHandler at /dataimport; "config" names the DIH configuration file to load. -->
<requestHandler name="/dataimport" class="org.apache.solr.handler.dataimport.DataImportHandler">
    <lst name="defaults">
        <str name="config">data-config.xml</str>
    </lst>
</requestHandler>

初めて Solr にフル インポート リクエスト (http://localhost:8983/solr/dataimport?command=full-import) を発行すると、次のような応答が返されます。

<response>
    <lst name="responseHeader">
        <int name="status">0</int>
        <int name="QTime">18</int>
    </lst>
    <lst name="initArgs">
        <lst name="defaults">
    <str name="config">data-config.xml</str>
    </lst>
    </lst>
    <str name="command">full-import</str>
    <str name="status">idle</str>
    <str name="importResponse"/>
    <lst name="statusMessages"/>
    <str name="WARNING">
    This response format is experimental. It is likely to change in the future.
    </str>
    </response>

同じリクエストを 2 回目に発行すると、次の応答が返されます。

<response>
    <lst name="responseHeader">
        <int name="status">0</int>
        <int name="QTime">6</int>
    </lst>
    <lst name="initArgs">
    <lst name="defaults">
        <str name="config">data-config.xml</str>
    </lst>
    </lst>
    <str name="command">full-import</str>
    <str name="status">idle</str>
    <str name="importResponse"/>
    <lst name="statusMessages">
        <str name="Total Requests made to DataSource">0</str>
        <str name="Total Rows Fetched">0</str>
        <str name="Total Documents Skipped">0</str>
        <str name="Full Dump Started">2013-10-12 20:45:12</str>
        <str name="">
        Indexing completed. Added/Updated: 0 documents. Deleted 0 documents.
        </str>
        <str name="Committed">2013-10-12 20:45:12</str>
        <str name="Total Documents Processed">0</str>
        <str name="Time taken">0:0:0.270</str>
    </lst>
    <str name="WARNING">
        This response format is experimental. It is likely to change in the future.
    </str>
</response>

Solr にクエリを実行すると、インデックスが作成されたドキュメントが 1 件もないことがわかりました。どなたか助けていただけませんか? スキーマとデータ構成は正しいでしょうか?

4

0 に答える 0