java - Apache Lucene の検索コードが null を出力します

Question

 Directory directory = FSDirectory.open(indexDir);
        IndexReader reader = DirectoryReader.open(directory);
        IndexSearcher searcher = new IndexSearcher(reader);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);

        QueryParser parser = new QueryParser(Version.LUCENE_41, "contents", analyzer);
        Query query = parser.parse(queryStr);
        System.out.println("Searching for: " + query.toString("contents"));
        TopDocs results = searcher.search(query, maxHits);

        ScoreDoc[] hits = results.scoreDocs;
        int numTotalHits = results.totalHits;

        System.out.println("\n\n\n-----------------------Results--------------------------\n\n\n");
       System.out.println(numTotalHits + " total matching documents");


        for (int i = 0; i < hits.length; i++) {
            int docId = hits[i].doc;
            Document d = searcher.doc(docId);
            System.out.println(i+":File name is"+d.get("filename"));
        }

        System.out.println("Found " + hits.length);

上記のコードを検索モジュールで使用しました。これでコードは正常に機能しますが、出力は次のようになります

390:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2012-12-31.txt
391:File name isnull
392:File name isnull
393:File name isnull
394:File name isnull
395:File name isnull
396:File name isnull
397:File name isnull
398:File name isnull
399:File name isnull
400:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-09.txt
401:File name isnull
402:File name isnull
403:File name isnull
404:File name isnull
405:File name isnull
406:File name isnull
407:File name isnull
408:File name isnull
409:File name isnull
410:File name is/home/maclean/Installations/apache-tomcat-7.0.21/logs/localhost_access_log.2013-01-10.txt

ここでは、クエリ文字列を含むファイル名のみを出力していますが、結果が多すぎて、ほとんどの結果のファイル名がnullになっています。なぜこれが発生するのですか？

インデックス作成には、このコードを使用しています

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;



public class SimpleFileIndexer {

    public static void main() throws Exception {

        File dataDir = new File("/home/maclean/Installations/apache-tomcat-7.0.21/logs");
        File indexDir = new File("/home/maclean/NetBeansProjects/LogSearchEngine/Result");

        SimpleFileIndexer indexer = new SimpleFileIndexer();

        int numIndex = indexer.index(indexDir, dataDir);

        System.out.println("Total files indexed " + numIndex);

    }

    private int index(File indexDir, File dataDir) throws Exception {
    // API and code to convert text into indexable/searchable tokens.
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_41);
    //To store an index on disk
    Directory directory = FSDirectory.open(indexDir);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_41, analyzer);
        int numIndexed;
        try (IndexWriter indexWriter = new IndexWriter(directory, config)) {
            indexDirectory(indexWriter, dataDir);
            numIndexed = indexWriter.maxDoc();
            indexWriter.close();

        }

        return numIndexed;


    }

    private void indexDirectory(IndexWriter indexWriter, File dataDir) throws IOException {

        File[] files = dataDir.listFiles();
        for (int i = 0; i < files.length; i++) {
            File f = files[i];
            if (f.isDirectory()) {
                indexDirectory(indexWriter, f);
            }
            else {
                indexFileWithIndexWriter(indexWriter, f);
            }
        }

    }

    private void indexFileWithIndexWriter(IndexWriter indexWriter, File file) throws IOException {

        FileInputStream fis = null;
        if (file.isHidden() || file.isDirectory() || !file.canRead() || !file.exists()) {
            return;
        }

        System.out.println("Indexing file " + file.getCanonicalPath());

        try {
          fis = new FileInputStream(file);
        } catch (FileNotFoundException fnfe) {
          System.out.println("File Not Found"+fnfe);

       }

        Document doc = new Document();
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
        doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

        if (indexWriter.getConfig().getOpenMode() == OpenMode.CREATE) {
          // New index, so we just add the document (no old document can be there):
           System.out.println("adding " + file);
          indexWriter.addDocument(doc);
       } else {
          // Existing index (an old copy of this document may have been indexed) so 
       // we use updateDocument instead to replace the old one matching the exact 
           // path, if present:
            System.out.println("updating " + file);
            indexWriter.updateDocument(new Term("path", file.getPath()), doc);
          }


         fis.close();




    }

}*

score 1 · Accepted Answer

hits 配列は numTotalHits よりも長いため、for ループの制限は hits.length ではなく numTotalHits にする必要があります。

java - Apache Lucene の検索コードが null を出力します

1 に答える 1

Related

Reference