私は次のプログラムを持っています:
public class RegexQueryExample {
public static String[] terms = {
"US $65M dollars",
"USA",
"$35",
"355",
"US $33",
"U.S.A",
"John Keates",
"Tom Dick Harry",
"Southeast' Asia"
};
private static Directory directory;
public static void main(String[] args) throws CorruptIndexException, IOException {
String searchString = ".*\\$.*";
createIndex();
searchRegexIndex(searchString);
}
/**
* Creates an index for the files in the data directory.
*/
private static void createIndex() throws CorruptIndexException, LockObtainFailedException, IOException {
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_30);
directory = new RAMDirectory();
IndexWriter indexWriter = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
for (String term : terms) {
Document document = new Document();
if (term.indexOf('$') >= 0) {
document.add(new Field("type", "currency", Field.Store.YES, Field.Index.NOT_ANALYZED));
} else {
document.add(new Field("type", "simple_field", Field.Store.YES, Field.Index.NOT_ANALYZED));
}
document.add(new Field("term", term, Field.Store.YES, Field.Index.NOT_ANALYZED));
indexWriter.addDocument(document);
}
indexWriter.close();
}
/**
* searches for a regular expression satisfied by a file path.
*
* @param searchString the string to be searched.
*/
private static void searchRegexIndex(String regexString) throws CorruptIndexException, IOException {
regexString = regexString;
IndexSearcher searcher = new IndexSearcher(directory);
RegexQuery rquery = new RegexQuery(new Term("term", regexString));
BooleanQuery queryin = new BooleanQuery();
BooleanQuery query = new BooleanQuery();
query.add(new TermQuery(new Term("type", "simple_field")), BooleanClause.Occur.MUST);
query.add(rquery, BooleanClause.Occur.MUST);
TopDocs hits = searcher.search(query, terms.length);
ScoreDoc[] alldocs = hits.scoreDocs;
for (int i = 0; i < alldocs.length; i++) {
Document d = searcher.doc(alldocs[i].doc);
System.out.println((i + 1) + ". " + d.get("term"));
}
}
}
この関数は、正規表現クエリを実行しcreateIndex()
ながらLuceneインデックスを作成します。searchRegexIndex()
関数では、記号を含む用語が返されることを期待してmain()
検索します。しかし、それは機能しませんでした。どうすればそれを機能させることができますか?これはアナライザーの問題ですか?.*\\$.*
$
編集:
Lukeからの私のLuceneインデックススナップショット: