Lucene 4.* QueryParser Range Searcher (范围查询)

 

Lucene 4.9.1 的 QueryParser 支持类似 Solr 的范围检索语法(例如 FL:[111 TO 222]),但在实际使用中,对数值字段做范围检索时结果会不准确。

这是因为 QueryParser 把范围查询的上下界当作 String 类型处理(生成按字典序比较的 TermRangeQuery)。因此无论索引时使用 LongField 还是 TextField,都会出现结果不准确的问题。

解决办法是自定义 QueryParser 并重写 getRangeQuery,手动将范围检索改为 long 类型(或 int 类型)的 NumericRangeQuery;相应字段在索引时需使用 LongField(或 IntField)。

 

1. 自定义 QueryParser:

/**
 * A {@link QueryParser} that builds numeric {@code long} range queries for range
 * syntax such as {@code FIELD:[111 TO 222]}, instead of delegating to the parent
 * parser's range handling.
 *
 * <p>Every range clause parsed by this class is interpreted as a {@code long} range;
 * a bound that is not a valid {@code long} is rejected with a {@link ParseException}.
 */
public class RangeQueryParser extends QueryParser {

    /**
     * Creates a parser; all arguments are forwarded unchanged to {@link QueryParser}.
     *
     * @param matchVersion Lucene compatibility version
     * @param f            default field for query terms
     * @param a            analyzer used for query text
     */
    public RangeQueryParser(Version matchVersion, String f, Analyzer a) {
        super(matchVersion, f, a);
    }

    /**
     * Builds a {@code long} numeric range query for the given field.
     *
     * @param field field the range applies to
     * @param part1 lower bound text, or {@code null} for an open lower bound
     * @param part2 upper bound text, or {@code null} for an open upper bound
     * @param arg3  whether the lower bound is inclusive
     * @param arg4  whether the upper bound is inclusive
     * @return a numeric range query over {@code field}
     * @throws ParseException if a non-null bound cannot be parsed as a {@code long}
     */
    @Override
    protected Query getRangeQuery(String field, String part1, String part2, boolean arg3, boolean arg4) throws ParseException {
        Long lower = parseBound(field, part1);
        Long upper = parseBound(field, part2);
        // Honor the bracket style from the query ([..] vs {..}) instead of forcing both ends inclusive.
        return NumericRangeQuery.newLongRange(field, lower, upper, arg3, arg4);
    }

    /**
     * Converts one range bound to a {@code Long}; {@code null} is passed through so that
     * open-ended ranges become half-open numeric ranges rather than parse failures.
     */
    private static Long parseBound(String field, String text) throws ParseException {
        if (text == null) {
            // NOTE(review): the classic parser is expected to pass null for a "*" bound - confirm for the Lucene version in use.
            return null;
        }
        try {
            return Long.valueOf(text);
        } catch (NumberFormatException e) {
            ParseException failure = new ParseException(
                    "Cannot parse range bound '" + text + "' of field '" + field + "' as a long: " + e.getMessage());
            // Keep the original exception attached for diagnostics.
            failure.initCause(e);
            throw failure;
        }
    }

}

 

2 使用方法:

 

package org.bidlink.index;

import java.io.File;
import java.text.SimpleDateFormat;
import java.util.Date;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.wltea.analyzer.lucene.IKAnalyzer;

import com.bidlink.general.index.RangeQueryParser;
import com.bidlink.general.index.SearcherCallBack;
import com.bidlink.general.index.WordBankConst;

/**
 * Small demo that writes one document to an on-disk index and then runs a query
 * (term clause plus numeric range clause) through {@link RangeQueryParser}.
 */
public class TestIndex {

    /** Directory of the on-disk index, resolved relative to the working directory. */
    private static final String INDEX_DIR = "Index";

    /**
     * Adds a single sample document (a text field {@code WORD} and a long field
     * {@code ENDDATE}) to the index and commits it.
     *
     * @throws Exception if the index cannot be opened or written
     */
    public void index() throws Exception {
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_4_9, new IKAnalyzer());
        // try-with-resources closes the writer and directory even if addDocument/commit fails.
        try (Directory d = FSDirectory.open(new File(INDEX_DIR));
                IndexWriter iw = new IndexWriter(d, iwc)) {
            Document doc = new Document();
            doc.add(new TextField("WORD", "测试北京市第102个", Store.YES));
            doc.add(new LongField("ENDDATE", 1234556L, Store.YES));
            iw.addDocument(doc);
            iw.commit();
        }
    }

    /**
     * Parses {@code word} with a {@link RangeQueryParser} and feeds up to 1000 matching
     * documents to {@code callBack}, also recording index and hit counts on it.
     *
     * @param fl       default field used for terms without an explicit field
     * @param word     query string to parse
     * @param callBack receives {@code maxDocs}, {@code numDocs}, {@code numFound} and each hit
     * @throws Exception if the index cannot be opened
     */
    public void search(String fl, String word, SearcherCallBack callBack) throws Exception {
        // The reader and directory hold file handles; close them when the search is done.
        try (Directory d = FSDirectory.open(new File(INDEX_DIR));
                IndexReader r = DirectoryReader.open(d)) {
            IndexSearcher s = new IndexSearcher(r);
            callBack.maxDocs = r.maxDoc();
            callBack.numDocs = r.numDocs();
            QueryParser qp = new RangeQueryParser(Version.LUCENE_4_9, fl, new IKAnalyzer());
            qp.setDefaultOperator(QueryParser.Operator.AND);
            try {
                TopDocs tdocs = s.search(qp.parse(word), 1000);
                callBack.numFound = tdocs.totalHits;
                ScoreDoc[] scoreDocs = tdocs.scoreDocs;
                for (ScoreDoc sDoc : scoreDocs) {
                    callBack.process(s.doc(sDoc.doc));
                }
            } catch (Exception e) {
                // Demo behavior: report query/search failures and return normally.
                e.printStackTrace();
            }
        }
    }

    /**
     * Runs a sample query against the existing index and prints each hit and the totals.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened
     */
    public static void main(String[] args) throws Exception {
        SearcherCallBack s = new SearcherCallBack() {
            @Override
            public void process(Document doc) {
                System.out.println(doc.getField(WordBankConst.WORD).stringValue());
                // Parse the stored value once and reuse it for both outputs.
                long endDate = Long.parseLong(doc.getField(WordBankConst.ENDDATE).stringValue());
                SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                String date = sdf.format(new Date(endDate));
                System.out.println(endDate + "   " + date);
            }
        };
        TestIndex index = new TestIndex();
        // Index building is left disabled here; enable to (re)create the sample index first.
        // index.index();
        index.search("", "WORD:地铁  AND ENDDATE:[1111 TO 222222222222 ]", s);
        System.out.println("Doc numFound :  " + s.numFound + "  Max Doc num :" + s.maxDocs);
    }
}

 

郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。