Lucene中文分词图解
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each regular file becomes one document with two stored, analyzed fields:
 * {@code fileName} (the file's name) and {@code contents} (the file's text).
 * Text is tokenized by {@link PaodingAnalyzer}.
 */
public class TestFileIndex {
    /** Charset used to decode the source files. */
    private static final String SOURCE_CHARSET = "gb2312";

    /**
     * Indexes every regular file in {@code d:/data} into {@code d:/luceneindex}.
     *
     * @param args unused
     * @throws IllegalStateException if the data directory cannot be listed
     * @throws Exception on any I/O or indexing failure
     */
    public static void main(String[] args) throws Exception {
        String dataDir = "d:/data";
        String indexDir = "d:/luceneindex";

        // listFiles() returns null (not an empty array) when the path does not
        // exist, is not a directory, or cannot be read.
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            throw new IllegalStateException("Cannot list data directory: " + dataDir);
        }
        System.out.println(files.length);

        Analyzer analyzer = new PaodingAnalyzer();
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            IndexWriter writer =
                    new IndexWriter(dir, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
            try {
                for (int i = 0; i < files.length; i++) {
                    // Sub-directories cannot be opened as a stream; skip them
                    // instead of aborting the whole run.
                    if (!files[i].isFile()) {
                        continue;
                    }
                    String contents = readFile(files[i], SOURCE_CHARSET);

                    Document doc = new Document();
                    doc.add(new Field("fileName", files[i].getName(),
                            Field.Store.YES, Field.Index.ANALYZED));
                    doc.add(new Field("contents", contents,
                            Field.Store.YES, Field.Index.ANALYZED));
                    writer.addDocument(doc);
                }
                // Optimize once, after all documents have been added.
                writer.optimize();
            } finally {
                // Always release the writer, even if indexing failed part-way.
                writer.close();
            }
        } finally {
            dir.close();
        }
        System.out.println("ok");
    }

    /**
     * Reads a whole text file into a string, terminating every line with
     * {@code "\n"} regardless of the line separator used in the file.
     *
     * @param file the file to read
     * @param charsetName name of the charset used to decode the file's bytes
     * @return the decoded text, one {@code "\n"} after each line
     * @throws Exception if the file cannot be opened or read, or the charset
     *         name is not supported
     */
    private static String readFile(File file, String charsetName) throws Exception {
        FileInputStream is = new FileInputStream(file.getCanonicalPath());
        try {
            BufferedReader reader =
                    new BufferedReader(new InputStreamReader(is, charsetName));
            StringBuilder text = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line).append("\n");
            }
            return text.toString();
        } finally {
            // Closing the underlying stream releases the file handle even when
            // constructing the reader or reading a line throws.
            is.close();
        }
    }
}
import java.io.File;
import net.paoding.analysis.analyzer.PaodingAnalyzer;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Searches the index stored in {@code d:/luceneindex} and prints the matches.
 *
 * <p>The query text is parsed against the {@code contents} field using
 * {@link PaodingAnalyzer}; for every hit the stored {@code fileName} and
 * {@code contents} fields are printed.
 */
public class TestFileSearcher {
    /**
     * Runs a fixed query against the index and prints up to 1000 hits.
     *
     * @param args unused
     * @throws Exception if the index cannot be opened or read, or the query
     *         text cannot be parsed
     */
    public static void main(String[] args) throws Exception {
        String indexDir = "d:/luceneindex";
        Analyzer analyzer = new PaodingAnalyzer();
        Directory dir = FSDirectory.open(new File(indexDir));
        try {
            // NOTE(review): the boolean is presumably the read-only flag of the
            // Lucene 2.9/3.0 constructor; confirm against the Lucene version in use.
            IndexSearcher searcher = new IndexSearcher(dir, true);
            try {
                QueryParser parser =
                        new QueryParser(Version.LUCENE_29, "contents", analyzer);
                Query query = parser.parse("呼救");
                // Alternative: an exact single-term lookup on the fileName field
                // (would additionally need the Term and TermQuery imports):
                //Term term=new Term("fileName", "大学");
                //TermQuery query=new TermQuery(term);
                TopDocs docs = searcher.search(query, 1000);
                ScoreDoc[] hits = docs.scoreDocs;
                System.out.println(hits.length);
                for (int i = 0; i < hits.length; i++) {
                    Document doc = searcher.doc(hits[i].doc);
                    System.out.print(doc.get("fileName")+"--:\n");
                    System.out.println(doc.get("contents")+"\n");
                }
            } finally {
                // Release the searcher even when parsing or searching fails.
                searcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
本文出自 “李新博客” 博客,请务必保留此出处http://kinglixing.blog.51cto.com/3421535/702663
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。