Lucene中文分词
package com.fxr.test2;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Builds a Lucene index from the files found directly inside a data directory.
 *
 * <p>Each regular file becomes one document with two stored, analyzed fields:
 * {@code fileName} (the file's name) and {@code contents} (the file's text).
 * Text is tokenized with {@link PaodingAnalyzer}.
 */
public class TestFileIndex {

    /**
     * Indexes every regular file in {@code f:/lucenedata} into the index at
     * {@code f:/ChinesLuceneIndex}, echoing each file name and its contents
     * to standard output.
     *
     * @param args unused
     * @throws IOException if the data directory cannot be listed, a file
     *                     cannot be read, or the index cannot be written
     */
    public static void main(String[] args) throws IOException {
        String dataDir = "f:/lucenedata";
        String indexDir = "f:/ChinesLuceneIndex";

        // listFiles() returns null when the path is missing or not a directory;
        // fail with a clear message instead of a NullPointerException.
        File[] files = new File(dataDir).listFiles();
        if (files == null) {
            throw new IOException("Cannot list files in data directory: " + dataDir);
        }
        System.out.println(files.length);

        Analyzer analyzer = new PaodingAnalyzer();
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            // 'true' creates a new index or overwrites the existing one.
            IndexWriter indexWriter =
                    new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try {
                for (int i = 0; i < files.length; i++) {
                    // Sub-directories cannot be opened as a stream; skip them.
                    if (!files[i].isFile()) {
                        continue;
                    }
                    System.out.println(files[i].getName());
                    String contents = readContents(files[i]);

                    Document document = new Document();
                    document.add(new Field("fileName", files[i].getName(),
                            Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("contents", contents,
                            Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.addDocument(document);

                    System.out.println(contents);
                }
            } finally {
                indexWriter.close();
            }
        } finally {
            directory.close();
        }
    }

    /**
     * Reads a whole text file, terminating every line with {@code "\n"}.
     *
     * @param file the file to read
     * @return the file's text
     * @throws IOException if the file cannot be opened or read
     */
    private static String readContents(File file) throws IOException {
        // NOTE(review): decodes with the platform default charset, as the original did.
        // Confirm the encoding of the data files and pass it explicitly if it differs.
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(file.getCanonicalFile())));
        try {
            StringBuilder text = new StringBuilder();
            String line = reader.readLine();
            while (line != null) {
                text.append(line);
                text.append("\n");
                line = reader.readLine();
            }
            return text.toString();
        } finally {
            // Closing the reader also closes the underlying file stream.
            reader.close();
        }
    }
}
package com.fxr.test2;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a single-term query against the {@code contents} field of the index
 * at {@code f:/ChinesLuceneIndex} and prints the matching documents.
 */
public class TestFileSearch {

    /**
     * Searches the {@code contents} field for the term "中国" and prints, for up
     * to 100 hits, the score followed by the stored {@code fileName} and
     * {@code contents} values.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/ChinesLuceneIndex";

        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                Term term = new Term("contents", "中国");
                TermQuery query = new TermQuery(term);

                TopDocs topDocs = indexSearcher.search(query, 100);
                ScoreDoc[] hits = topDocs.scoreDocs;

                for (int i = 0; i < hits.length; i++) {
                    Document doc = indexSearcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.println(doc.get("fileName") + " ");
                    System.out.println(doc.get("contents") + " ");
                }
            } finally {
                // Release the searcher even when the query or document fetch fails.
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
package com.fxr.test2;

import java.io.IOException;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Demonstrates the effect of Chinese word segmentation by indexing a small,
 * hard-coded set of person records with {@link PaodingAnalyzer}.
 */
public class TestIndex {

    /**
     * Writes four documents (fields {@code id}, {@code name}, {@code address},
     * {@code birthday}, all stored and analyzed) into the index at
     * {@code f:/ChinesLuceneIndex}, then prints {@code ok!}.
     *
     * @param args unused
     * @throws IOException if the index cannot be created or written
     */
    public static void main(String[] args) throws IOException {
        // Parallel arrays: element i of each array belongs to the same record.
        String[] ids = {"1", "2", "3", "4"};
        String[] names = {"张三", "李逵", "zhangsan", "zhangsun"};
        String[] addresses = {"居住在北京", "居住在南京", "北京海淀", "nanning"};
        String[] birthdays = {"19820720", "19840203", "19770409", "19830130"};

        Analyzer analyzer = new PaodingAnalyzer();
        String indexDir = "f:/ChinesLuceneIndex";
        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            // true creates or overwrites the current index; false appends to it.
            IndexWriter indexWriter =
                    new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.LIMITED);
            try {
                for (int i = 0; i < ids.length; i++) {
                    Document document = new Document();
                    document.add(new Field("id", ids[i], Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("name", names[i], Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("address", addresses[i], Field.Store.YES, Field.Index.ANALYZED));
                    document.add(new Field("birthday", birthdays[i], Field.Store.YES, Field.Index.ANALYZED));
                    indexWriter.addDocument(document);
                }
                indexWriter.optimize();
            } finally {
                indexWriter.close();
            }
        } finally {
            // The directory was previously left open; close it like the sibling classes do.
            directory.close();
        }
        System.out.println("ok!");
    }
}
============================================
package com.fxr.test2;

import java.io.IOException;

import net.paoding.analysis.analyzer.PaodingAnalyzer;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocCollector;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Parses a boolean query string with {@link QueryParser} (default field
 * {@code address}, analyzed by {@link PaodingAnalyzer}) and prints the hits.
 */
public class TestQueryParser {

    /**
     * Searches the index at {@code f:/ChinesLuceneIndex} and prints, for up to
     * 100 hits, the score and the stored {@code id}, {@code name},
     * {@code address} and {@code birthday} values.
     *
     * @param args unused
     * @throws IOException    if the index cannot be opened or read
     * @throws ParseException if the query string cannot be parsed
     */
    public static void main(String[] args) throws IOException, ParseException {
        String indexDir = "f:/ChinesLuceneIndex";
        Analyzer analyzer = new PaodingAnalyzer();

        Directory directory = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(directory);
            try {
                QueryParser parser = new QueryParser("address", analyzer);
                // The operator must be separated by whitespace: written as "AND北京"
                // the lexer reads one term instead of the boolean operator AND.
                Query query = parser.parse("(海淀 OR 居住) AND 北京");

                TopDocCollector topdoc = new TopDocCollector(100);
                indexSearcher.search(query, topdoc);
                ScoreDoc[] hits = topdoc.topDocs().scoreDocs;

                for (int i = 0; i < hits.length; i++) {
                    Document doc = indexSearcher.doc(hits[i].doc);
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                indexSearcher.close();
            }
        } finally {
            directory.close();
        }
    }
}
==========================================
package com.fxr.test2;

import java.io.IOException;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

/**
 * Runs a wildcard query against the {@code address} field of the index at
 * {@code f:/ChinesLuceneIndex} and prints the hits.
 */
public class TestSearch {

    /**
     * Searches {@code address} with the pattern {@code *海*} and prints, for up
     * to 100 hits, the score and the stored {@code id}, {@code name},
     * {@code address} and {@code birthday} values.
     *
     * @param args unused
     * @throws IOException if the index cannot be opened or read
     */
    public static void main(String[] args) throws IOException {
        String indexDir = "f:/ChinesLuceneIndex";

        Directory dir = FSDirectory.getDirectory(indexDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(dir);
            try {
                Term term = new Term("address", "*海*");
                // Alternative query types that can be tried with the same term:
                //TermQuery termQuery = new TermQuery(term);
                //PrefixQuery prefixQuery = new PrefixQuery(term);
                WildcardQuery wildcardQuery = new WildcardQuery(term);

                TopDocs topDocs = indexSearcher.search(wildcardQuery, 100);
                ScoreDoc[] hits = topDocs.scoreDocs;

                for (int i = 0; i < hits.length; i++) {
                    Document doc = indexSearcher.doc(hits[i].doc);
                    // Trailing space keeps the score from running into the id.
                    System.out.print(hits[i].score + " ");
                    System.out.print(doc.get("id") + " ");
                    System.out.print(doc.get("name") + " ");
                    System.out.print(doc.get("address") + " ");
                    System.out.println(doc.get("birthday") + " ");
                }
            } finally {
                indexSearcher.close();
            }
        } finally {
            dir.close();
        }
    }
}
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。