Lucene 3.5.0 入门笔记

1. 准备Lucene 3.5.0的核心jar包(lucene-core-3.5.0.jar),并加入工程的classpath。

2. 新建目录C:\testsource,新建目录C:\testindex。

3. 在C:\testsource下新建test1.txt, test2.txt,内容分别为:“商务休闲品牌男装西裤衬衫”,“潮流休闲品牌女装裙子大衣”。注意:文件需以UTF-8编码保存,因为下面的代码按UTF-8读取文件内容。

4.创建索引

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Date;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Builds a Lucene index over the {@code .txt} files found directly inside a
 * source directory. Each file becomes one document with two fields:
 * {@code path} (stored only) and {@code body} (stored and analyzed).
 * @author [email protected]
 */
public class TextFileIndexer {
	
	/**
	 * Indexes every {@code .txt} file in {@code C:\testsource} into
	 * {@code C:\testindex} and prints the elapsed time.
	 *
	 * @param args unused
	 * @throws Exception if the source directory cannot be listed, a file cannot
	 *         be read, or the index cannot be written
	 */
	public static void main(String[] args) throws Exception{
		
		// Directory holding the text files to index
		File sourceDir = new File("C:\\testsource");
		File[] sourceFiles = sourceDir.listFiles();
		if(sourceFiles == null){
			// listFiles() returns null when the path does not exist, is not a
			// directory, or cannot be read; fail clearly instead of with an NPE.
			throw new IOException("Cannot list source directory: " + sourceDir.getPath());
		}
		
		// Directory the index files are written to
		File indexDir = new File("C:\\testindex");
		Directory indexFilesDir = FSDirectory.open(indexDir);
		try{
			// Analyzer used to tokenize the "body" field
			Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
			
			// OpenMode.CREATE is requested so each run builds the index from scratch
			IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_35, analyzer);
			iwConfig.setOpenMode(OpenMode.CREATE);
			
			IndexWriter indexWriter = new IndexWriter(indexFilesDir, iwConfig);
			long startTime = System.currentTimeMillis();
			try{
				for(File sourceFile : sourceFiles){
					if(!sourceFile.isFile() || !sourceFile.getName().endsWith(".txt")){
						continue;
					}
					String canonicalPath = sourceFile.getCanonicalPath();
					System.out.println("\nFile " + canonicalPath + "正在被索引......");
					String body = fileReaderAll(canonicalPath, "UTF-8");
					System.out.println(body);
					
					// The path is stored for display only and is not searchable
					Field pathField = new Field("path", sourceFile.getPath(), Field.Store.YES, Field.Index.NO);
					// The body is stored, analyzed, and keeps term vectors with positions and offsets
					Field bodyField = new Field("body", body, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
					
					Document document = new Document();
					document.add(pathField);
					document.add(bodyField);
					indexWriter.addDocument(document);
				}
			}finally{
				// Always close the writer, even if reading or indexing a file failed
				indexWriter.close();
			}
			
			long endTime = System.currentTimeMillis();
			// Report the index directory (not the source directory) as the index location
			System.out.println("\n花费了" + (endTime-startTime) + " 毫秒把文档增加到索引里面去!索引文件地址:" + indexDir.getPath());
		}finally{
			indexFilesDir.close();
		}
	}

	/**
	 * Reads the whole content of a text file into a string.
	 * Lines are joined with a single {@code '\n'} so that the last word of one
	 * line and the first word of the next are not merged into one token.
	 *
	 * @param filePath path of the file to read
	 * @param charset  name of the charset used to decode the file
	 * @return the file content, without a trailing line terminator
	 * @throws IOException if the file cannot be opened or read, or the charset
	 *         name is not supported
	 */
	private static String fileReaderAll(String filePath, String charset) throws IOException {
		FileInputStream input = new FileInputStream(filePath);
		try{
			BufferedReader reader = new BufferedReader(new InputStreamReader(input, charset));
			StringBuilder content = new StringBuilder();
			boolean firstLine = true;
			String line;
			while((line=reader.readLine())!=null){
				if(!firstLine){
					content.append('\n');
				}
				content.append(line);
				firstLine = false;
			}
			return content.toString();
		}finally{
			// Closing the underlying stream releases the file handle on every path,
			// including a failure while constructing the reader.
			input.close();
		}
	}

}

输出结果:


File C:\testsource\test1.txt正在被索引......
商务休闲品牌男装西裤衬衫

File C:\testsource\test2.txt正在被索引......
潮流休闲品牌女装裙子大衣

花费了569 毫秒把文档增加到索引里面去!索引文件地址:C:\testsource



5.关键字检索


import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;

/**
 * Runs a keyword query against the {@code body} field of the index stored in
 * {@code C:\testindex} and prints how many documents matched.
 * @author [email protected]
 */
public class TextQuery {
	
	/**
	 * Parses the query string with AND as the default operator, searches the
	 * index for at most 10 hits, and prints the number of hits found.
	 *
	 * @param args unused
	 * @throws Exception if the index cannot be opened or read, or the query
	 *         string cannot be parsed
	 */
	public static void main(String[] args) throws Exception{
		
		String queryString = "休闲 装";

		// Location of the index files
		String indexDir = "C:\\testindex";
		FSDirectory directory = FSDirectory.open(new File(indexDir));
		try{
			IndexReader indexReader = IndexReader.open(directory);
			try{
				IndexSearcher indexSearcher = new IndexSearcher(indexReader);
				try{
					Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
					QueryParser queryParser = new QueryParser(Version.LUCENE_35, "body", analyzer);
					// Every term of the query must match
					queryParser.setDefaultOperator(QueryParser.AND_OPERATOR);
					Query query = queryParser.parse(queryString);
					
					// Return at most 10 hits
					TopDocs results = indexSearcher.search(query, 10);
					ScoreDoc[] hits = results.scoreDocs;
					if(hits.length>0){
						System.out.println("找到:" + hits.length + " 个结果!");
					}else{
						System.out.println("没有找到");
					}
				}finally{
					indexSearcher.close();
				}
			}finally{
				// The reader was opened by this method, so it is closed here explicitly
				// rather than relying on the searcher to release it.
				indexReader.close();
			}
		}finally{
			directory.close();
		}
		
	}

}

输出结果:


找到:2 个结果!






郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。