使用Lucene.NET实现数据检索功能
- 引言
- 案例概要
- 思路
- 分词
- 索引的管理
// Index maintenance: drains IndexDataQueue and applies every queued
// insert / update / delete to the Lucene index stored under IndexDataDir,
// then commits and optimizes the index.

// Open the directory that holds the index files.
FSDirectory directory = FSDirectory.Open(new DirectoryInfo(this.IndexDataDir), new NativeFSLockFactory());
try
{
    // Check whether an index already exists in that directory.
    bool isExist = IndexReader.IndexExists(directory);
    if (isExist && IndexWriter.IsLocked(directory))
    {
        // Clear the lock on the existing index so the writer below can open it.
        IndexWriter.Unlock(directory);
    }

    // PanGu analyzer, used both for indexing and for parsing the "id" queries.
    PanGuAnalyzer analyzer = new PanGuAnalyzer();

    // Index writer; the third argument (create) is true only when no index exists yet.
    IndexWriter writer = new IndexWriter(directory, analyzer, !isExist, IndexWriter.MaxFieldLength.UNLIMITED);
    try
    {
        // One parser is enough for the whole run; it only ever targets the "id" field.
        MultiFieldQueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_29, new string[] { "id" }, analyzer);

        // Process queued operations until the queue is empty.
        while (IndexDataQueue.Count > 0)
        {
            // BaseDataMode is the custom model for index data: per the original
            // author it carries the file path, name, content and the operation
            // type (insert, update, delete).
            BaseDataMode mode = IndexDataQueue.Dequeue();

            bool removeExisting = mode.Type == OperationType.Update || mode.Type == OperationType.Delete;
            bool addDocument = mode.Type == OperationType.Insert || mode.Type == OperationType.Update;

            if (removeExisting)
            {
                // Delete by id. The id is escaped so that characters with a
                // special meaning in query syntax are matched literally instead
                // of breaking the parse or changing which documents match.
                Query query = parser.Parse(QueryParser.Escape(mode.Content["id"]));
                writer.DeleteDocuments(query);
            }

            if (addDocument)
            {
                Document document = new Document();
                foreach (KeyValuePair<string, string> kv in mode.Content)
                {
                    // kv.Key is the field name in the index, kv.Value the text stored in that field.
                    document.Add(new Field(kv.Key, kv.Value, Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.WITH_POSITIONS_OFFSETS));
                }
                writer.AddDocument(document);
            }
            // Any other operation type is ignored.
        }

        // Commit the changes.
        writer.Commit();
        // Optimize the index.
        writer.Optimize();
    }
    finally
    {
        // Always close the writer, even when an operation above throws.
        writer.Close();
    }
}
finally
{
    // Always release the directory handle.
    directory.Close();
}
- 文件检索
// File search: looks up documents in the index under IndexDir where any field
// named in Mode.Content contains any of the terms that PanGu segmentation
// produces from keyword.

// Open the directory that holds the index files.
FSDirectory directory = FSDirectory.Open(new DirectoryInfo(this.IndexDir), new NativeFSLockFactory());
try
{
    // Second argument: open the reader read-only.
    IndexReader reader = IndexReader.Open(directory, true);
    try
    {
        IndexSearcher searcher = new IndexSearcher(reader);
        try
        {
            // Combine all (field, term) pairs with OR semantics.
            BooleanQuery queryOr = new BooleanQuery();
            foreach (string word in SplitContent.SplitByPanGu(keyword))
            {
                foreach (KeyValuePair<string, string> kv in Mode.Content)
                {
                    TermQuery query = new TermQuery(new Term(kv.Key, word));
                    // SHOULD makes each clause optional, i.e. an OR relation.
                    queryOr.Add(query, BooleanClause.Occur.SHOULD);
                }
            }

            // Fetch the search results.
            // 1000 caps the number of hits returned; it bounds the search range
            // and can also be used for paging.
            TopDocs tds = searcher.Search(queryOr, null, 1000);
            ScoreDoc[] docs = tds.scoreDocs;
            for (int i = 0; i < docs.Length; i++)
            {
                int docId = docs[i].doc;
                Document doc = searcher.Doc(docId);
                // The literal is the article's placeholder for the name of the
                // stored field to read; substitute a real field name when using this.
                string content = doc.Get("索引内字段的名称");
            }
        }
        finally
        {
            // Release the searcher even if the query fails.
            searcher.Close();
        }
    }
    finally
    {
        // The reader was opened explicitly here, so it is closed explicitly as well.
        reader.Close();
    }
}
finally
{
    // Always release the directory handle.
    directory.Close();
}
- 资源
DLL与词库:http://download.csdn.net/detail/aaakingwin/7208679
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。