Lucene 4.7 源码研究之索引建立过程(2)
不得不说一句,Lucene 的源码中大多数地方都留有注释,解释得很详细,阅读起来很方便。
定义分析器
// Create the analyzer: a StandardAnalyzer constructed with Version.LUCENE_47.
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
以 StandardAnalyzer 为例,构造时只做了两件事:加载停用词(stop words)集合,并记录 Lucene 的版本号。
IndexWriterConfig初始化,加载了建立索引需要的配置
// Build the index-writer configuration from the Lucene version constant and the analyzer instance.
IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
IndexWriterConfig 继承自 LiveIndexWriterConfig,各项默认配置都在父类 LiveIndexWriterConfig 的构造函数中完成初始化:
// used by IndexWriterConfig
// Initializes every setting to its default; the trailing notes are the article author's
// annotations (translated), not Lucene's own comments, unless stated otherwise.
LiveIndexWriterConfig(Analyzer analyzer, Version matchVersion) {
  this.analyzer = analyzer; // the analyzer (tokenizer) to use
  this.matchVersion = matchVersion; // Lucene version
  // Once buffered data in memory reaches this size it is flushed to disk; default 16 MB.
  ramBufferSizeMB = IndexWriterConfig.DEFAULT_RAM_BUFFER_SIZE_MB;
  // Both this and maxBufferedDeleteTerms default to DISABLE_AUTO_FLUSH (-1), i.e. automatic
  // flushing triggered by these two counters is turned off.
  // NOTE(review): presumably flushing is then driven by RAM usage only -- confirm against
  // the IndexWriterConfig Javadoc.
  maxBufferedDocs = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DOCS;
  maxBufferedDeleteTerms = IndexWriterConfig.DEFAULT_MAX_BUFFERED_DELETE_TERMS; // same as above
  // Default is 1; divisor applied when reading the terms index.
  readerTermsIndexDivisor = IndexWriterConfig.DEFAULT_READER_TERMS_INDEX_DIVISOR;
  // Lucene's explanation: "warm is called before any deletes have been carried over the
  // merged segment". No warmer is installed by default.
  mergedSegmentWarmer = null;
  // The article describes this as the default skip-list interval.
  // NOTE(review): the field name suggests the spacing between indexed terms -- confirm.
  termIndexInterval = IndexWriterConfig.DEFAULT_TERM_INDEX_INTERVAL; // TODO: this should be private to the codec, not settable here
  // Deletion policy for index commits (implements IndexDeletionPolicy). According to its
  // Javadoc it keeps only the most recent commit: earlier commits are removed as soon as a
  // new commit completes.
  delPolicy = new KeepOnlyLastCommitDeletionPolicy();
  commit = null;
  // Whether to use the compound file format; the article states the default is true.
  useCompoundFile = IndexWriterConfig.DEFAULT_USE_COMPOUND_FILE_SYSTEM;
  openMode = OpenMode.CREATE_OR_APPEND; // open mode: create, or append if the index exists
  similarity = IndexSearcher.getDefaultSimilarity(); // default scoring strategy (vector space model)
  mergeScheduler = new ConcurrentMergeScheduler(); // segment merge scheduler (concurrent implementation)
  writeLockTimeout = IndexWriterConfig.WRITE_LOCK_TIMEOUT; // write-lock timeout
  indexingChain = DocumentsWriterPerThread.defaultIndexingChain; // indexing chain
  // The article ties the codec to validation of index file headers (magic, codec name,
  // version), which differ by Lucene version; the default is the current version's codec.
  // NOTE(review): the codec presumably governs the index file format as a whole -- confirm.
  codec = Codec.getDefault();
  // Fail fast when no default codec is available.
  if (codec == null) {
    throw new NullPointerException();
  }
  infoStream = InfoStream.getDefault(); // default is NoOutput
  mergePolicy = new TieredMergePolicy(); // segment merge policy
  flushPolicy = new FlushByRamOrCountsPolicy(); // flush policy
  readerPooling = IndexWriterConfig.DEFAULT_READER_POOLING; // reader pooling is off by default
  // Maximum number of threads allowed to index concurrently through one IndexWriter; default 8.
  indexerThreadPool = new ThreadAffinityDocumentsWriterThreadPool(IndexWriterConfig.DEFAULT_MAX_THREAD_STATES);
  // Default 1945; upper bound on the memory each indexing thread may use.
  perThreadHardLimitMB = IndexWriterConfig.DEFAULT_RAM_PER_THREAD_HARD_LIMIT_MB;
}
其中的各项策略等到真正调用的时候再做分析
郑重声明:本站内容如果来自互联网及其他传播媒体,其版权均属原媒体及文章作者所有。转载目的在于传递更多信息及用于网络分享,并不代表本站赞同其观点和对其真实性负责,也不构成任何其他建议。