jQuery1.11源码分析(4)-----Sizzle工厂函数

浏览数：48 / 时间：2015年06月09日

在用前两篇讲述完正则表达式、初始化、特性检测之后，终于到了我们的正餐——Sizzle工厂函数！

Sizzle工厂函数有四个参数，

selector:选择符

context:查找上下文

results:返回的结果数组

seed:待选元素集合，初次调用时是undefined；但有些情况下Sizzle会被递归调用，这时会把已经粗选出的待选元素层层传递下去

当我们要使用Sizzle时，使用频率最高的通常是直接指定单个id、class、tag来获取(通常还指定查找上下文来加速这一过程)，而这种情况下Sizzle做了优化，当判断是这三种情况时，直接调用原生API来获取元素。其次，最快的方法莫过于使用querySelectorAll了，也省去了后续的过滤等各种耗性能的操作。若上述手段都不生效，再采用复杂的查找、过滤等流程。

接下来我们看看源代码。

//对工具函数处理完毕，开始正式的Sizzle工厂函数
//这个seed有什么用？
//现在可以回答这个问题了，因为这个Sizzle会递归调用，这里的seed保留的是已经过粗选的待选元素
/**
 * Sizzle entry point: resolves `selector` against `context` and appends the
 * matching elements to `results`.
 *
 * Strategy, cheapest first:
 *   1. single `#id`, `tag` or `.class` selectors go straight to the native
 *      getElementById / getElementsByTagName / getElementsByClassName calls;
 *   2. otherwise querySelectorAll is tried when `support.qsa` allows it;
 *   3. anything left over is delegated to `select()`.
 * Steps 1 and 2 are skipped when a `seed` is supplied or the document is not HTML.
 *
 * The console.log calls are tracing output kept from the walkthrough.
 *
 * @param {string} selector Selector string. A falsy or non-string value makes
 *        the function return `results` untouched.
 * @param {Object} [context] Node to search in; defaults to `document`. If its
 *        nodeType is neither 1 nor 9, a new empty array is returned.
 * @param {Array} [results] Collection the matches are pushed onto; defaults to [].
 * @param {Array} [seed] Pre-selected candidate elements, forwarded to `select()`.
 * @returns {Array} `results` (or a new empty array for an unsupported context).
 */
function Sizzle( selector, context, results, seed ) {
    console.log('Sizzle begin');
    console.log('arguments(selector, context, results, seed):');
    console.log(arguments);
    var match, elem, m, nodeType,
        // QSA vars
        i, groups, old, nid, newContext, newSelector;

    // If the context's owner document (or the context itself, or the preferred
    // document when no context is given) is not the current `document`,
    // re-run the document setup for it.
    if ( ( context ? context.ownerDocument || context : preferredDoc ) !== document ) {
        console.log('Sizzle setDocument');
        setDocument( context );
    }

    context = context || document;
    results = results || [];

    // No selector, or a non-string selector: nothing to do.
    if ( !selector || typeof selector !== "string" ) {
        return results;
    }

    // nodeType is initialised lazily inside the condition; only element (1)
    // and document (9) contexts are accepted.
    if ( (nodeType = context.nodeType) !== 1 && nodeType !== 9 ) {
        return [];
    }

    // The fast paths only apply without a seed; with a seed the work is done
    // by select() at the end.
    if ( documentIsHTML && !seed ) {

        // Shortcuts: run simple selectors through the simple native APIs.
        if ( (match = rquickExpr.exec( selector )) ) {
            // match[1] holds the ID, match[2] the TAG, match[3] the CLASS.

            // Speed-up: Sizzle("#ID")
            if ( (m = match[1]) ) {
                if ( nodeType === 9 ) {
                    elem = context.getElementById( m );
                    // Check parentNode to catch when Blackberry 4.6 returns
                    // nodes that are no longer in the document (jQuery #6963)
                    if ( elem && elem.parentNode ) {
                        // Handle the case where IE, Opera, and Webkit return items
                        // by name instead of ID. On a mismatch we fall through
                        // to the slower paths below.
                        if ( elem.id === m ) {
                            results.push( elem );
                            return results;
                        }
                    } else {
                        return results;
                    }
                } else {
                    // Context is not a document, so it has no getElementById.
                    // Look the element up through the owner document, then make
                    // sure the context contains it and that its id really matches.
                    if ( context.ownerDocument && (elem = context.ownerDocument.getElementById( m )) &&
                        contains( context, elem ) && elem.id === m ) {
                        results.push( elem );
                        return results;
                    }
                }

            // Speed-up: Sizzle("TAG")
            } else if ( match[2] ) {
                push.apply( results, context.getElementsByTagName( selector ) );
                return results;

            // Speed-up: Sizzle(".CLASS")
            // Without a native getElementsByClassName this shortcut is simply
            // skipped; no slower emulation is attempted here.
            } else if ( (m = match[3]) && support.getElementsByClassName && context.getElementsByClassName ) {
                push.apply( results, context.getElementsByClassName( m ) );
                return results;
            }
        }

        // QSA path: used when qSA is supported and the selector does not hit
        // a known-buggy qSA pattern.
        if ( support.qsa && (!rbuggyQSA || !rbuggyQSA.test( selector )) ) {

            nid = old = expando;
            newContext = context;
            // For a document context the selector can be used as is.
            newSelector = nodeType === 9 && selector;

            // qSA works strangely on Element-rooted queries
            // We can work around this by specifying an extra ID on the root
            // and working up from there (Thanks to Andrew Dupont for the technique)
            // IE 8 doesn't work on object elements
            // Background: http://www.cnblogs.com/snandy/archive/2011/03/30/1999388.html
            if ( nodeType === 1 && context.nodeName.toLowerCase() !== "object" ) {

                // Split the selector into groups (one per comma-separated part),
                // each group being a sequence of tokens.
                console.log('Sizzle tokenize');
                groups = tokenize( selector );
                console.log('Sizzle tokenize results'+groups);

                // Reuse the context's own id (escaped) when it has one;
                // otherwise tag it temporarily with `expando`.
                if ( (old = context.getAttribute("id")) ) {
                    nid = old.replace( rescape, "\\$&" );
                } else {
                    context.setAttribute( "id", nid );
                }

                // Prefix every group with an [id='...'] attribute selector so the
                // query is anchored at the context element.
                nid = "[id='" + nid + "'] ";

                i = groups.length;
                while ( i-- ) {
                    groups[i] = nid + toSelector( groups[i] );
                }
                // A sibling combinator may need to look outside the context, so
                // query from the parent when it is usable.
                newContext = rsibling.test( selector ) && testContext( context.parentNode ) || context;
                newSelector = groups.join(",");
            }

            // This block had been commented out for a debugging session; without
            // it the temporary id set above was never removed from the context.
            if ( newSelector ) {
                try {
                    push.apply( results,
                        newContext.querySelectorAll( newSelector )
                    );
                    return results;
                } catch(qsaError) {
                    // Deliberately ignored: a selector qSA cannot handle falls
                    // through to select() below.
                } finally {
                    // Remove the id only if this function added it.
                    if ( !old ) {
                        context.removeAttribute("id");
                    }
                }
            }
        }
    }

    // All others: fall back to the full find-and-filter machinery.
    return select( selector.replace( rtrim, "$1" ), context, results, seed );
}

中间调用了tokenize这个工具函数，把selector转换成groups数组，每一个group又是一个tokens数组，tokens数组由之前说过的一个个token组成。我们来看一下这个工具函数是怎么切割selector的(其实简单来说就是用正则表达式不断地匹配、切割，用剩下的selector再匹配，再切割)

//以后遇到这种工具函数，先拷到外面看输入输出
//当tokenize的第二个参数parseOnly为true时，只返回selector中未能解析的剩余部分的长度(命中缓存时直接返回0)，而不返回词元数组
/**
 * Splits a selector string into groups of tokens.
 *
 * The selector is consumed from the left: on each pass a leading comma starts
 * a new group, then a combinator and every filter in `Expr.filter` get a
 * chance to match and strip the front of the remaining text. Scanning stops
 * when the text is exhausted or a pass matches nothing.
 *
 * Unlike `!matched || rcomma.exec(...)`, the comma test here runs first on
 * every pass, so a leading comma (e.g. ",body") is consumed rather than
 * left over as an error.
 *
 * @param {string} selector Selector text to tokenize.
 * @param {boolean} [parseOnly] When truthy, return only the length of the
 *        text that could not be tokenized (0 on a cache hit).
 * @returns {Array|number} A copy of the groups array (each group an array of
 *          `{ value, type[, matches] }` tokens), or a number when `parseOnly`
 *          is truthy. When text is left over and `parseOnly` is falsy, the
 *          result of `Sizzle.error( selector )` is returned.
 */
function tokenize( selector, parseOnly ) {
    var consumed, hit, currentGroup, filterName,
        rest, allGroups, preFilters,
        cachedGroups = tokenCache[ selector + " " ];

    if ( cachedGroups ) {
        // Within this function the cache is only written after a selector has
        // been fully consumed, so the leftover length of a cached entry is 0.
        if ( parseOnly ) {
            return 0;
        }
        // Array slice: hand out a shallow copy rather than the cached array itself.
        return cachedGroups.slice( 0 );
    }

    // `rest` is the part of the selector that has not been consumed yet.
    rest = selector;
    allGroups = [];
    preFilters = Expr.preFilter;

    while ( rest ) {

        // Comma and first run: open a new group.
        hit = rcomma.exec( rest );
        if ( hit || !consumed ) {
            if ( hit ) {
                // Don't consume trailing commas as valid
                rest = rest.slice( hit[0].length ) || rest;
            }
            currentGroup = [];
            allGroups.push( currentGroup );
        }

        consumed = false;

        // Combinators ( > + ~ and whitespace ) are tried first.
        hit = rcombinators.exec( rest );
        if ( hit ) {
            consumed = hit.shift();
            currentGroup.push({
                value: consumed,
                // Cast descendant combinators to space
                type: hit[0].replace( rtrim, " " )
            });
            rest = rest.slice( consumed.length );
        }

        // Filters: every filter type is tried once per pass.
        for ( filterName in Expr.filter ) {
            hit = matchExpr[ filterName ].exec( rest );
            // A pre-filter, when one exists for this type, may rewrite or
            // reject (by returning a falsy value) the raw match.
            if ( hit && preFilters[ filterName ] ) {
                hit = preFilters[ filterName ]( hit );
            }
            if ( hit ) {
                consumed = hit.shift();
                currentGroup.push({
                    value: consumed,
                    type: filterName,
                    matches: hit
                });
                rest = rest.slice( consumed.length );
            }
        }

        // Nothing matched on this pass: stop scanning.
        if ( !consumed ) {
            break;
        }
    }

    // Return the length of the invalid excess if we're just parsing.
    if ( parseOnly ) {
        return rest.length;
    }
    // Leftover text means the selector is invalid.
    if ( rest ) {
        return Sizzle.error( selector );
    }
    // Cache the tokens and return a copy.
    return tokenCache( selector, allGroups ).slice( 0 );
}

这里还有一个工具函数toSelector，它的作用和上面的tokenize刚好相反，即把一个个token重新拼成selector，过程很简单，就不放代码了。本文最后再看一个select函数，它用于调用各个查找函数(find)来找到待选集seed。

/**
 * Fallback selection routine: tokenizes `selector`, tries to obtain a seed set
 * cheaply through the `Expr.find` functions, then hands whatever is left to a
 * compiled matcher.
 *
 * The console.log calls are tracing output kept from the walkthrough.
 *
 * @param {string} selector Selector string.
 * @param {Object} context Node to search in.
 * @param {Array} results Collection the matches are pushed onto.
 * @param {Array} [seed] Pre-selected candidates; when given, the seed search
 *        is skipped and the selector goes straight to the compiled matcher.
 * @returns {Array} `results`.
 */
function select( selector, context, results, seed ) {
    console.log('select begin');
    console.log('arguments:selector, context, results, seed');
    console.log(arguments);
    console.log('select tokenize');
    var i, tokens, token, type, find,
        match = tokenize( selector );
    console.log('select after tokenize');
    console.log(match);
    if ( !seed ) {
        // Try to minimize operations if there is only one group
        if ( match.length === 1 ) {

            // Take a shortcut and set the context if the root selector is an ID.
            // Work on a copy of the group so the tokens can be consumed.
            tokens = match[0] = match[0].slice( 0 );
            // Requires more than two tokens, a leading ID token followed by a
            // combinator, getById support and an HTML document context.
            if ( tokens.length > 2 && (token = tokens[0]).type === "ID" &&
                    support.getById && context.nodeType === 9 && documentIsHTML &&
                    Expr.relative[ tokens[1].type ] ) {
                console.log('select find id');
                context = ( Expr.find["ID"]( token.matches[0].replace(runescape, funescape), context ) || [] )[0];
                // No element with that id: nothing can match.
                if ( !context ) {
                    return results;
                }
                // Drop the ID token and the text it covered from the selector.
                selector = selector.slice( tokens.shift().value.length );
            }

            // Fetch a seed set for right-to-left matching.
            // When the `needsContext` pattern matches the selector, start at 0 so
            // the loop below is skipped and the compiled matcher does all the work.
            i = matchExpr["needsContext"].test( selector ) ? 0 : tokens.length;
            while ( i-- ) {
                token = tokens[i];

                // Abort if we hit a combinator
                if ( Expr.relative[ (type = token.type) ] ) {
                    break;
                }
                if ( (find = Expr.find[ type ]) ) {
                    // Search, expanding context for leading sibling combinators
                    console.log('select after find');
                    if ( (seed = find(
                        token.matches[0].replace( runescape, funescape ),
                        rsibling.test( tokens[0].type ) && testContext( context.parentNode ) || context
                    )) ) {
                        console.log('select after find:seed');
                        console.log(seed);
                        // If seed is empty or no tokens remain, we can return early.
                        // Remove the token that produced the seed, then rebuild the
                        // selector from what is left (falsy when the seed is empty).
                        tokens.splice( i, 1 );
                        selector = seed.length && toSelector( tokens );
                        console.log(selector);
                        if ( !selector ) {
                            push.apply( results, seed );
                            return results;
                        }
                        // A seed was found but tokens remain to filter with:
                        // leave the loop and run the compiled matcher below.
                        break;
                    }
                }
            }
        }
    }

    // Compile and execute a filtering function
    // Provide `match` to avoid retokenization if we modified the selector above.
    // The matcher is called with (seed, context, xml, results, outermostContext).
    console.log('select compile');
    compile( selector, match )(
        seed,
        context,
        !documentIsHTML,
        results,
        rsibling.test( selector ) && testContext( context.parentNode ) || context
    );
    console.log('select after compile');
    console.log(results);
    return results;
}