上文solr源码分析之数据导入DataImporter追溯中提到了solr的工作流程,其核心是各种handler。

handler定义了各种search Component,

  @Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception
{
List<SearchComponent> components = getComponents();
ResponseBuilder rb = new ResponseBuilder(req, rsp, components);
}

然后调用组件的prepare方法:

if (timer == null) {
// non-debugging prepare phase
for( SearchComponent c : components ) {
c.prepare(rb);
}
} else {
// debugging prepare phase
RTimer subt = timer.sub( "prepare" );
for( SearchComponent c : components ) {
rb.setTimer( subt.sub( c.getName() ) );
c.prepare(rb);
rb.getTimer().stop();
}
subt.stop();
}

再调用组件的process方法:

if(!rb.isDebug()) {
// Process
for( SearchComponent c : components ) {
c.process(rb);
}
}
else {
// Process
RTimer subt = timer.sub( "process" );
for( SearchComponent c : components ) {
rb.setTimer( subt.sub( c.getName() ) );
c.process(rb);
rb.getTimer().stop();
}
subt.stop(); // add the timing info
if (rb.isDebugTimings()) {
rb.addDebugInfo("timing", timer.asNamedList() );
}
}

从源码上来分析一下search Component,searchComponent的结构如下:

以QueryComponent为例,理解一下它的工作原理。

prepare方法获取查询语句:

 @Override
public void prepare(ResponseBuilder rb) throws IOException
{ SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
SolrQueryResponse rsp = rb.rsp; // Set field flags
ReturnFields returnFields = new SolrReturnFields( req );
rsp.setReturnFields( returnFields );
int flags = 0;
if (returnFields.wantsScore()) {
flags |= SolrIndexSearcher.GET_SCORES;
}
rb.setFieldFlags( flags ); String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE); // get it from the response builder to give a different component a chance
// to set it.
String queryString = rb.getQueryString();
if (queryString == null) {
// this is the normal way it's set.
queryString = params.get( CommonParams.Q );
rb.setQueryString(queryString);
} try {
QParser parser = QParser.getParser(rb.getQueryString(), defType, req);
Query q = parser.getQuery();
if (q == null) {
// normalize a null query to a query that matches nothing
q = new MatchNoDocsQuery();
} rb.setQuery( q ); String rankQueryString = rb.req.getParams().get(CommonParams.RQ);
if(rankQueryString != null) {
QParser rqparser = QParser.getParser(rankQueryString, defType, req);
Query rq = rqparser.getQuery();
if(rq instanceof RankQuery) {
RankQuery rankQuery = (RankQuery)rq;
rb.setRankQuery(rankQuery);
MergeStrategy mergeStrategy = rankQuery.getMergeStrategy();
if(mergeStrategy != null) {
rb.addMergeStrategy(mergeStrategy);
if(mergeStrategy.handlesMergeFields()) {
rb.mergeFieldHandler = mergeStrategy;
}
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,"rq parameter must be a RankQuery");
}
} rb.setSortSpec( parser.getSort(true) );
rb.setQparser(parser); final String cursorStr = rb.req.getParams().get(CursorMarkParams.CURSOR_MARK_PARAM);
if (null != cursorStr) {
final CursorMark cursorMark = new CursorMark(rb.req.getSchema(),
rb.getSortSpec());
cursorMark.parseSerializedTotem(cursorStr);
rb.setCursorMark(cursorMark);
} String[] fqs = req.getParams().getParams(CommonParams.FQ);
if (fqs!=null && fqs.length!=0) {
List<Query> filters = rb.getFilters();
// if filters already exists, make a copy instead of modifying the original
filters = filters == null ? new ArrayList<Query>(fqs.length) : new ArrayList<>(filters);
for (String fq : fqs) {
if (fq != null && fq.trim().length()!=0) {
QParser fqp = QParser.getParser(fq, null, req);
filters.add(fqp.getQuery());
}
}
// only set the filters if they are not empty otherwise
// fq=&someotherParam= will trigger all docs filter for every request
// if filter cache is disabled
if (!filters.isEmpty()) {
rb.setFilters( filters );
}
}
} catch (SyntaxError e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
} if (params.getBool(GroupParams.GROUP, false)) {
prepareGrouping(rb);
} else {
//Validate only in case of non-grouping search.
if(rb.getSortSpec().getCount() < 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'rows' parameter cannot be negative");
}
} //Input validation.
if (rb.getQueryCommand().getOffset() < 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
}
}

process方法执行查询:

/**
* Actually run the query
*/
@Override
public void process(ResponseBuilder rb) throws IOException
{
LOG.debug("process: {}", rb.req.getParams()); SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
SolrIndexSearcher searcher = req.getSearcher(); StatsCache statsCache = req.getCore().getStatsCache(); int purpose = params.getInt(ShardParams.SHARDS_PURPOSE, ShardRequest.PURPOSE_GET_TOP_IDS);
if ((purpose & ShardRequest.PURPOSE_GET_TERM_STATS) != 0) {
statsCache.returnLocalStats(rb, searcher);
return;
}
// check if we need to update the local copy of global dfs
if ((purpose & ShardRequest.PURPOSE_SET_TERM_STATS) != 0) {
// retrieve from request and update local cache
statsCache.receiveGlobalStats(req);
} SolrQueryResponse rsp = rb.rsp;
IndexSchema schema = searcher.getSchema(); // Optional: This could also be implemented by the top-level searcher sending
// a filter that lists the ids... that would be transparent to
// the request handler, but would be more expensive (and would preserve score
// too if desired).
String ids = params.get(ShardParams.IDS);
if (ids != null) {
SchemaField idField = schema.getUniqueKeyField();
List<String> idArr = StrUtils.splitSmart(ids, ",", true);
int[] luceneIds = new int[idArr.size()];
int docs = 0;
for (int i=0; i<idArr.size(); i++) {
int id = searcher.getFirstMatch(
new Term(idField.getName(), idField.getType().toInternal(idArr.get(i))));
if (id >= 0)
luceneIds[docs++] = id;
} DocListAndSet res = new DocListAndSet();
res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0);
if (rb.isNeedDocSet()) {
// TODO: create a cache for this!
List<Query> queries = new ArrayList<>();
queries.add(rb.getQuery());
List<Query> filters = rb.getFilters();
if (filters != null) queries.addAll(filters);
res.docSet = searcher.getDocSet(queries);
}
rb.setResults(res); ResultContext ctx = new ResultContext();
ctx.docs = rb.getResults().docList;
ctx.query = null; // anything?
rsp.add("response", ctx);
return;
} // -1 as flag if not set.
long timeAllowed = params.getLong(CommonParams.TIME_ALLOWED, -1L);
if (null != rb.getCursorMark() && 0 < timeAllowed) {
// fundamentally incompatible
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Can not search using both " +
CursorMarkParams.CURSOR_MARK_PARAM + " and " + CommonParams.TIME_ALLOWED);
} SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
cmd.setTimeAllowed(timeAllowed); req.getContext().put(SolrIndexSearcher.STATS_SOURCE, statsCache.get(req)); SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult(); //
// grouping / field collapsing
//
GroupingSpecification groupingSpec = rb.getGroupingSpec();
if (groupingSpec != null) {
try {
boolean needScores = (cmd.getFlags() & SolrIndexSearcher.GET_SCORES) != 0;
if (params.getBool(GroupParams.GROUP_DISTRIBUTED_FIRST, false)) {
CommandHandler.Builder topsGroupsActionBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setNeedDocSet(false) // Order matters here
.setIncludeHitCount(true)
.setSearcher(searcher); for (String field : groupingSpec.getFields()) {
topsGroupsActionBuilder.addCommandField(new SearchGroupsFieldCommand.Builder()
.setField(schema.getField(field))
.setGroupSort(groupingSpec.getGroupSort())
.setTopNGroups(cmd.getOffset() + cmd.getLen())
.setIncludeGroupCount(groupingSpec.isIncludeGroupCount())
.build()
);
} CommandHandler commandHandler = topsGroupsActionBuilder.build();
commandHandler.execute();
SearchGroupsResultTransformer serializer = new SearchGroupsResultTransformer(searcher);
rsp.add("firstPhase", commandHandler.processResult(result, serializer));
rsp.add("totalHitCount", commandHandler.getTotalHitCount());
rb.setResult(result);
return;
} else if (params.getBool(GroupParams.GROUP_DISTRIBUTED_SECOND, false)) {
CommandHandler.Builder secondPhaseBuilder = new CommandHandler.Builder()
.setQueryCommand(cmd)
.setTruncateGroups(groupingSpec.isTruncateGroups() && groupingSpec.getFields().length > 0)
.setSearcher(searcher); for (String field : groupingSpec.getFields()) {
SchemaField schemaField = schema.getField(field);
String[] topGroupsParam = params.getParams(GroupParams.GROUP_DISTRIBUTED_TOPGROUPS_PREFIX + field);
if (topGroupsParam == null) {
topGroupsParam = new String[0];
} List<SearchGroup<BytesRef>> topGroups = new ArrayList<>(topGroupsParam.length);
for (String topGroup : topGroupsParam) {
SearchGroup<BytesRef> searchGroup = new SearchGroup<>();
if (!topGroup.equals(TopGroupsShardRequestFactory.GROUP_NULL_VALUE)) {
searchGroup.groupValue = new BytesRef(schemaField.getType().readableToIndexed(topGroup));
}
topGroups.add(searchGroup);
} secondPhaseBuilder.addCommandField(
new TopGroupsFieldCommand.Builder()
.setField(schemaField)
.setGroupSort(groupingSpec.getGroupSort())
.setSortWithinGroup(groupingSpec.getSortWithinGroup())
.setFirstPhaseGroups(topGroups)
.setMaxDocPerGroup(groupingSpec.getGroupOffset() + groupingSpec.getGroupLimit())
.setNeedScores(needScores)
.setNeedMaxScore(needScores)
.build()
);
} for (String query : groupingSpec.getQueries()) {
secondPhaseBuilder.addCommandField(new QueryCommand.Builder()
.setDocsToCollect(groupingSpec.getOffset() + groupingSpec.getLimit())
.setSort(groupingSpec.getGroupSort())
.setQuery(query, rb.req)
.setDocSet(searcher)
.build()
);
} CommandHandler commandHandler = secondPhaseBuilder.build();
commandHandler.execute();
TopGroupsResultTransformer serializer = new TopGroupsResultTransformer(rb);
rsp.add("secondPhase", commandHandler.processResult(result, serializer));
rb.setResult(result);
return;
} int maxDocsPercentageToCache = params.getInt(GroupParams.GROUP_CACHE_PERCENTAGE, 0);
boolean cacheSecondPassSearch = maxDocsPercentageToCache >= 1 && maxDocsPercentageToCache <= 100;
Grouping.TotalCount defaultTotalCount = groupingSpec.isIncludeGroupCount() ?
Grouping.TotalCount.grouped : Grouping.TotalCount.ungrouped;
int limitDefault = cmd.getLen(); // this is normally from "rows"
Grouping grouping =
new Grouping(searcher, result, cmd, cacheSecondPassSearch, maxDocsPercentageToCache, groupingSpec.isMain());
grouping.setSort(groupingSpec.getGroupSort())
.setGroupSort(groupingSpec.getSortWithinGroup())
.setDefaultFormat(groupingSpec.getResponseFormat())
.setLimitDefault(limitDefault)
.setDefaultTotalCount(defaultTotalCount)
.setDocsPerGroupDefault(groupingSpec.getGroupLimit())
.setGroupOffsetDefault(groupingSpec.getGroupOffset())
.setGetGroupedDocSet(groupingSpec.isTruncateGroups()); if (groupingSpec.getFields() != null) {
for (String field : groupingSpec.getFields()) {
grouping.addFieldCommand(field, rb.req);
}
} if (groupingSpec.getFunctions() != null) {
for (String groupByStr : groupingSpec.getFunctions()) {
grouping.addFunctionCommand(groupByStr, rb.req);
}
} if (groupingSpec.getQueries() != null) {
for (String groupByStr : groupingSpec.getQueries()) {
grouping.addQueryCommand(groupByStr, rb.req);
}
} if (rb.doHighlights || rb.isDebug() || params.getBool(MoreLikeThisParams.MLT, false)) {
// we need a single list of the returned docs
cmd.setFlags(SolrIndexSearcher.GET_DOCLIST);
} grouping.execute();
if (grouping.isSignalCacheWarning()) {
rsp.add(
"cacheWarning",
String.format(Locale.ROOT, "Cache limit of %d percent relative to maxdoc has exceeded. Please increase cache size or disable caching.", maxDocsPercentageToCache)
);
}
rb.setResult(result); if (grouping.mainResult != null) {
ResultContext ctx = new ResultContext();
ctx.docs = grouping.mainResult;
ctx.query = null; // TODO? add the query?
rsp.add("response", ctx);
rsp.getToLog().add("hits", grouping.mainResult.matches());
} else if (!grouping.getCommands().isEmpty()) { // Can never be empty since grouping.execute() checks for this.
rsp.add("grouped", result.groupedResults);
rsp.getToLog().add("hits", grouping.getCommands().get(0).getMatches());
}
return;
} catch (SyntaxError e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
} // normal search result
searcher.search(result, cmd);
rb.setResult(result); ResultContext ctx = new ResultContext();
ctx.docs = rb.getResults().docList;
ctx.query = rb.getQuery();
rsp.add("response", ctx);
rsp.getToLog().add("hits", rb.getResults().docList.matches()); if ( ! rb.req.getParams().getBool(ShardParams.IS_SHARD,false) ) {
if (null != rb.getNextCursorMark()) {
rb.rsp.add(CursorMarkParams.CURSOR_MARK_NEXT,
rb.getNextCursorMark().getSerializedTotem());
}
} if(rb.mergeFieldHandler != null) {
rb.mergeFieldHandler.handleMergeFields(rb, searcher);
} else {
doFieldSortValues(rb, searcher);
} doPrefetch(rb);
}

solr源码分析之searchComponent的更多相关文章

  1. solr源码分析之solrclound

    一.简介 SolrCloud是Solr4.0版本以后基于Solr和Zookeeper的分布式搜索方案.SolrCloud是Solr的基于Zookeeper一种部署方式.Solr可以以多种方式部署,例如 ...

  2. solr源码分析之数据导入DataImporter追溯。

    若要搜索的信息都是被存储在数据库里面的,但是solr不能直接搜数据库,所以只有借助Solr组件将要搜索的信息在搜索服务器上进行索引,然后在客户端供客户使用. 1. SolrDispatchFilter ...

  3. Solr初始化源码分析-Solr初始化与启动

    用solr做项目已经有一年有余,但都是使用层面,只是利用solr现有机制,修改参数,然后监控调优,从没有对solr进行源码级别的研究.但是,最近手头的一个项目,让我感觉必须把solrn内部原理和扩展机 ...

  4. Solr4.8.0源码分析(7)之Solr SPI

    Solr4.8.0源码分析(7)之Solr SPI 查看Solr源码时候会发现,每一个package都会由对应的resources. 如下图所示: 一时对这玩意好奇了,看了文档以后才发现,这个serv ...

  5. Solr4.8.0源码分析(4)之Eclipse Solr调试环境搭建

    Solr4.8.0源码分析(4)之Eclipse Solr调试环境搭建 由于公司里的Solr调试都是用远程jpda进行的,但是家里只有一台电脑所以不能jpda进行调试,这是因为jpda的端口冲突.所以 ...

  6. Solr4.8.0源码分析(5)之查询流程分析总述

    Solr4.8.0源码分析(5)之查询流程分析总述 前面已经写到,solr查询是通过http发送命令,solr servlet接受并进行处理.所以solr的查询流程从SolrDispatchsFilt ...

  7. Solr5.0源码分析-SolrDispatchFilter

    年初,公司开发法律行业的搜索引擎.当时,我作为整个系统的核心成员,选择solr,并在solr根据我们的要求做了相应的二次开发.但是,对solr的还没有进行认真仔细的研究.最近,事情比较清闲,翻翻sol ...

  8. Heritrix源码分析(十四) 如何让Heritrix不间断的抓取(转)

    欢迎加入Heritrix群(QQ):109148319,10447185 , Lucene/Solr群(QQ) :  118972724 本博客已迁移到本人独立博客: http://www.yun5u ...

  9. Heritrix源码分析(九) Heritrix的二次抓取以及如何让Heritrix抓取你不想抓取的URL

    本博客属原创文章,欢迎转载!转载请务必注明出处:http://guoyunsky.iteye.com/blog/644396       本博客已迁移到本人独立博客: http://www.yun5u ...

随机推荐

  1. 20155336 2016-2017-2《JAVA程序设计》第二周学习总结

    20155336 2016-2017-2 <JAVA 程序设计>第二周学习总结 教材学习内容 1: GIT版本检测 2: JAVA中基本类型 整数 字节 浮点数 字符 布尔(▲) 通过AP ...

  2. TPO 02 - Desert Formation

    TPO 02 - Desert Formation NOTE: 主要意思(大概就是主谓宾)用粗体标出:重要的其它用斜体: []中的是大致意思,可能与原文有关也可能无关,但不会离题 目的为训练句子/段落 ...

  3. Kibana TypeError : Object #<GlobalState> has no method 'setDefaults'

    在windows server中装完elasticsearch和kibana后,elasticsearch能正常访问(http://localhost:9200): 而访问kibana的地址(http ...

  4. 关于购买Redis服务器:腾讯云、阿里云还是华为云?

    个人分类: redis使用 编辑 新年伊始,很多商家都开始进行新年产品大促销,在分布是缓存Redis领域,几家大公司也是打得如火如荼,各有千秋啊. 现在市场上比较有口碑的商家有腾讯云.阿里云.华为云三 ...

  5. PHP性能优化 -实战篇

    借助xhprof 工具分析PHP性能 XHPorf(源自Fackbook 的PHP性能分析工具) 实战     通过分析Wordpress程序,做优化! 优化 找到需要优化的函数 grep 'impo ...

  6. 王者荣耀交流协会--第2次Scrum会议

    Scrum master:袁玥 要求1:工作照片 要求2:时间跨度:2017年10月14号  6:02--6:43  共计41min 要求3:地点:一食堂二楼两张桌子旁(靠近卖方便面那边) 要求4:立 ...

  7. 欢迎来怼---作业要求 20171015 beta冲刺贡献分分配规则

    一.小组信息 队名:欢迎来怼 小组成员 队长:田继平 成员:李圆圆,葛美义,王伟东,姜珊,邵朔,阚博文 基础分      每人占个人总分的百分之40% leangoo里面的得分    每人占个人总分里 ...

  8. 解决Ubuntu16.04下git clone太慢问题

    记录一些博客,省着自己再去找了... ss-qt5安装 生成.pac genpac --pac-proxy "SOCKS5 127.0.0.1:1080" --gfwlist-pr ...

  9. 软工1816·Alpha冲刺(10/10)

    团队信息 队名:爸爸饿了 组长博客:here 作业博客:here 组员情况 组员1(组长):王彬 过去两天完成了哪些任务 协助完成前端各个页面的整合 协助解决前端操作逻辑存在的问题 完成前端的美化,使 ...

  10. struts2文件上传突破2M限制

    struts配置文件 <action name="upload" class="strutsFileUpload"> <result name ...