Reading the Spark Source Code: network (2)
In the previous part we saw that Spark's source code makes heavy use of the buffer portion of Netty's API. This part looks at some of Netty's core APIs, Channel in particular, through the client side of the network module. The entry point is TransportClientFactory, which keeps a pool of connections per remote address; each ClientPool holds a fixed number of TransportClients plus one lock object per slot:
private static class ClientPool {
  TransportClient[] clients;
  Object[] locks;

  public ClientPool(int size) {
    clients = new TransportClient[size];
    locks = new Object[size];
    for (int i = 0; i < size; i++) {
      locks[i] = new Object();
    }
  }
}
The public createClient first tries to reuse a pooled connection: a random slot is chosen, and a per-slot lock plus a second check inside the synchronized block keep concurrent callers from creating duplicate connections for the same slot:

public TransportClient createClient(String remoteHost, int remotePort) throws IOException {
  // Get connection from the connection pool first.
  // If it is not found or not active, create a new one.
  final InetSocketAddress address = new InetSocketAddress(remoteHost, remotePort);

  // Create the ClientPool if we don't have it yet.
  ClientPool clientPool = connectionPool.get(address);
  if (clientPool == null) {
    connectionPool.putIfAbsent(address, new ClientPool(numConnectionsPerPeer));
    clientPool = connectionPool.get(address);
  }

  int clientIndex = rand.nextInt(numConnectionsPerPeer);
  TransportClient cachedClient = clientPool.clients[clientIndex];

  if (cachedClient != null && cachedClient.isActive()) {
    logger.trace("Returning cached connection to {}: {}", address, cachedClient);
    return cachedClient;
  }

  // If we reach here, we don't have an existing connection open. Let's create a new one.
  // Multiple threads might race here to create new connections. Keep only one of them active.
  synchronized (clientPool.locks[clientIndex]) {
    cachedClient = clientPool.clients[clientIndex];

    if (cachedClient != null) {
      if (cachedClient.isActive()) {
        logger.trace("Returning cached connection to {}: {}", address, cachedClient);
        return cachedClient;
      } else {
        logger.info("Found inactive connection to {}, creating a new one.", address);
      }
    }
    clientPool.clients[clientIndex] = createClient(address);
    return clientPool.clients[clientIndex];
  }
}
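A minimal usage sketch of the factory, assuming a TransportConf and an RpcHandler are already in hand; the host, port, and variable names here are assumptions for illustration, not values from the Spark source:

// Hypothetical usage sketch: conf, rpcHandler, and the endpoint are assumed.
TransportContext context = new TransportContext(conf, rpcHandler);
TransportClientFactory factory = context.createClientFactory();
// Returns a pooled client, creating a new connection only if the chosen
// slot is empty or its cached client is inactive.
TransportClient client = factory.createClient("shuffle-node-1", 7337);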
createUnmanagedClient bypasses the pool entirely; both it and the pooled path delegate to the private createClient overload, which is where Netty's Bootstrap and Channel come in:

/** Create a completely new {@link TransportClient} to the remote host/port, bypassing the pool. */
public TransportClient createUnmanagedClient(String remoteHost, int remotePort)
    throws IOException {
  final InetSocketAddress address = new InetSocketAddress(remoteHost, remotePort);
  return createClient(address);
}

/** Create a completely new {@link TransportClient} to the remote address. */
private TransportClient createClient(InetSocketAddress address) throws IOException {
  logger.debug("Creating new connection to " + address);

  Bootstrap bootstrap = new Bootstrap();
  bootstrap.group(workerGroup)
    .channel(socketChannelClass)
    // Disable Nagle's Algorithm since we don't want packets to wait.
    .option(ChannelOption.TCP_NODELAY, true)
    .option(ChannelOption.SO_KEEPALIVE, true)
    .option(ChannelOption.CONNECT_TIMEOUT_MILLIS, conf.connectionTimeoutMs())
    .option(ChannelOption.ALLOCATOR, pooledAllocator);

  final AtomicReference<TransportClient> clientRef = new AtomicReference<TransportClient>();
  final AtomicReference<Channel> channelRef = new AtomicReference<Channel>();

  bootstrap.handler(new ChannelInitializer<SocketChannel>() {
    @Override
    public void initChannel(SocketChannel ch) {
      TransportChannelHandler clientHandler = context.initializePipeline(ch);
      clientRef.set(clientHandler.getClient());
      channelRef.set(ch);
    }
  });

  // Connect to the remote server
  long preConnect = System.nanoTime();
  ChannelFuture cf = bootstrap.connect(address);
  if (!cf.awaitUninterruptibly(conf.connectionTimeoutMs())) {
    throw new IOException(
      String.format("Connecting to %s timed out (%s ms)", address, conf.connectionTimeoutMs()));
  } else if (cf.cause() != null) {
    throw new IOException(String.format("Failed to connect to %s", address), cf.cause());
  }

  TransportClient client = clientRef.get();
  Channel channel = channelRef.get();
  assert client != null : "Channel future completed successfully with null client";

  // Execute any client bootstraps synchronously before marking the Client as successful.
  long preBootstrap = System.nanoTime();
  logger.debug("Connection to {} successful, running bootstraps...", address);
  try {
    for (TransportClientBootstrap clientBootstrap : clientBootstraps) {
      clientBootstrap.doBootstrap(client, channel);
    }
  } catch (Exception e) { // catch non-RuntimeExceptions too as bootstrap may be written in Scala
    long bootstrapTimeMs = (System.nanoTime() - preBootstrap) / 1000000;
    logger.error("Exception while bootstrapping client after " + bootstrapTimeMs + " ms", e);
    client.close();
    throw Throwables.propagate(e);
  }
  long postBootstrap = System.nanoTime();

  logger.debug("Successfully created connection to {} after {} ms ({} ms spent in bootstraps)",
    address, (postBootstrap - preConnect) / 1000000, (postBootstrap - preBootstrap) / 1000000);

  return client;
}
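To see the same Netty Channel APIs in isolation from Spark's plumbing, here is a minimal, self-contained client sketch. The localhost:9999 endpoint, the class name, and the empty pipeline are placeholders, not anything from Spark:

import io.netty.bootstrap.Bootstrap;
import io.netty.channel.ChannelFuture;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.ChannelOption;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.SocketChannel;
import io.netty.channel.socket.nio.NioSocketChannel;

public class MinimalNettyClient {
  public static void main(String[] args) throws Exception {
    EventLoopGroup group = new NioEventLoopGroup();
    try {
      Bootstrap b = new Bootstrap()
        .group(group)
        .channel(NioSocketChannel.class)
        .option(ChannelOption.TCP_NODELAY, true)   // same options Spark sets above
        .option(ChannelOption.SO_KEEPALIVE, true)
        .handler(new ChannelInitializer<SocketChannel>() {
          @Override
          public void initChannel(SocketChannel ch) {
            // Spark fills the pipeline here via TransportContext.initializePipeline;
            // a real client would add its codecs and handlers to ch.pipeline().
          }
        });
      // Placeholder endpoint: assumes a server is listening on localhost:9999.
      ChannelFuture cf = b.connect("localhost", 9999).sync();
      cf.channel().closeFuture().sync();
    } finally {
      group.shutdownGracefully();
    }
  }
}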
Turning to TransportClient itself: it is a thin wrapper that holds the Netty Channel and delegates response bookkeeping to a TransportResponseHandler:

private final Channel channel;
private final TransportResponseHandler handler;
@Nullable private String clientId;
fetchChunk requests a single chunk of a remote stream. The callback is registered with the response handler before the ChunkFetchRequest is flushed; if the write fails, the registration is removed, the channel is closed, and the failure is reported to the callback:

public void fetchChunk(
    long streamId,
    final int chunkIndex,
    final ChunkReceivedCallback callback) {
  final String serverAddr = NettyUtils.getRemoteAddress(channel);
  final long startTime = System.currentTimeMillis();
  logger.debug("Sending fetch chunk request {} to {}", chunkIndex, serverAddr);

  final StreamChunkId streamChunkId = new StreamChunkId(streamId, chunkIndex);
  handler.addFetchRequest(streamChunkId, callback);

  channel.writeAndFlush(new ChunkFetchRequest(streamChunkId)).addListener(
    new ChannelFutureListener() {
      @Override
      public void operationComplete(ChannelFuture future) throws Exception {
        if (future.isSuccess()) {
          long timeTaken = System.currentTimeMillis() - startTime;
          logger.trace("Sending request {} to {} took {} ms", streamChunkId, serverAddr,
            timeTaken);
        } else {
          String errorMsg = String.format("Failed to send request %s to %s: %s", streamChunkId,
            serverAddr, future.cause());
          logger.error(errorMsg, future.cause());
          handler.removeFetchRequest(streamChunkId);
          channel.close();
          try {
            callback.onFailure(chunkIndex, new IOException(errorMsg, future.cause()));
          } catch (Exception e) {
            logger.error("Uncaught exception in RPC response callback handler!", e);
          }
        }
      }
    });
}
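A hedged sketch of the callback a caller might pass in; the streamId, chunk index, and buffer handling are assumptions for illustration:

// Hypothetical caller-side sketch: streamId is assumed to have been
// negotiated earlier (e.g. via an RPC); chunk 0 is arbitrary.
client.fetchChunk(streamId, 0, new ChunkReceivedCallback() {
  @Override
  public void onSuccess(int chunkIndex, ManagedBuffer buffer) {
    // The buffer is released once this call returns, so retain() it
    // if it must outlive the callback, and release() it later.
    buffer.retain();
    // ... hand the chunk off for processing ...
  }

  @Override
  public void onFailure(int chunkIndex, Throwable e) {
    // e.g. retry the fetch from another source or fail the consuming task
  }
});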
stream is similar, but stream responses are matched to callbacks purely by arrival order, so registering the callback and writing the request to the socket must happen atomically; hence the synchronized block:

public void stream(final String streamId, final StreamCallback callback) {
  final String serverAddr = NettyUtils.getRemoteAddress(channel);
  final long startTime = System.currentTimeMillis();
  logger.debug("Sending stream request for {} to {}", streamId, serverAddr);

  // Need to synchronize here so that the callback is added to the queue and the RPC is
  // written to the socket atomically, so that callbacks are called in the right order
  // when responses arrive.
  synchronized (this) {
    handler.addStreamCallback(callback);
    channel.writeAndFlush(new StreamRequest(streamId)).addListener(
      new ChannelFutureListener() {
        @Override
        public void operationComplete(ChannelFuture future) throws Exception {
          if (future.isSuccess()) {
            long timeTaken = System.currentTimeMillis() - startTime;
            logger.trace("Sending request for {} to {} took {} ms", streamId, serverAddr,
              timeTaken);
          } else {
            String errorMsg = String.format("Failed to send request for %s to %s: %s", streamId,
              serverAddr, future.cause());
            logger.error(errorMsg, future.cause());
            channel.close();
            try {
              callback.onFailure(streamId, new IOException(errorMsg, future.cause()));
            } catch (Exception e) {
              logger.error("Uncaught exception in RPC response callback handler!", e);
            }
          }
        }
      });
  }
}
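Correspondingly, a sketch of a StreamCallback; the stream id, the byte counting, and the logger are illustrative assumptions. onData may fire many times before onComplete:

// Hypothetical StreamCallback sketch.
client.stream("some-stream-id", new StreamCallback() {
  private long total = 0;

  @Override
  public void onData(String streamId, ByteBuffer buf) throws IOException {
    total += buf.remaining();
    // consume buf before returning; it is not guaranteed valid afterwards
  }

  @Override
  public void onComplete(String streamId) throws IOException {
    logger.info("Stream {} finished after {} bytes", streamId, total);
  }

  @Override
  public void onFailure(String streamId, Throwable cause) throws IOException {
    logger.error("Stream " + streamId + " failed", cause);
  }
});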
Finally, sendRpcSync layers a blocking API over the asynchronous sendRpc by funneling the result through a Guava SettableFuture:

public byte[] sendRpcSync(byte[] message, long timeoutMs) {
  final SettableFuture<byte[]> result = SettableFuture.create();

  sendRpc(message, new RpcResponseCallback() {
    @Override
    public void onSuccess(byte[] response) {
      result.set(response);
    }

    @Override
    public void onFailure(Throwable e) {
      result.setException(e);
    }
  });

  try {
    return result.get(timeoutMs, TimeUnit.MILLISECONDS);
  } catch (ExecutionException e) {
    throw Throwables.propagate(e.getCause());
  } catch (Exception e) {
    throw Throwables.propagate(e);
  }
}
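Usage is then a plain blocking call. A minimal sketch, assuming the payload encoding is whatever the RpcHandler on the far side expects (the "ping" payload and 5-second timeout are made-up values):

// Hypothetical blocking RPC; requires java.nio.charset.StandardCharsets.
byte[] request = "ping".getBytes(StandardCharsets.UTF_8);
byte[] response = client.sendRpcSync(request, 5000);
System.out.println(new String(response, StandardCharsets.UTF_8));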