Spark 通信原理

https://github.com/apache/spark/tree/master/core/src/main/scala/org/apache/spark/network

https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/network/BlockTransferService.scala

https://github.com/apache/spark/blob/master/core/src/main/scala/org/apache/spark/network/BlockDataManager.scala

package org.apache.spark.network

import scala.reflect.ClassTag

import org.apache.spark.network.buffer.ManagedBuffer

import org.apache.spark.storage.{BlockId, StorageLevel}

/**
 * Contract for reading, storing and unlocking blocks held on the local node.
 *
 * The access modifier is kept directly above the `trait` keyword (no blank line in between) so
 * that it stays attached to the definition.
 */
private[spark]
trait BlockDataManager {

  /**
   * Interface to get local block data. Throws an exception if the block cannot be found or
   * cannot be read successfully.
   *
   * @param blockId identifier of the block to read
   * @return the block's contents as a [[ManagedBuffer]]
   */
  def getBlockData(blockId: BlockId): ManagedBuffer

  /**
   * Put the block locally, using the given storage level.
   *
   * @param blockId  identifier under which the block is stored
   * @param data     the block's contents
   * @param level    storage level to use for the block
   * @param classTag class tag supplied by the caller alongside the data
   * @return true if the block was stored and false if the put operation failed or the block
   *         already existed
   */
  def putBlockData(
      blockId: BlockId,
      data: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Boolean

  /**
   * Release locks acquired by [[putBlockData]] and [[getBlockData]].
   *
   * @param blockId       identifier of the block whose lock is released
   * @param taskAttemptId optional task attempt id associated with the lock
   */
  def releaseLock(blockId: BlockId, taskAttemptId: Option[Long]): Unit
}

package org.apache.spark.network

import java.io.Closeable

import java.nio.ByteBuffer

import scala.concurrent.{Future, Promise}

import scala.concurrent.duration.Duration

import scala.reflect.ClassTag

import org.apache.spark.internal.Logging

import org.apache.spark.network.buffer.{FileSegmentManagedBuffer, ManagedBuffer, NioManagedBuffer}

import org.apache.spark.network.shuffle.{BlockFetchingListener, ShuffleClient, TempFileManager}

import org.apache.spark.storage.{BlockId, StorageLevel}

import org.apache.spark.util.ThreadUtils

/**
 * Base class for services that move blocks between nodes.
 *
 * Subclasses provide the asynchronous primitives ([[fetchBlocks]], [[uploadBlock]]); the
 * blocking variants [[fetchBlockSync]] and [[uploadBlockSync]] are implemented here on top of
 * them.
 */
private[spark]
abstract class BlockTransferService extends ShuffleClient with Closeable with Logging {

  /**
   * Initialize the transfer service by giving it the BlockDataManager that can be used to fetch
   * local blocks or put local blocks.
   */
  def init(blockDataManager: BlockDataManager): Unit

  /**
   * Tear down the transfer service.
   */
  def close(): Unit

  /**
   * Port number the service is listening on, available only after [[init]] is invoked.
   */
  def port: Int

  /**
   * Host name the service is listening on, available only after [[init]] is invoked.
   */
  def hostName: String

  /**
   * Fetch a sequence of blocks from a remote node asynchronously,
   * available only after [[init]] is invoked.
   *
   * Note that this API takes a sequence so the implementation can batch requests, and does not
   * return a future so the underlying implementation can invoke onBlockFetchSuccess as soon as
   * the data of a block is fetched, rather than waiting for all blocks to be fetched.
   */
  override def fetchBlocks(
      host: String,
      port: Int,
      execId: String,
      blockIds: Array[String],
      listener: BlockFetchingListener,
      tempFileManager: TempFileManager): Unit

  /**
   * Upload a single block to a remote node, available only after [[init]] is invoked.
   */
  def uploadBlock(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Future[Unit]

  /**
   * A special case of [[fetchBlocks]], as it fetches only one block and is blocking.
   *
   * It is also only available after [[init]] is invoked.
   *
   * All arguments are forwarded unchanged to [[fetchBlocks]], with `blockId` wrapped in a
   * single-element array. The calling thread waits without a timeout until the listener
   * reports success or failure.
   *
   * @return the fetched data: a [[FileSegmentManagedBuffer]] is returned as received, any other
   *         buffer is copied into a new [[NioManagedBuffer]]
   */
  def fetchBlockSync(
      host: String,
      port: Int,
      execId: String,
      blockId: String,
      tempFileManager: TempFileManager): ManagedBuffer = {
    // Completed by the listener below; the calling thread blocks on its future.
    val result = Promise[ManagedBuffer]()
    fetchBlocks(host, port, execId, Array(blockId),
      new BlockFetchingListener {
        override def onBlockFetchFailure(blockId: String, exception: Throwable): Unit = {
          result.failure(exception)
        }

        override def onBlockFetchSuccess(blockId: String, data: ManagedBuffer): Unit = {
          data match {
            case f: FileSegmentManagedBuffer =>
              // Handed back as-is, without copying.
              result.success(f)
            case _ =>
              try {
                // NOTE(review): `data.size.toInt` assumes the block fits in an Int-sized
                // buffer (< 2 GB) -- confirm larger blocks always arrive as file segments.
                val ret = ByteBuffer.allocate(data.size.toInt)
                ret.put(data.nioByteBuffer())
                ret.flip()
                result.success(new NioManagedBuffer(ret))
              } catch {
                // Deliberately broad: if the copy throws anything (I/O error, allocation
                // failure, ...) and the promise is left incomplete, the caller waiting on
                // Duration.Inf below would block forever. The error is not swallowed; it is
                // surfaced to the waiting thread through the promise.
                case e: Throwable => result.tryFailure(e)
              }
          }
        }
      }, tempFileManager)
    ThreadUtils.awaitResult(result.future, Duration.Inf)
  }

  /**
   * Upload a single block to a remote node, available only after [[init]] is invoked.
   *
   * This method is similar to [[uploadBlock]], except this one blocks the thread
   * until the upload finishes.
   */
  def uploadBlockSync(
      hostname: String,
      port: Int,
      execId: String,
      blockId: BlockId,
      blockData: ManagedBuffer,
      level: StorageLevel,
      classTag: ClassTag[_]): Unit = {
    val future = uploadBlock(hostname, port, execId, blockId, blockData, level, classTag)
    ThreadUtils.awaitResult(future, Duration.Inf)
  }
}

spark通信原理的更多相关文章

Spark Shuffle 堆外内存溢出问题与解决（Shuffle通信原理）
Spark Shuffle 堆外内存溢出问题与解决(Shuffle通信原理) http://xiguada.org/spark-shuffle-direct-buffer-oom/ 问题描述 Spar ...
[Spark内核] 第32课：Spark Worker原理和源码剖析解密：Worker工作流程图、Worker启动Driver源码解密、Worker启动Executor源码解密等
本課主題 Spark Worker 原理 Worker 启动 Driver 源码鉴赏 Worker 启动 Executor 源码鉴赏 Worker 与 Master 的交互关系 [引言部份:你希望读者 ...
Spark核心技术原理透视一（Spark运行原理）
在大数据领域,只有深挖数据科学领域,走在学术前沿,才能在底层算法和模型方面走在前面,从而占据领先地位. Spark的这种学术基因,使得它从一开始就在大数据领域建立了一定优势.无论是性能,还是方案的统一 ...
spark核心原理
spark运行结构图如下: spark基本概念应用程序(application):用户编写的spark应用程序,包含驱动程序(Driver)和分布在集群中多个节点上运行的Executor代码,在执行 ...
Spark集群基础概念与 spark架构原理
一.Spark集群基础概念将DAG划分为多个stage阶段,遵循以下原则: 1.将尽可能多的窄依赖关系的RDD划为同一个stage阶段. 2.当遇到shuffle操作,就意味着上一个stage阶段结 ...
Spark Worker原理和源码剖析解密：Worker工作流程图、Worker启动Driver源码解密、Worker启动Executor源码解密等
本课主题 Spark Worker 原理 Worker 启动 Driver 源码鉴赏 Worker 启动 Executor 源码鉴赏 Worker 与 Master 的交互关系 Spark Worke ...
基于web的IM软件通信原理分析
关于IM(InstantMessaging)即时通信类软件(如微信,QQ),大多数都是桌面应用程序或者native应用较为流行,而网上关于原生IM或桌面IM软件类的通信原理介绍也较多,此处不再赘述.而 ...
Socket 通信原理(Android客户端和服务器以TCP&&UDP方式互通)
转载地址:http://blog.csdn.net/mad1989/article/details/9147661 ZERO.前言有关通信原理内容是在网上或百科整理得到,代码部分为本人所写,如果不当 ...
SSL 通信原理及Tomcat SSL 配置
SSL 通信原理及Tomcat SSL 双向配置目录1 参考资料 .................................................................. ...

随机推荐

Python之虚拟机操作：利用VIX二次开发，实现自己的pyvix（系列一）成果展示和python实例
在日常工作中,需要使用python脚本去自动化控制VMware虚拟机,现有的pyvix功能较少,而且不适合个人编程习惯,故萌发了开发一个berlin版本pyvix的想法,暂且叫其OpenPyVix.O ...
算法复习——差分约束（ssoi种树）
题目: 题目描述为了绿化乡村,H 村积极响应号召,开始种树了. H 村里有 n 幢房屋,这些屋子的排列顺序很有特点,在一条直线上.于是方便起见,我们给它们标上 1-n .树就种在房子前面的空地上. ...
P1681 最大正方形II (动态规划)
题目背景忙完了学校的事,v神终于可以做他的"正事":陪女朋友散步.一天,他和女朋友走着走着,不知不觉就来到了一个千里无烟的地方.v神正要往回走,如发现了一块牌子,牌子上有有一行小 ...
Snoop resynchronization mechanism to preserve read ordering
A processor employing a post-cache (LS2) buffer. Loads are stored into the LS2buffer after probing t ...
标准C程序设计七---04
Linux应用编程深入语言编程标准C程序设计七---经典C11程序设计以下内容为阅读: <标准C程序设计>(第7版) 作者 ...
Mongodb报错：ERROR: child process failed, exited with error number 1
Mongodb在启动时报错: 2018-10-16T11:18:54.533+0800 I CONTROL [main] Automatically disabling TLS 1.0, to for ...
android Paint属性
** * Paint类介绍 * * Paint即画笔,在绘图过程中起到了极其重要的作用,画笔主要保存了颜色, * 样式等绘制信息,指定了如何绘制文本和图形 ...
MySQL与MSSQL的一些语法差异（持续更新中）
分号不能少:分号不能少:分号不能少:重要的事情说3遍 Insert或者Update的数据包含反斜杠\的时候需要进行转义\\,例:insert into tablename(id,name) value ...
让win7任务条上的文件夹打开是c,d,e,f而不是库
如果资源管理器是打开的,则右键点击资源管理器的图标,在跳出的菜单中,右键点击“Windows资源管理器”,选择“属性”. 在“快捷方式’选项卡,“目标”一栏,默认的是 %windir%\explore ...
sklearn 特征选择
1.移除低方差的特征(Removing features with low variance) VarianceThreshold 是特征选择中的一项基本方法.它会移除所有方差不满足阈值的特征.默认设 ...

spark通信原理

spark通信原理的更多相关文章

随机推荐

热门专题