发送数据一般通过 collector.collect:

/**
 * Collects records of type {@code T}, one at a time, and can be closed when no more
 * records will be emitted.
 *
 * @param <T> The type of the records that can be collected.
 */
public interface Collector<T> {

    /**
     * Emits a record.
     *
     * @param record The record to collect.
     */
    void collect(T record);

    /**
     * Closes the collector. If any data was buffered, that data will be flushed.
     */
    void close();
}

Output 继承自 Collector:

/**
 * A {@link Collector} that, in addition to records, can also emit watermarks.
 *
 * @param <T> The type of the records that can be collected.
 */
public interface Output<T> extends Collector<T> {

    /**
     * Emits a {@link Watermark} from an operator. This watermark is broadcast to all downstream
     * operators.
     *
     * <p>A watermark specifies that no element with a timestamp lower or equal to the watermark
     * timestamp will be emitted in the future.
     *
     * @param mark The watermark to emit.
     */
    void emitWatermark(Watermark mark);
}

RecordWriterOutput

public class RecordWriterOutput<OUT> implements Output<StreamRecord<OUT>> {

    private StreamRecordWriter<SerializationDelegate<StreamElement>> recordWriter;

    private SerializationDelegate<StreamElement> serializationDelegate;

    /**
     * Sets the record on the serialization delegate and emits the delegate through the
     * record writer.
     *
     * @param record The stream record to emit.
     * @throws RuntimeException wrapping any exception thrown while emitting; the original
     *         exception is kept as the cause.
     */
    @Override
    public void collect(StreamRecord<OUT> record) {
        serializationDelegate.setInstance(record);

        try {
            recordWriter.emit(serializationDelegate);
        } catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

 

RecordWriter

public class RecordWriter<T extends IOReadableWritable> {

    protected final ResultPartitionWriter writer; //负责写入ResultPartition的writer

    private final ChannelSelector<T> channelSelector; //选择写入哪个channel,默认RoundRobinChannelSelector

    private final int numChannels;

    /** {@link RecordSerializer} per outgoing channel */
private final RecordSerializer<T>[] serializers; public RecordWriter(ResultPartitionWriter writer) {
this(writer, new RoundRobinChannelSelector<T>());
} @SuppressWarnings("unchecked")
public RecordWriter(ResultPartitionWriter writer, ChannelSelector<T> channelSelector) {
this.writer = writer;
this.channelSelector = channelSelector; this.numChannels = writer.getNumberOfOutputChannels(); //获取channel数 /**
* The runtime exposes a channel abstraction for the produced results
* (see {@link ChannelSelector}). Every channel has an independent
* serializer.
*/
this.serializers = new SpanningRecordSerializer[numChannels];
for (int i = 0; i < numChannels; i++) {
serializers[i] = new SpanningRecordSerializer<T>(); //为每个channel初始化Serializer
}
} public void emit(T record) throws IOException, InterruptedException {
for (int targetChannel : channelSelector.selectChannels(record, numChannels)) { //对于选中的channels
// serialize with corresponding serializer and send full buffer
RecordSerializer<T> serializer = serializers[targetChannel]; synchronized (serializer) { //加锁,一条channel的serializer不能并发写
SerializationResult result = serializer.addRecord(record);
while (result.isFullBuffer()) { //buffer,即memorySegment已满
Buffer buffer = serializer.getCurrentBuffer(); //将buffer取出 if (buffer != null) {
writeBuffer(buffer, targetChannel, serializer); //将buffer写入
} buffer = writer.getBufferProvider().requestBufferBlocking(); //申请新的buffer
result = serializer.setNextBuffer(buffer); //set新的buffer到serializer
}
}
}
}

writeBuffer

/**
 * Writes the buffer to the given target channel through the ResultPartitionWriter. The
 * serializer's current buffer is cleared afterwards, whether or not the write succeeded.
 *
 * @throws IOException if the underlying writer fails to write the buffer.
 */
private void writeBuffer(Buffer buffer, int targetChannel, RecordSerializer<T> serializer)
        throws IOException {
    try {
        writer.writeBuffer(buffer, targetChannel);
    } finally {
        serializer.clearCurrentBuffer();
    }
}

可以看到写入和申请buffer都是通过ResultPartitionWriter

public final class ResultPartitionWriter implements EventListener<TaskEvent> {

    private final ResultPartition partition; //Result Partition

    private final TaskEventHandler taskEventHandler = new TaskEventHandler();

    public ResultPartitionWriter(ResultPartition partition) {
this.partition = partition;
} // ------------------------------------------------------------------------
// Attributes
// ------------------------------------------------------------------------ public ResultPartitionID getPartitionId() {
return partition.getPartitionId();
} public BufferProvider getBufferProvider() {
return partition.getBufferProvider();
} public int getNumberOfOutputChannels() {
return partition.getNumberOfSubpartitions();
} // ------------------------------------------------------------------------
// Data processing
// ------------------------------------------------------------------------ public void writeBuffer(Buffer buffer, int targetChannel) throws IOException {
partition.add(buffer, targetChannel);
}
}

而ResultPartitionWriter的操作都通过ResultPartition完成,writeBuffer只是把buffer add到partition

 

ResultPartition

初始化的过程,

task初始化ResultPartition,

// Produced intermediate result partitions
this.producedPartitions = new ResultPartition[partitions.size()];
this.writers = new ResultPartitionWriter[partitions.size()];

for (int idx = 0; idx < this.producedPartitions.length; idx++) {
    final ResultPartitionDeploymentDescriptor descriptor = partitions.get(idx);
    final ResultPartitionID partitionId =
            new ResultPartitionID(descriptor.getPartitionId(), executionId);

    // Build the partition from its deployment descriptor ...
    this.producedPartitions[idx] = new ResultPartition(
            taskNameWithSubtaskAndId,
            jobId,
            partitionId,
            descriptor.getPartitionType(),
            descriptor.getEagerlyDeployConsumers(),
            descriptor.getNumberOfSubpartitions(),
            networkEnvironment.getPartitionManager(),
            networkEnvironment.getPartitionConsumableNotifier(),
            ioManager,
            networkEnvironment.getDefaultIOMode());

    // ... and wrap it in the writer stored at the same index.
    this.writers[idx] = new ResultPartitionWriter(this.producedPartitions[idx]);
}

在task.run中先到NetworkEnvironment中register,

network.registerTask(this);

这里做的主要工作是,创建大小等同于subPartition数目的LocalBufferPool,并register到ResultPartition

// Create the local buffer pool. Note that the number of required segments passed in is the
// number of subpartitions, i.e. one segment per subpartition.
bufferPool = networkBufferPool.createBufferPool(partition.getNumberOfSubpartitions(), false);
// Register the local pool with the ResultPartition.
partition.registerBufferPool(bufferPool);

 

所以,

writer.getBufferProvider().requestBufferBlocking();

就是调用localBufferPool.requestBuffer

如果有availableMemorySegments就直接用

如果没有,

// Fragment of the buffer-request retry loop (the enclosing loop is not part of this excerpt).
if (numberOfRequestedMemorySegments < currentPoolSize) {
    // The pool may still grow: try to get another segment from the NetworkBufferPool.
    final MemorySegment segment = networkBufferPool.requestMemorySegment();

    if (segment != null) {
        numberOfRequestedMemorySegments++;
        availableMemorySegments.add(segment);

        continue;
    }
}

// No new segment could be requested: ask the owner to try to release memory.
if (askToRecycle) {
    owner.releaseMemory(1);
}

// Last resort for blocking requests: wait up to 2 seconds on the available-segments queue.
if (isBlocking) {
    availableMemorySegments.wait(2000);
}
/**
 * Asks the subpartitions, in order, to release memory until at least {@code toRelease}
 * units have been reported as released or all subpartitions have been asked.
 *
 * @param toRelease The amount of memory that should be released.
 */
public void releaseMemory(int toRelease) throws IOException {
    int remaining = toRelease;

    for (ResultSubpartition subpartition : subpartitions) {
        // Let the subpartition release memory and subtract what it reports.
        remaining -= subpartition.releaseMemory();

        // Only release as much memory as needed
        if (remaining <= 0) {
            return;
        }
    }
}

可以看到,在emit的时候,如果没有可用的segment,是会blocking等待的

对于PipelinedSubpartition,releaseMemory什么都不会做,所以这里如果buffer没有被及时发送出去并回收,会不断地blocking等待

/**
 * The pipelined subpartition does not react to memory release requests. The buffers will be
 * recycled by the consuming task.
 *
 * @return always {@code 0}.
 */
public int releaseMemory() {
    return 0;
}

 

ResultPartition.add
public void add(Buffer buffer, int subpartitionIndex) throws IOException {
boolean success = false; try {
checkInProduceState(); final ResultSubpartition subpartition = subpartitions[subpartitionIndex]; //取出index相应的ResultSubpartition synchronized (subpartition) {
success = subpartition.add(buffer); //把buffer add到ResultSubpartition // Update statistics
totalNumberOfBuffers++;
totalNumberOfBytes += buffer.getSize();
}
}
finally {
if (success) {
notifyPipelinedConsumers(); //通知ResultPartitionConsumableNotifier触发notifyPartitionConsumable
}
else {
buffer.recycle(); //失败,回收此buffer
}
}
}

 

对于PipelinedSubpartition,add逻辑就是加入buffer

/**
* A pipelined in-memory only subpartition, which can be consumed once.
*/
class PipelinedSubpartition extends ResultSubpartition { /**
* A data availability listener. Registered, when the consuming task is faster than the
* producing task.
*/
private NotificationListener registeredListener; //来数据后,通知consuming /** The read view to consume this subpartition. */
private PipelinedSubpartitionView readView; //read view /** All buffers of this subpartition. Access to the buffers is synchronized on this object. */
final ArrayDeque<Buffer> buffers = new ArrayDeque<Buffer>(); //buffer队列 PipelinedSubpartition(int index, ResultPartition parent) {
super(index, parent);
} @Override
public boolean add(Buffer buffer) {
checkNotNull(buffer); final NotificationListener listener; synchronized (buffers) {
if (isReleased || isFinished) {
return false;
} // Add the buffer and update the stats
buffers.add(buffer); //加入buffer队列
updateStatistics(buffer); // Get the listener...
listener = registeredListener;
registeredListener = null;
} // Notify the listener outside of the synchronized block
if (listener != null) {
listener.onNotification(); //触发listener
} return true;
}

 

 

NettyConnectionManager

/**
 * Builds the partition request protocol from the given components and initializes both the
 * Netty client and the Netty server with it.
 */
@Override
public void start(
        ResultPartitionProvider partitionProvider,
        TaskEventDispatcher taskEventDispatcher,
        NetworkBufferPool networkbufferPool) throws IOException {

    PartitionRequestProtocol partitionRequestProtocol = new PartitionRequestProtocol(
            partitionProvider, taskEventDispatcher, networkbufferPool);

    client.init(partitionRequestProtocol, bufferPool);
    server.init(partitionRequestProtocol, bufferPool);
}

 

PartitionRequestProtocol

// +-------------------------------------------------------------------+
// |                      SERVER CHANNEL PIPELINE                      |
// |                                                                   |
// |    +----------+----------+ (3) write  +----------------------+    |
// |    | Queue of queues     +----------->| Message encoder      |    |
// |    +----------+----------+            +-----------+----------+    |
// |              /|\                                 \|/              |
// |               | (2) enqueue                       |               |
// |    +----------+----------+                        |               |
// |    | Request handler     |                        |               |
// |    +----------+----------+                        |               |
// |              /|\                                  |               |
// |               |                                   |               |
// |    +----------+----------+                        |               |
// |    | Message decoder     |                        |               |
// |    +----------+----------+                        |               |
// |              /|\                                  |               |
// |               |                                   |               |
// |    +----------+----------+                        |               |
// |    | Frame decoder       |                        |               |
// |    +----------+----------+                        |               |
// |              /|\                                  |               |
// +---------------+-----------------------------------+---------------+
// |               | (1) client request               \|/
// +---------------+-----------------------------------+---------------+
// |               |                                   |               |
// |       [ Socket.read() ]                    [ Socket.write() ]     |
// |                                                                   |
// |       Netty Internal I/O Threads (Transport Implementation)       |
// +-------------------------------------------------------------------+

/**
 * Returns the handlers of the server channel pipeline shown in the diagram above: message
 * encoder, frame length decoder, message decoder, the partition request server handler and
 * the queue of partition queues, in that order.
 */
@Override
public ChannelHandler[] getServerChannelHandlers() {
    PartitionRequestQueue queueOfPartitionQueues = new PartitionRequestQueue();
    PartitionRequestServerHandler serverHandler = new PartitionRequestServerHandler(
            partitionProvider, taskEventDispatcher, queueOfPartitionQueues, networkbufferPool);

    return new ChannelHandler[] {
            messageEncoder,
            createFrameLengthDecoder(),
            messageDecoder,
            serverHandler,
            queueOfPartitionQueues
    };
}

 

PartitionRequestServerHandler

ServerHandler会创建一个大小至少为1的bufferPool;第二个参数为false(即非固定大小),意思是如果networkBufferPool有多余的segment,也会分配进来

/**
 * After the superclass has handled the registration, creates the buffer pool used by this
 * handler: one required buffer, with {@code false} as the second argument.
 */
public void channelRegistered(ChannelHandlerContext ctx) throws Exception {
    super.channelRegistered(ctx);

    bufferPool = networkBufferPool.createBufferPool(1, false);
}

 

// NOTE: abridged excerpt. The method's opening brace and the catch/finally part of the try
// block are not shown here.
// Handles an incoming PartitionRequest: creates a view on the requested subpartition and
// enqueues it on the outbound queue.
protected void channelRead0(ChannelHandlerContext ctx, NettyMessage msg) throws Exception
PartitionRequest request = (PartitionRequest) msg;

LOG.debug("Read channel on {}: {}.", ctx.channel().localAddress(), request);

try {
ResultSubpartitionView subpartition =
partitionProvider.createSubpartitionView(
request.partitionId,
request.queueIndex,
bufferPool); outboundQueue.enqueue(subpartition, request.receiverId); // hand the view to the PartitionRequestQueue for sending
}

 

ResultPartitionManager实现了ResultPartitionProvider接口

调用ResultPartitionManager.createSubpartitionView

synchronized (registeredPartitions) {
    // Look up the registered partition by producer ID and partition ID.
    final ResultPartition partition = registeredPartitions.get(
            partitionId.getProducerId(),
            partitionId.getPartitionId());

    // Delegate view creation to the partition itself.
    return partition.createSubpartitionView(subpartitionIndex, bufferProvider);
}

 

 

ResultPartition

/**
 * Creates a read view on the subpartition with the given index.
 *
 * <p>Fails the state check if the pending reference count is {@code -1}
 * ("Partition released.") or not positive ("Partition not pinned.").
 */
public ResultSubpartitionView createSubpartitionView(int index, BufferProvider bufferProvider) throws IOException {
    final int pending = pendingReferences.get();

    checkState(pending != -1, "Partition released.");
    checkState(pending > 0, "Partition not pinned.");

    return subpartitions[index].createReadView(bufferProvider);
}

pendingReferences的定义

/**
* The total number of references to subpartitions of this result. The result partition can be
* safely released, iff the reference count is zero. A reference count of -1 denotes that the
* result partition has been released.
*/
private final AtomicInteger pendingReferences = new AtomicInteger();

 

PipelinedSubpartitionView

class PipelinedSubpartitionView implements ResultSubpartitionView {

    /** The subpartition this view belongs to. */
    private final PipelinedSubpartition parent;

    /** Flag indicating whether this view has been released. */
    private AtomicBoolean isReleased = new AtomicBoolean();

    PipelinedSubpartitionView(PipelinedSubpartition parent) {
        this.parent = checkNotNull(parent);
    }

    /**
     * Polls the next buffer directly from the parent subpartition's buffer queue, holding the
     * queue's lock while doing so.
     */
    @Override
    public Buffer getNextBuffer() {
        synchronized (parent.buffers) {
            return parent.buffers.poll();
        }
    }

    /**
     * Registers the listener with the parent subpartition, unless this view has already been
     * released.
     */
    @Override
    public boolean registerListener(NotificationListener listener) {
        if (isReleased.get()) {
            return false;
        }
        return parent.registerListener(listener);
    }

    /** Called once the subpartition has been consumed; releases this view. */
    @Override
    public void notifySubpartitionConsumed() {
        releaseAllResources();
    }

    @Override
    public void releaseAllResources() {
        if (isReleased.compareAndSet(false, true)) {
            // The view doesn't hold any resources and the parent cannot be restarted. Therefore,
            // it's OK to notify about consumption as well.
            parent.onConsumedSubpartition();
        }
    }

 

PartitionRequestQueue在发送数据的时候,会调用getNextBuffer获取数据

发送完,即收到EndOfPartitionEvent后,调用notifySubpartitionConsumed

释放会调用到,PipelinedSubpartition.onConsumedSubpartition –>  ResultPartition.onConsumedSubpartition

void onConsumedSubpartition(int subpartitionIndex) {

    if (isReleased.get()) { //如果已经释放了
return;
} int refCnt = pendingReferences.decrementAndGet(); //一个Subpartition消费完,减1 if (refCnt == 0) { //如果所有subPartition消费完
partitionManager.onConsumedPartition(this); //通知partitionManager,release这个partition
}
}

 

PipelinedSubpartition中的buffer,何时被释放,放回localbufferpool

具体看下,PartitionRequestQueue的发送过程,

writeAndFlushNextMessageIfPossible
buffer = currentPartitionQueue.getNextBuffer();

// Wrap the buffer in a BufferResponse.
BufferResponse resp = new BufferResponse(buffer, currentPartitionQueue.getSequenceNumber(), currentPartitionQueue.getReceiverId());

// An event (not a data buffer) of type EndOfPartitionEvent marks the end of the partition.
if (!buffer.isBuffer() &&
        EventSerializer.fromBuffer(buffer, getClass().getClassLoader()).getClass() == EndOfPartitionEvent.class) {
    // Notify that the subpartition has been consumed ...
    currentPartitionQueue.notifySubpartitionConsumed();
    // ... release all resources of the queue ...
    currentPartitionQueue.releaseAllResources();
    // ... and mark its receiver as released.
    markAsReleased(currentPartitionQueue.getReceiverId());

    currentPartitionQueue = null;
}

// The actual send. Per the article, the write listener
// (WriteAndFlushNextMessageIfPossibleListener) calls writeAndFlushNextMessageIfPossible again,
// so buffers are read repeatedly.
channel.writeAndFlush(resp).addListener(writeListener);
 

在BufferResponse中,

/**
 * Serializes this response into a newly allocated {@link ByteBuf}: receiver ID, sequence
 * number, the is-buffer flag, the buffer size and finally the buffer's bytes.
 *
 * <p>The wrapped buffer is recycled in all cases. If anything goes wrong, the partially
 * written result is released and the failure is rethrown wrapped in an {@link IOException}.
 */
@Override
ByteBuf write(ByteBufAllocator allocator) throws IOException {
    // 4 + 1 + 4 match the int, boolean and int written below; the leading 16 is presumably
    // the serialized receiver ID (confirm against receiverId.writeTo).
    final int length = 16 + 4 + 1 + 4 + buffer.getSize();

    ByteBuf result = null;
    try {
        result = allocateBuffer(allocator, ID, length);

        receiverId.writeTo(result);
        result.writeInt(sequenceNumber);
        result.writeBoolean(buffer.isBuffer());
        result.writeInt(buffer.getSize());
        result.writeBytes(buffer.getNioBuffer());

        return result;
    } catch (Throwable t) {
        if (result != null) {
            result.release();
        }

        throw new IOException(t);
    } finally {
        if (buffer != null) {
            // Recycle the buffer.
            buffer.recycle();
        }
    }
}

 

Buffer

/**
 * Drops one reference to this buffer. When the reference count reaches zero, the memory
 * segment is handed back to the recycler. The count is updated under {@code recycleLock}.
 */
public void recycle() {
    synchronized (recycleLock) {
        referenceCount--;

        if (referenceCount == 0) {
            recycler.recycle(memorySegment);
        }
    }
}

 

LocalBufferPool

/**
 * Takes back a memory segment.
 *
 * <p>If the pool is destroyed or holds more segments than its current size, the segment is
 * returned via {@code returnMemorySegment}. Otherwise it is handed to a waiting listener if
 * one is registered, or put back into the queue of available segments.
 */
@Override
public void recycle(MemorySegment segment) {
    synchronized (availableMemorySegments) {
        if (isDestroyed || numberOfRequestedMemorySegments > currentPoolSize) {
            returnMemorySegment(segment);
            return;
        }

        final EventListener<Buffer> waiting = registeredListeners.poll();

        if (waiting != null) {
            // A listener is registered: hand the segment to it directly as a new buffer.
            try {
                waiting.onEvent(new Buffer(segment, this));
            } catch (Throwable ignored) {
                // The listener failed: keep the segment available for the next request.
                availableMemorySegments.add(segment);
                availableMemorySegments.notify();
            }
            return;
        }

        // No listener: put the segment back for later use and wake up one waiter.
        availableMemorySegments.add(segment);
        availableMemorySegments.notify();
    }
}

Flink - ResultPartition的更多相关文章

  1. Flink Internals

    https://cwiki.apache.org/confluence/display/FLINK/Flink+Internals   Memory Management (Batch API) In ...

  2. 追源索骥:透过源码看懂Flink核心框架的执行流程

    li,ol.inline>li{display:inline-block;padding-right:5px;padding-left:5px}dl{margin-bottom:20px}dt, ...

  3. Flink的TaskManager启动(源码分析)

    通过启动脚本已经找到了TaskManager 的启动类org.apache.flink.runtime.taskexecutor.TaskManagerRunner 来看一下它的main方法中 最后被 ...

  4. Flink的Job启动TaskManager端(源码分析)

    前面说到了  Flink的JobManager启动(源码分析)  启动了TaskManager 然后  Flink的Job启动JobManager端(源码分析)  说到JobManager会将转化得到 ...

  5. Flink task之间的数据交换

    Flink中的数据交换是围绕着下面的原则设计的: 1.数据交换的控制流(即,为了启动交换而传递的消息)是由接收者发起的,就像原始的MapReduce一样. 2.用于数据交换的数据流,即通过电缆的实际数 ...

  6. Apache Flink - 架构和拓扑

    Flink结构: flink cli 解析本地环境配置,启动 ApplicationMaster 在 ApplicationMaster 中启动 JobManager 在 ApplicationMas ...

  7. 【转帖】两年Flink迁移之路:从standalone到on yarn,处理能力提升五倍

    两年Flink迁移之路:从standalone到on yarn,处理能力提升五倍 https://segmentfault.com/a/1190000020209179 flink 1.7k 次阅读 ...

  8. Flink整体执行流程

    以Flink源码中自带的WordCount为例,执行的入口从用户程序的execute()函数入手,execute()的源码如下: public JobExecutionResult execute(S ...

  9. Apache Flink 的迁移之路,2 年处理效果提升 5 倍

    一.背景与痛点 在 2017 年上半年以前,TalkingData 的 App Analytics 和 Game Analytics 两个产品,流式框架使用的是自研的 td-etl-framework ...

随机推荐

  1. Java四类八种数据类型

    http://www.cnblogs.com/simplefrog/archive/2012/07/15/2592011.html 第一类:逻辑型boolean 第二类:文本型char 第三类:整数型 ...

  2. Solr学习笔记——导入JSON数据

    1.导入JSON数据的方式有两种,一种是在web管理界面中导入,另一种是使用curl命令来导入 curl http://localhost:8983/solr/baikeperson/update/j ...

  3. Scala学习笔记——类型

    1.Option类型 Option类型可以有两种类型,一种是Some(x),一种是None对象 比如Scala的Map的get方法发现了指定键,返回Some(x),没有发现,返回None对象 2.列表 ...

  4. 13组合模式Composite

    一.什么是组合模式 Composite模式也叫组合模式,是构造型的设 计模式之一.通过递归手段来构造树形的对象结 构,并可以通过一个对象来访问整个对象树. 二.组合模式的结构 三.组合模式的角色和职责 ...

  5. 解决Win10 Virtualbox5.2.18桥接不能联网小记

    1.设备管理器,右键没添加过时硬件(如果没有,则在cmd中键入命令hdwwiz C:\windows\system32>hdwwiz),按照如下图操作 如此安装Microsoft KM-TEST ...

  6. python实现微信接口——itchat模块

    python实现微信接口——itchat模块 安装 sudo pip install itchat 登录 itchat.auto_login()  这种方法将会通过微信扫描二维码登录,但是这种登录的方 ...

  7. 【代码审计】XIAOCMS_后台database.php页面存在任意文件删除漏洞

      0x00 环境准备 XIAOCMS官网: http://www.xiaocms.com/ 网站源码版本:XiaoCms (发布时间:2014-12-29) 程序源码下载:http://www.xi ...

  8. [转]Git忽略规则及.gitignore规则不生效的解决办法

    在git中如果想忽略掉某个文件,不让这个文件提交到版本库中,可以使用修改根目录中 .gitignore 文件的方法(如无,则需自己手工建立此文件).这个文件每一行保存了一个匹配的规则例如: # 此为注 ...

  9. Qt编写软件运行时间记录(开源)

    在早期开发的软件中,尤其是初学者入门者写的软件,软件运行久了,难免遇到意外崩溃的时候,可是大部分的运行设备可能在现场客户那,需要记住每一次从软件启动后到软件意外关闭前的运行时间,需要记录的信息包括:编 ...

  10. css3整理--media

    media语法: <link rel="stylesheet" media="screen and (max-width: 600px)" href=&q ...