Sending data generally goes through Collector.collect:

public interface Collector<T> {

    /**
     * Emits a record.
     *
     * @param record The record to collect.
     */
    void collect(T record);

    /**
     * Closes the collector. If any data was buffered, that data will be flushed.
     */
    void close();
}
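
To make the contract concrete, here is a minimal hypothetical implementation that simply buffers emitted records in memory (the class name is an assumption for illustration, not Flink code; it only assumes the Collector interface above is on the classpath):

import java.util.ArrayList;
import java.util.List;

// Hypothetical Collector that keeps emitted records in memory; illustration only.
public class ListCollector<T> implements Collector<T> {

    private final List<T> collected = new ArrayList<T>();

    @Override
    public void collect(T record) {
        collected.add(record); // "emit" the record by buffering it
    }

    @Override
    public void close() {
        // nothing is buffered downstream, so there is nothing to flush
    }

    public List<T> getCollected() {
        return collected;
    }
}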

Output extends it, adding watermark emission:

public interface Output<T> extends Collector<T> {

    /**
     * Emits a {@link Watermark} from an operator. This watermark is broadcast to all downstream
     * operators.
     *
     * <p>A watermark specifies that no element with a timestamp lower or equal to the watermark
     * timestamp will be emitted in the future.
     */
    void emitWatermark(Watermark mark);
}

RecordWriterOutput

public class RecordWriterOutput<OUT> implements Output<StreamRecord<OUT>> {

    private StreamRecordWriter<SerializationDelegate<StreamElement>> recordWriter;

    private SerializationDelegate<StreamElement> serializationDelegate;

    @Override
    public void collect(StreamRecord<OUT> record) {
        serializationDelegate.setInstance(record);

        try {
            recordWriter.emit(serializationDelegate);
        }
        catch (Exception e) {
            throw new RuntimeException(e.getMessage(), e);
        }
    }

 

RecordWriter

public class RecordWriter<T extends IOReadableWritable> {

    protected final ResultPartitionWriter writer; // writes into the ResultPartition

    private final ChannelSelector<T> channelSelector; // picks the target channel(s); defaults to RoundRobinChannelSelector

    private final int numChannels;

    /** {@link RecordSerializer} per outgoing channel */
    private final RecordSerializer<T>[] serializers;

    public RecordWriter(ResultPartitionWriter writer) {
        this(writer, new RoundRobinChannelSelector<T>());
    }

    @SuppressWarnings("unchecked")
    public RecordWriter(ResultPartitionWriter writer, ChannelSelector<T> channelSelector) {
        this.writer = writer;
        this.channelSelector = channelSelector;
        this.numChannels = writer.getNumberOfOutputChannels(); // number of channels

        /**
         * The runtime exposes a channel abstraction for the produced results
         * (see {@link ChannelSelector}). Every channel has an independent
         * serializer.
         */
        this.serializers = new SpanningRecordSerializer[numChannels];
        for (int i = 0; i < numChannels; i++) {
            serializers[i] = new SpanningRecordSerializer<T>(); // one serializer per channel
        }
    }

    public void emit(T record) throws IOException, InterruptedException {
        for (int targetChannel : channelSelector.selectChannels(record, numChannels)) { // for each selected channel
            // serialize with corresponding serializer and send full buffer
            RecordSerializer<T> serializer = serializers[targetChannel];

            synchronized (serializer) { // a channel's serializer must not be written concurrently
                SerializationResult result = serializer.addRecord(record);
                while (result.isFullBuffer()) { // the buffer, i.e. the MemorySegment, is full
                    Buffer buffer = serializer.getCurrentBuffer(); // take out the full buffer
                    if (buffer != null) {
                        writeBuffer(buffer, targetChannel, serializer); // write it out
                    }

                    buffer = writer.getBufferProvider().requestBufferBlocking(); // request a fresh buffer
                    result = serializer.setNextBuffer(buffer); // hand the new buffer to the serializer
                }
            }
        }
    }

writeBuffer

private void writeBuffer(
        Buffer buffer,
        int targetChannel,
        RecordSerializer<T> serializer) throws IOException {

    try {
        writer.writeBuffer(buffer, targetChannel);
    }
    finally {
        serializer.clearCurrentBuffer();
    }
}
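
Taken together, emit and writeBuffer implement a simple pattern: serialize into a fixed-size buffer, flush whenever it fills up, then continue in a fresh buffer. A standalone sketch of that loop (illustrative names, not Flink APIs):

import java.io.IOException;
import java.nio.ByteBuffer;

// Standalone sketch of the emit loop: copy a serialized record into a fixed-size
// buffer, flushing and starting a new buffer whenever the current one fills up.
// A record larger than one buffer spans several, as in SpanningRecordSerializer.
public class BufferingEmitter {

    private static final int BUFFER_SIZE = 32 * 1024; // stand-in for a MemorySegment

    private ByteBuffer current = ByteBuffer.allocate(BUFFER_SIZE);

    public void emit(byte[] serializedRecord) throws IOException {
        int offset = 0;
        while (offset < serializedRecord.length) {
            int toCopy = Math.min(current.remaining(), serializedRecord.length - offset);
            current.put(serializedRecord, offset, toCopy);
            offset += toCopy;

            if (!current.hasRemaining()) { // "full buffer", like SerializationResult.isFullBuffer()
                flush(current);
                current = ByteBuffer.allocate(BUFFER_SIZE); // stand-in for requestBufferBlocking()
            }
        }
    }

    private void flush(ByteBuffer full) throws IOException {
        // stand-in for writer.writeBuffer(buffer, targetChannel)
    }
}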

As you can see, both writing buffers and requesting new ones go through the ResultPartitionWriter:

public final class ResultPartitionWriter implements EventListener<TaskEvent> {

    private final ResultPartition partition; // the underlying ResultPartition

    private final TaskEventHandler taskEventHandler = new TaskEventHandler();

    public ResultPartitionWriter(ResultPartition partition) {
        this.partition = partition;
    }

    // ------------------------------------------------------------------------
    // Attributes
    // ------------------------------------------------------------------------

    public ResultPartitionID getPartitionId() {
        return partition.getPartitionId();
    }

    public BufferProvider getBufferProvider() {
        return partition.getBufferProvider();
    }

    public int getNumberOfOutputChannels() {
        return partition.getNumberOfSubpartitions();
    }

    // ------------------------------------------------------------------------
    // Data processing
    // ------------------------------------------------------------------------

    public void writeBuffer(Buffer buffer, int targetChannel) throws IOException {
        partition.add(buffer, targetChannel);
    }
}

Every ResultPartitionWriter operation delegates to the ResultPartition; writeBuffer simply adds the buffer to the partition.

 

ResultPartition

The initialization process:

The task creates its ResultPartitions, together with one ResultPartitionWriter each:

// Produced intermediate result partitions
this.producedPartitions = new ResultPartition[partitions.size()];
this.writers = new ResultPartitionWriter[partitions.size()];

for (int i = 0; i < this.producedPartitions.length; i++) {
    ResultPartitionDeploymentDescriptor desc = partitions.get(i);
    ResultPartitionID partitionId = new ResultPartitionID(desc.getPartitionId(), executionId);

    this.producedPartitions[i] = new ResultPartition(
        taskNameWithSubtaskAndId,
        jobId,
        partitionId,
        desc.getPartitionType(),
        desc.getEagerlyDeployConsumers(),
        desc.getNumberOfSubpartitions(),
        networkEnvironment.getPartitionManager(),
        networkEnvironment.getPartitionConsumableNotifier(),
        ioManager,
        networkEnvironment.getDefaultIOMode());

    this.writers[i] = new ResultPartitionWriter(this.producedPartitions[i]);
}

In Task.run, the task first registers with the NetworkEnvironment:

network.registerTask(this);

The main work here is to create a local buffer pool sized to the number of subpartitions and register it with the ResultPartition:

bufferPool = networkBufferPool.createBufferPool(partition.getNumberOfSubpartitions(), false); // create the LocalBufferPool; the number of required segments equals the number of subpartitions, i.e. one segment per subpartition
partition.registerBufferPool(bufferPool); // register the local pool with the ResultPartition

 

So,

writer.getBufferProvider().requestBufferBlocking();

ends up calling LocalBufferPool.requestBuffer.

If availableMemorySegments is non-empty, a segment is taken directly.

If not,

if (numberOfRequestedMemorySegments < currentPoolSize) {
    final MemorySegment segment = networkBufferPool.requestMemorySegment(); // if the pool may still grow, request a segment from the NetworkBufferPool

    if (segment != null) {
        numberOfRequestedMemorySegments++;
        availableMemorySegments.add(segment);

        continue;
    }
}

if (askToRecycle) { // if no new segment can be obtained, ask the owner to try to release memory
    owner.releaseMemory(1);
}

if (isBlocking) { // as a last resort, block for up to 2 seconds
    availableMemorySegments.wait(2000);
}

The owner here is the ResultPartition, whose releaseMemory walks its subpartitions:

public void releaseMemory(int toRelease) throws IOException {
    for (ResultSubpartition subpartition : subpartitions) {
        toRelease -= subpartition.releaseMemory(); // let each subpartition release what it can

        // Only release as much memory as needed
        if (toRelease <= 0) {
            break;
        }
    }
}

As you can see, if no segment is available at emit time, the call blocks and waits.

For a pipelined subpartition, releaseMemory does nothing, so if buffers are not sent out and recycled in time, the producer keeps blocking:

public int releaseMemory() {
    // The pipelined subpartition does not react to memory release requests. The buffers will be
    // recycled by the consuming task.
    return 0;
}
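
Putting both sides together, the request/recycle handshake can be condensed into a self-contained sketch; all names here are illustrative stand-ins, not Flink classes:

import java.util.ArrayDeque;

// Condensed sketch of LocalBufferPool's blocking request loop: reuse a recycled
// segment if one is available, grow the pool up to its limit, otherwise block
// until recycle() returns a segment and notifies the waiter.
class SketchBufferPool {

    private final ArrayDeque<byte[]> availableSegments = new ArrayDeque<byte[]>();
    private final int poolSize;
    private int requestedSegments;

    SketchBufferPool(int poolSize) {
        this.poolSize = poolSize;
    }

    byte[] requestBlocking() throws InterruptedException {
        synchronized (availableSegments) {
            while (true) {
                byte[] segment = availableSegments.poll();
                if (segment != null) {
                    return segment; // reuse a recycled segment
                }
                if (requestedSegments < poolSize) {
                    requestedSegments++; // grow up to the pool limit
                    return new byte[32 * 1024]; // stand-in for the global NetworkBufferPool
                }
                availableSegments.wait(2000); // block until recycle() notifies
            }
        }
    }

    void recycle(byte[] segment) {
        synchronized (availableSegments) {
            availableSegments.add(segment);
            availableSegments.notify(); // wake a blocked requester
        }
    }
}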

 

ResultPartition.add

public void add(Buffer buffer, int subpartitionIndex) throws IOException {
    boolean success = false;

    try {
        checkInProduceState();

        final ResultSubpartition subpartition = subpartitions[subpartitionIndex]; // look up the target ResultSubpartition

        synchronized (subpartition) {
            success = subpartition.add(buffer); // add the buffer to the ResultSubpartition

            // Update statistics
            totalNumberOfBuffers++;
            totalNumberOfBytes += buffer.getSize();
        }
    }
    finally {
        if (success) {
            notifyPipelinedConsumers(); // tell the ResultPartitionConsumableNotifier to fire notifyPartitionConsumable
        }
        else {
            buffer.recycle(); // on failure, recycle the buffer
        }
    }
}

 

For a PipelinedSubpartition, add simply appends the buffer to its queue:

/**
 * A pipelined in-memory only subpartition, which can be consumed once.
 */
class PipelinedSubpartition extends ResultSubpartition {

    /**
     * A data availability listener. Registered, when the consuming task is faster than the
     * producing task.
     */
    private NotificationListener registeredListener; // notifies the consumer once data arrives

    /** The read view to consume this subpartition. */
    private PipelinedSubpartitionView readView;

    /** All buffers of this subpartition. Access to the buffers is synchronized on this object. */
    final ArrayDeque<Buffer> buffers = new ArrayDeque<Buffer>(); // the buffer queue

    PipelinedSubpartition(int index, ResultPartition parent) {
        super(index, parent);
    }

    @Override
    public boolean add(Buffer buffer) {
        checkNotNull(buffer);

        final NotificationListener listener;

        synchronized (buffers) {
            if (isReleased || isFinished) {
                return false;
            }

            // Add the buffer and update the stats
            buffers.add(buffer); // append to the buffer queue
            updateStatistics(buffer);

            // Get the listener...
            listener = registeredListener;
            registeredListener = null;
        }

        // Notify the listener outside of the synchronized block
        if (listener != null) {
            listener.onNotification(); // fire the listener
        }

        return true;
    }
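
The handoff pattern above, append under the lock and notify outside it, can also be sketched in isolation (illustrative names only):

import java.util.ArrayDeque;

// Self-contained sketch of the add() handoff: the producer appends under the
// monitor, takes the one-shot listener, and fires it outside the lock so user
// code never runs while holding the buffer queue's monitor.
class SketchSubpartition {

    interface AvailabilityListener {
        void onNotification();
    }

    private final ArrayDeque<byte[]> buffers = new ArrayDeque<byte[]>();

    private AvailabilityListener registeredListener;

    boolean add(byte[] buffer) {
        final AvailabilityListener listener;

        synchronized (buffers) {
            buffers.add(buffer);
            listener = registeredListener; // take the one-shot listener, if any
            registeredListener = null;
        }

        if (listener != null) {
            listener.onNotification(); // notify outside the synchronized block
        }
        return true;
    }
}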

 

 

NettyConnectionManager

@Override
public void start(ResultPartitionProvider partitionProvider, TaskEventDispatcher taskEventDispatcher, NetworkBufferPool networkbufferPool)
        throws IOException {
    PartitionRequestProtocol partitionRequestProtocol =
        new PartitionRequestProtocol(partitionProvider, taskEventDispatcher, networkbufferPool);

    client.init(partitionRequestProtocol, bufferPool);
    server.init(partitionRequestProtocol, bufferPool);
}

 

PartitionRequestProtocol

// +-------------------------------------------------------------------+
// | SERVER CHANNEL PIPELINE |
// | |
// | +----------+----------+ (3) write +----------------------+ |
// | | Queue of queues +----------->| Message encoder | |
// | +----------+----------+ +-----------+----------+ |
// | /|\ \|/ |
// | | (2) enqueue | |
// | +----------+----------+ | |
// | | Request handler | | |
// | +----------+----------+ | |
// | /|\ | |
// | | | |
// | +----------+----------+ | |
// | | Message decoder | | |
// | +----------+----------+ | |
// | /|\ | |
// | | | |
// | +----------+----------+ | |
// | | Frame decoder | | |
// | +----------+----------+ | |
// | /|\ | |
// +---------------+-----------------------------------+---------------+
// | | (1) client request \|/
// +---------------+-----------------------------------+---------------+
// | | | |
// | [ Socket.read() ] [ Socket.write() ] |
// | |
// | Netty Internal I/O Threads (Transport Implementation) |
// +-------------------------------------------------------------------+

@Override
public ChannelHandler[] getServerChannelHandlers() {
    PartitionRequestQueue queueOfPartitionQueues = new PartitionRequestQueue();
    PartitionRequestServerHandler serverHandler = new PartitionRequestServerHandler(
        partitionProvider, taskEventDispatcher, queueOfPartitionQueues, networkbufferPool);

    return new ChannelHandler[] {
        messageEncoder,
        createFrameLengthDecoder(),
        messageDecoder,
        serverHandler,
        queueOfPartitionQueues
    };
}
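
For reference, a handler array like this is typically installed on each accepted channel through a standard Netty ChannelInitializer; the exact wiring inside Flink's NettyServer may differ, so treat this as a sketch using the plain Netty API:

import io.netty.channel.ChannelHandler;
import io.netty.channel.ChannelInitializer;
import io.netty.channel.socket.SocketChannel;

// Sketch: install the handler array on each accepted server channel. Inbound
// events flow through the handlers in addLast order, outbound in reverse,
// which is exactly the flow shown in the pipeline diagram above.
class SketchChannelInitializer extends ChannelInitializer<SocketChannel> {

    private final ChannelHandler[] handlers;

    SketchChannelInitializer(ChannelHandler[] handlers) {
        this.handlers = handlers;
    }

    @Override
    protected void initChannel(SocketChannel channel) {
        channel.pipeline().addLast(handlers);
    }
}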

 

PartitionRequestServerHandler

On channel registration, the server handler allocates a buffer pool with at least one required segment; since the second argument is false, spare segments from the NetworkBufferPool may also be allocated into it:

public void channelRegistered(ChannelHandlerContext ctx) throws Exception {
    super.channelRegistered(ctx);

    bufferPool = networkBufferPool.createBufferPool(1, false);
}

 

protected void channelRead0(ChannelHandlerContext ctx, NettyMessage msg) throws Exception {
    PartitionRequest request = (PartitionRequest) msg;

    LOG.debug("Read channel on {}: {}.", ctx.channel().localAddress(), request);

    try {
        ResultSubpartitionView subpartition =
            partitionProvider.createSubpartitionView(
                request.partitionId,
                request.queueIndex,
                bufferPool);

        outboundQueue.enqueue(subpartition, request.receiverId); // enqueue into the PartitionRequestQueue for sending
    }

 

ResultPartitionManager implements ResultPartitionProvider,

so this call goes to ResultPartitionManager.createSubpartitionView:

synchronized (registeredPartitions) {
    final ResultPartition partition = registeredPartitions.get(partitionId.getProducerId(),
        partitionId.getPartitionId());

    return partition.createSubpartitionView(subpartitionIndex, bufferProvider);
}

 

 

ResultPartition

public ResultSubpartitionView createSubpartitionView(int index, BufferProvider bufferProvider) throws IOException {
    int refCnt = pendingReferences.get();

    checkState(refCnt != -1, "Partition released.");
    checkState(refCnt > 0, "Partition not pinned.");

    ResultSubpartitionView readView = subpartitions[index].createReadView(bufferProvider);

    return readView;
}

The definition of pendingReferences:

/**
* The total number of references to subpartitions of this result. The result partition can be
* safely released, iff the reference count is zero. A reference count of -1 denotes that the
* result partition has been released.
*/
private final AtomicInteger pendingReferences = new AtomicInteger();

 

PipelinedSubpartitionView

class PipelinedSubpartitionView implements ResultSubpartitionView {

    /** The subpartition this view belongs to. */
private final PipelinedSubpartition parent; /** Flag indicating whether this view has been released. */
private AtomicBoolean isReleased = new AtomicBoolean(); PipelinedSubpartitionView(PipelinedSubpartition parent) {
this.parent = checkNotNull(parent);
} @Override
public Buffer getNextBuffer() {
synchronized (parent.buffers) {
return parent.buffers.poll(); //从parent,PipelinedSubpartition,的buffers里面直接poll
}
} @Override
public boolean registerListener(NotificationListener listener) {
return !isReleased.get() && parent.registerListener(listener);
} @Override
public void notifySubpartitionConsumed() { //消费完释放该Subpartition
releaseAllResources();
} @Override
public void releaseAllResources() {
if (isReleased.compareAndSet(false, true)) {
// The view doesn't hold any resources and the parent cannot be restarted. Therefore,
// it's OK to notify about consumption as well.
parent.onConsumedSubpartition();
}
}

 

When sending data, the PartitionRequestQueue calls getNextBuffer to fetch the next buffer.

Once the partition is fully sent, i.e. after the EndOfPartitionEvent, it calls notifySubpartitionConsumed.

The release then goes through PipelinedSubpartition.onConsumedSubpartition -> ResultPartition.onConsumedSubpartition:

void onConsumedSubpartition(int subpartitionIndex) {

    if (isReleased.get()) { // already released
        return;
    }

    int refCnt = pendingReferences.decrementAndGet(); // one subpartition consumed: decrement the count

    if (refCnt == 0) { // all subpartitions have been consumed
        partitionManager.onConsumedPartition(this); // tell the partitionManager to release this partition
    }
}
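
The pin/release scheme boils down to a reference count initialized to the number of consumers; a minimal sketch (illustrative names only, not Flink code):

import java.util.concurrent.atomic.AtomicInteger;

// Minimal sketch of the pinning scheme: the partition starts with one reference
// per subpartition view, each consumed view releases one, and the last release
// triggers the actual cleanup.
class SketchRefCountedPartition {

    private final AtomicInteger pendingReferences;

    SketchRefCountedPartition(int numSubpartitions) {
        pendingReferences = new AtomicInteger(numSubpartitions); // "pinned" once per consumer
    }

    void onConsumedSubpartition(Runnable releaseAction) {
        if (pendingReferences.decrementAndGet() == 0) {
            releaseAction.run(); // last consumer done: release the whole partition
        }
    }
}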

 

When are the buffers in a PipelinedSubpartition released back to the LocalBufferPool?

Look at the send path in PartitionRequestQueue:

writeAndFlushNextMessageIfPossible

buffer = currentPartitionQueue.getNextBuffer();

BufferResponse resp = new BufferResponse(buffer, currentPartitionQueue.getSequenceNumber(), currentPartitionQueue.getReceiverId()); // wrap the buffer in a BufferResponse

if (!buffer.isBuffer() &&
        EventSerializer.fromBuffer(buffer, getClass().getClassLoader()).getClass() == EndOfPartitionEvent.class) { // the end-of-partition event was read
    currentPartitionQueue.notifySubpartitionConsumed(); // notify the view
    currentPartitionQueue.releaseAllResources(); // release all resources
    markAsReleased(currentPartitionQueue.getReceiverId());

    currentPartitionQueue = null;
}

channel.writeAndFlush(resp).addListener(writeListener); // the actual send; WriteAndFlushNextMessageIfPossibleListener then calls writeAndFlushNextMessageIfPossible again, draining the queue
 

In BufferResponse,

@Override
ByteBuf write(ByteBufAllocator allocator) throws IOException {
    int length = 16 + 4 + 1 + 4 + buffer.getSize();

    ByteBuf result = null;
    try {
        result = allocateBuffer(allocator, ID, length);

        receiverId.writeTo(result);
        result.writeInt(sequenceNumber);
        result.writeBoolean(buffer.isBuffer());
        result.writeInt(buffer.getSize());
        result.writeBytes(buffer.getNioBuffer());

        return result;
    }
    catch (Throwable t) {
        if (result != null) {
            result.release();
        }

        throw new IOException(t);
    }
    finally {
        if (buffer != null) {
            buffer.recycle(); // recycle the buffer
        }
    }
}
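
The length 16 + 4 + 1 + 4 + buffer.getSize() mirrors the on-wire layout: a 16-byte receiver ID, a 4-byte sequence number, a 1-byte buffer/event flag, a 4-byte payload size, then the payload itself. A sketch of reading that frame body back (illustrative only; Flink's real decoding lives in NettyMessage):

import java.nio.ByteBuffer;

// Sketch: decode the frame body written by BufferResponse.write above.
class BufferResponseLayout {

    static void decode(ByteBuffer frame) {
        byte[] receiverId = new byte[16];
        frame.get(receiverId);                // 16 bytes: receiver ID
        int sequenceNumber = frame.getInt();  // 4 bytes: sequence number
        boolean isBuffer = frame.get() != 0;  // 1 byte: record buffer vs. event
        int size = frame.getInt();            // 4 bytes: payload size
        byte[] payload = new byte[size];
        frame.get(payload);                   // the serialized records or event
    }
}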

 

Buffer

public void recycle() {
    synchronized (recycleLock) {
        if (--referenceCount == 0) {
            recycler.recycle(memorySegment);
        }
    }
}
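
The same reference-counting idea in a self-contained sketch: retain() before sharing the buffer, recycle() when done, and only the last release returns the segment (illustrative names; the recycler is modeled as a plain callback):

import java.util.function.Consumer;

// Sketch of buffer reference counting: the segment goes back to its pool only
// when the last holder calls recycle().
class SketchBuffer {

    private final byte[] memorySegment;
    private final Consumer<byte[]> recycler; // e.g. the owning buffer pool
    private final Object recycleLock = new Object();
    private int referenceCount = 1;

    SketchBuffer(byte[] memorySegment, Consumer<byte[]> recycler) {
        this.memorySegment = memorySegment;
        this.recycler = recycler;
    }

    void retain() {
        synchronized (recycleLock) {
            referenceCount++;
        }
    }

    void recycle() {
        synchronized (recycleLock) {
            if (--referenceCount == 0) {
                recycler.accept(memorySegment); // last reference: hand the segment back
            }
        }
    }
}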

 

LocalBufferPool

@Override
public void recycle(MemorySegment segment) {
    synchronized (availableMemorySegments) {
        if (isDestroyed || numberOfRequestedMemorySegments > currentPoolSize) {
            returnMemorySegment(segment);
        }
        else {
            EventListener<Buffer> listener = registeredListeners.poll();

            if (listener == null) {
                availableMemorySegments.add(segment); // no listener: put the segment back into availableMemorySegments for reuse
                availableMemorySegments.notify();
            }
            else {
                try {
                    listener.onEvent(new Buffer(segment, this)); // a listener is registered: hand the buffer straight to it
                }
                catch (Throwable ignored) {
                    availableMemorySegments.add(segment);
                    availableMemorySegments.notify();
                }
            }
        }
    }
}
