Flink Flow


1. Create the execution environment for stream computing
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.getConfig().disableSysoutLogging();
env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(4, 10000));
env.enableCheckpointing(5000); // create a checkpoint every 5 seconds
env.getConfig().setGlobalJobParameters(parameterTool); // make parameters available in the web interface
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
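The configuration above reads its settings from a parameterTool instance that the post never shows being created. A minimal sketch, assuming the standard ParameterTool helper (org.apache.flink.api.java.utils.ParameterTool); the exact argument names beyond the two topics are typical Kafka settings, not taken from the post:

// parses --key value pairs from the command line, e.g.
// --input-topic test-input --output-topic test-output --bootstrap.servers localhost:9092 --group.id myGroup
ParameterTool parameterTool = ParameterTool.fromArgs(args);

Under the hood, getExecutionEnvironment() picks the kind of environment to create depending on how the program was launched: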
public static StreamExecutionEnvironment getExecutionEnvironment() {
    if (contextEnvironmentFactory != null) {
        return contextEnvironmentFactory.createExecutionEnvironment();
    }

    // because the streaming project depends on "flink-clients" (and not the other way around)
    // we currently need to intercept the data set environment and create a dependent stream env.
    // this should be fixed once we rework the project dependencies
    ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
    if (env instanceof ContextEnvironment) {
        return new StreamContextEnvironment((ContextEnvironment) env);
    } else if (env instanceof OptimizerPlanEnvironment || env instanceof PreviewPlanEnvironment) {
        return new StreamPlanEnvironment(env);
    } else {
        return createLocalEnvironment();
    }
}
2. Next, add the data source that feeds the computation
DataStream<KafkaEvent> input = env
        .addSource(new FlinkKafkaConsumer010<>(
                parameterTool.getRequired("input-topic"),
                new KafkaEventSchema(),
                parameterTool.getProperties())
            .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
        .keyBy("word")
        .map(new RollingAdditionMapper());
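Because step 1 puts the job on event time (TimeCharacteristic.EventTime), the Kafka consumer is given a CustomWatermarkExtractor. That class is not shown in the post; below is a minimal sketch of a periodic watermark assigner that matches the calls above (the real class in Flink's Kafka example may differ in detail):

// org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks
private static class CustomWatermarkExtractor implements AssignerWithPeriodicWatermarks<KafkaEvent> {

    private static final long serialVersionUID = 1L;

    // highest event timestamp seen so far on this parallel instance
    private long currentTimestamp = Long.MIN_VALUE;

    @Override
    public long extractTimestamp(KafkaEvent event, long previousElementTimestamp) {
        this.currentTimestamp = event.getTimestamp();
        return event.getTimestamp();
    }

    @Override
    public Watermark getCurrentWatermark() {
        // the watermark trails the last seen timestamp by one millisecond
        return new Watermark(currentTimestamp == Long.MIN_VALUE ? Long.MIN_VALUE : currentTimestamp - 1);
    }
}

Inside the framework, addSource wraps the SourceFunction in a StreamSource operator and works out the type of the produced records: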
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> function) {
    return addSource(function, "Custom Source");
}

@SuppressWarnings("unchecked")
public <OUT> DataStreamSource<OUT> addSource(SourceFunction<OUT> function, String sourceName, TypeInformation<OUT> typeInfo) {
    if (typeInfo == null) {
        if (function instanceof ResultTypeQueryable) {
            typeInfo = ((ResultTypeQueryable<OUT>) function).getProducedType();
        } else {
            try {
                typeInfo = TypeExtractor.createTypeInfo(
                        SourceFunction.class,
                        function.getClass(), 0, null, null);
            } catch (final InvalidTypesException e) {
                typeInfo = (TypeInformation<OUT>) new MissingTypeInfo(sourceName, e);
            }
        }
    }

    boolean isParallel = function instanceof ParallelSourceFunction;

    clean(function);

    StreamSource<OUT, ?> sourceOperator;
    if (function instanceof StoppableFunction) {
        sourceOperator = new StoppableStreamSource<>(cast2StoppableSourceFunction(function));
    } else {
        sourceOperator = new StreamSource<>(function);
    }

    return new DataStreamSource<>(this, typeInfo, sourceOperator, isParallel, sourceName);
}
public <R> SingleOutputStreamOperator<R> map(MapFunction<T, R> mapper) {
    TypeInformation<R> outType = TypeExtractor.getMapReturnTypes(clean(mapper), getType(),
            Utils.getCallLocationName(), true);
    return transform("Map", outType, new StreamMap<>(clean(mapper)));
}

public <R> SingleOutputStreamOperator<R> transform(String operatorName, TypeInformation<R> outTypeInfo, OneInputStreamOperator<T, R> operator) {
    // read the output type of the input Transform to coax out errors about MissingTypeInfo
    transformation.getOutputType();

    OneInputTransformation<T, R> resultTransform = new OneInputTransformation<>(
            this.transformation,
            operatorName,
            operator,
            outTypeInfo,
            environment.getParallelism());

    @SuppressWarnings({ "unchecked", "rawtypes" })
    SingleOutputStreamOperator<R> returnStream = new SingleOutputStreamOperator(environment, resultTransform);

    getExecutionEnvironment().addOperator(resultTransform);

    return returnStream;
}

@Internal
public void addOperator(StreamTransformation<?> transformation) {
    Preconditions.checkNotNull(transformation, "transformation must not be null.");
    this.transformations.add(transformation);
}

protected final List<StreamTransformation<?>> transformations = new ArrayList<>();

public KeyedStream<T, Tuple> keyBy(String... fields) {
    return keyBy(new Keys.ExpressionKeys<>(fields, getType()));
}

private KeyedStream<T, Tuple> keyBy(Keys<T> keys) {
    return new KeyedStream<>(this, clean(KeySelectorUtil.getSelectorForKeys(keys,
            getType(), getExecutionConfig())));
}
3. The data from the source is streamed through the Flink distributed runtime, and the computed result is handed to a data sink.
input.addSink(new FlinkKafkaProducer010<>(
        parameterTool.getRequired("output-topic"),
        new KafkaEventSchema(),
        parameterTool.getProperties()));

public DataStreamSink<T> addSink(SinkFunction<T> sinkFunction) {
    // read the output type of the input Transform to coax out errors about MissingTypeInfo
    transformation.getOutputType();

    // configure the type if needed
    if (sinkFunction instanceof InputTypeConfigurable) {
        ((InputTypeConfigurable) sinkFunction).setInputType(getType(), getExecutionConfig());
    }

    StreamSink<T> sinkOperator = new StreamSink<>(clean(sinkFunction));

    DataStreamSink<T> sink = new DataStreamSink<>(this, sinkOperator);

    getExecutionEnvironment().addOperator(sink.getTransformation());
    return sink;
}
The sink is registered through the same addOperator method shown in step 2, so its transformation ends up in the same transformations list as the map operator.
4. The last step is to start the job execution.
env.execute("Kafka 0.10 Example");
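Putting the four steps together, the skeleton of the job's main method looks roughly like this (a sketch following the shape of Flink's Kafka 0.10 example, using the classes and parameters from the snippets above):

public static void main(String[] args) throws Exception {
    // parse --input-topic, --output-topic and the Kafka properties
    ParameterTool parameterTool = ParameterTool.fromArgs(args);

    // step 1: create and configure the streaming environment
    StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
    env.enableCheckpointing(5000);
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // step 2: Kafka source, keyed by word, with a rolling per-word count
    DataStream<KafkaEvent> input = env
            .addSource(new FlinkKafkaConsumer010<>(
                    parameterTool.getRequired("input-topic"),
                    new KafkaEventSchema(),
                    parameterTool.getProperties())
                .assignTimestampsAndWatermarks(new CustomWatermarkExtractor()))
            .keyBy("word")
            .map(new RollingAdditionMapper());

    // step 3: write the counts back to Kafka
    input.addSink(new FlinkKafkaProducer010<>(
            parameterTool.getRequired("output-topic"),
            new KafkaEventSchema(),
            parameterTool.getProperties()));

    // step 4: nothing runs until execute() is called; only now are the
    // transformations collected by addOperator turned into a job graph
    env.execute("Kafka 0.10 Example");
}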
The RollingAdditionMapper used in the map step is defined below.
private static class RollingAdditionMapper extends RichMapFunction<KafkaEvent, KafkaEvent> {

    private static final long serialVersionUID = 1180234853172462378L;

    private transient ValueState<Integer> currentTotalCount;

    @Override
    public KafkaEvent map(KafkaEvent event) throws Exception {
        Integer totalCount = currentTotalCount.value();
        if (totalCount == null) {
            totalCount = 0;
        }
        totalCount += event.getFrequency();

        currentTotalCount.update(totalCount);

        return new KafkaEvent(event.getWord(), totalCount, event.getTimestamp());
    }

    @Override
    public void open(Configuration parameters) throws Exception {
        currentTotalCount = getRuntimeContext().getState(new ValueStateDescriptor<>("currentTotalCount", Integer.class));
    }
}
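The mapper builds new KafkaEvent objects, and keyBy("word") addresses a field by name, so KafkaEvent has to be a Flink POJO (public no-argument constructor plus public fields or getters and setters). The class itself is not included in the post; a minimal sketch consistent with the calls used above:

public static class KafkaEvent {

    private String word;
    private int frequency;
    private long timestamp;

    // Flink POJO types need a public no-argument constructor
    public KafkaEvent() {}

    public KafkaEvent(String word, int frequency, long timestamp) {
        this.word = word;
        this.frequency = frequency;
        this.timestamp = timestamp;
    }

    public String getWord() { return word; }
    public void setWord(String word) { this.word = word; }

    public int getFrequency() { return frequency; }
    public void setFrequency(int frequency) { this.frequency = frequency; }

    public long getTimestamp() { return timestamp; }
    public void setTimestamp(long timestamp) { this.timestamp = timestamp; }
}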






Reference: http://www.debugrun.com/a/LjK8Nni.html