内容:所有原创文章分类汇总及配套源码,涉及Java、Docker、Kubernetes、DevOps等;
系列文章链接
本篇概览
参考文章梳理流程
源码下载
如果您不想写代码,整个系列的源码可在GitHub下载到,地址和链接信息如下表所示(https://github.com/zq2599/blog_demos):
这个git项目中有多个文件夹,本章的应用在flinkstudy文件夹下,如下图红框所示:
CoProcessFunction的子类
package com.bolingcavalry.coprocessfunction; import com.bolingcavalry.Utils; import org.apache.flink.api.common.state.ValueState; import org.apache.flink.api.common.state.ValueStateDescriptor; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.configuration.Configuration; import org.apache.flink.streaming.api.functions.co.CoProcessFunction; import org.apache.flink.util.Collector; import org.apache.flink.util.OutputTag; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * 实现双流业务逻辑的功能类 */ public class ExecuteWithTimeoutCoProcessFunction extends CoProcessFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>> { private static final Logger logger = LoggerFactory.getLogger(ExecuteWithTimeoutCoProcessFunction.class); /** * 等待时间 */ private static final long WAIT_TIME = 10000L; public ExecuteWithTimeoutCoProcessFunction(OutputTag<String> source1SideOutput, OutputTag<String> source2SideOutput) { super(); this.source1SideOutput = source1SideOutput; this.source2SideOutput = source2SideOutput; } private OutputTag<String> source1SideOutput; private OutputTag<String> source2SideOutput; // 某个key在processElement1中存入的状态 private ValueState<Integer> state1; // 某个key在processElement2中存入的状态 private ValueState<Integer> state2; // 如果创建了定时器,就在状态中保存定时器的key private ValueState<Long> timerState; // onTimer中拿不到当前key,只能提前保存在状态中(KeyedProcessFunction的OnTimerContext有API可以取到,但是CoProcessFunction的OnTimerContext却没有) private ValueState<String> currentKeyState; @Override public void open(Configuration parameters) throws Exception { // 初始化状态 state1 = getRuntimeContext().getState(new ValueStateDescriptor<>("myState1", Integer.class)); state2 = getRuntimeContext().getState(new ValueStateDescriptor<>("myState2", Integer.class)); timerState = getRuntimeContext().getState(new ValueStateDescriptor<>("timerState", Long.class)); currentKeyState = getRuntimeContext().getState(new ValueStateDescriptor<>("currentKeyState", String.class)); } /** * 所有状态都清理掉 */ 
private void clearAllState() { state1.clear(); state2.clear(); currentKeyState.clear(); timerState.clear(); } @Override public void processElement1(Tuple2<String, Integer> value, Context ctx, Collector<Tuple2<String, Integer>> out) throws Exception { logger.info("processElement1:处理元素1:{}", value); String key = value.f0; Integer value2 = state2.value(); // value2为空,就表示processElement2还没有处理或这个key, // 这时候就把value1保存起来 if(null==value2) { logger.info("processElement1:2号流还未收到过[{}],把1号流收到的值[{}]保存起来", key, value.f1); state1.update(value.f1); currentKeyState.update(key); // 开始10秒的定时器,10秒后会进入 long timerKey = ctx.timestamp() + WAIT_TIME; ctx.timerService().registerProcessingTimeTimer(timerKey); // 保存定时器的key timerState.update(timerKey); logger.info("processElement1:创建定时器[{}],等待2号流接收数据", Utils.time(timerKey)); } else { logger.info("processElement1:2号流收到过[{}],值是[{}],现在把两个值相加后输出", key, value2); // 输出一个新的元素到下游节点 out.collect(new Tuple2<>(key, value.f1 + value2)); // 删除定时器(这个定时器应该是processElement2创建的) long timerKey = timerState.value(); logger.info("processElement1:[{}]的新元素已输出到下游,删除定时器[{}]", key, Utils.time(timerKey)); ctx.timerService().deleteProcessingTimeTimer(timerKey); clearAllState(); } } @Override public void processElement2(Tuple2<String, Integer> value, Context ctx, Collector<Tuple2<String, Integer>> out) throws Exception { logger.info("processElement2:处理元素2:{}", value); String key = value.f0; Integer value1 = state1.value(); // value1为空,就表示processElement1还没有处理或这个key, // 这时候就把value2保存起来 if(null==value1) { logger.info("processElement2:1号流还未收到过[{}],把2号流收到的值[{}]保存起来", key, value.f1); state2.update(value.f1); currentKeyState.update(key); // 开始10秒的定时器,10秒后会进入 long timerKey = ctx.timestamp() + WAIT_TIME; ctx.timerService().registerProcessingTimeTimer(timerKey); // 保存定时器的key timerState.update(timerKey); logger.info("processElement2:创建定时器[{}],等待1号流接收数据", Utils.time(timerKey)); } else { logger.info("processElement2:1号流收到过[{}],值是[{}],现在把两个值相加后输出", key, value1); // 输出一个新的元素到下游节点 
out.collect(new Tuple2<>(key, value.f1 + value1)); // 删除定时器(这个定时器应该是processElement1创建的) long timerKey = timerState.value(); logger.info("processElement2:[{}]的新元素已输出到下游,删除定时器[{}]", key, Utils.time(timerKey)); ctx.timerService().deleteProcessingTimeTimer(timerKey); clearAllState(); } } @Override public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<String, Integer>> out) throws Exception { super.onTimer(timestamp, ctx, out); String key = currentKeyState.value(); // 定时器被触发,意味着此key只在一个中出现过 logger.info("[{}]的定时器[{}]被触发了", key, Utils.time(timestamp)); Integer value1 = state1.value(); Integer value2 = state2.value(); if(null!=value1) { logger.info("只有1号流收到过[{}],值为[{}]", key, value1); // 侧输出 ctx.output(source1SideOutput, "source1 side, key [" + key+ "], value [" + value1 + "]"); } if(null!=value2) { logger.info("只有2号流收到过[{}],值为[{}]", key, value2); // 侧输出 ctx.output(source2SideOutput, "source2 side, key [" + key+ "], value [" + value2 + "]"); } clearAllState(); } }
业务执行类AddTwoSourceValueWithTimeout
package com.bolingcavalry.coprocessfunction; import com.bolingcavalry.Utils; import org.apache.flink.api.java.tuple.Tuple; import org.apache.flink.api.java.tuple.Tuple2; import org.apache.flink.streaming.api.datastream.KeyedStream; import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator; import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment; import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks; import org.apache.flink.streaming.api.functions.co.CoProcessFunction; import org.apache.flink.streaming.api.watermark.Watermark; import org.apache.flink.util.OutputTag; import org.slf4j.Logger; import org.slf4j.LoggerFactory; /** * @author will * @email zq2599@gmail.com * @date 2020-11-11 09:48 * @description 将两个流中相通key的value相加,当key在一个流中出现后, * 会在有限时间内等待它在另一个流中出现,如果超过等待时间任未出现就在旁路输出 */ public class AddTwoSourceValueWithTimeout extends AbstractCoProcessFunctionExecutor { private static final Logger logger = LoggerFactory.getLogger(AddTwoSourceValueWithTimeout.class); // 假设aaa流入1号源后,在2号源超过10秒没有收到aaa,那么1号源的aaa就会流入source1SideOutput final OutputTag<String> source1SideOutput = new OutputTag<String>("source1-sideoutput"){}; // 假设aaa流入2号源后,如果1号源超过10秒没有收到aaa,那么2号源的aaa就会流入source2SideOutput final OutputTag<String> source2SideOutput = new OutputTag<String>("source2-sideoutput"){}; /** * 重写父类的方法,保持父类逻辑不变,仅增加了时间戳分配器,向元素中加入时间戳 * @param port * @return */ @Override protected KeyedStream<Tuple2<String, Integer>, Tuple> buildStreamFromSocket(StreamExecutionEnvironment env, int port) { return env // 监听端口 .socketTextStream("localhost", port) // 得到的字符串"aaa,3"转成Tuple2实例,f0="aaa",f1=3 .map(new WordCountMap()) // 设置时间戳分配器,用当前时间作为时间戳 .assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Tuple2<String, Integer>>() { @Override public long extractTimestamp(Tuple2<String, Integer> element, long previousElementTimestamp) { long timestamp = System.currentTimeMillis(); logger.info("添加时间戳,值:{},时间戳:{}", element, 
Utils.time(timestamp)); // 使用当前系统时间作为时间戳 return timestamp; } @Override public Watermark getCurrentWatermark() { // 本例不需要watermark,返回null return null; } }) // 将单词作为key分区 .keyBy(0); } @Override protected CoProcessFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>> getCoProcessFunctionInstance() { return new ExecuteWithTimeoutCoProcessFunction(source1SideOutput, source2SideOutput); } @Override protected void doSideOutput(SingleOutputStreamOperator<Tuple2<String, Integer>> mainDataStream) { // 两个侧输出都直接打印 mainDataStream.getSideOutput(source1SideOutput).print(); mainDataStream.getSideOutput(source2SideOutput).print(); } public static void main(String[] args) throws Exception { new AddTwoSourceValueWithTimeout().execute(); } }
验证(不超时的操作)
18:18:10,472 INFO AddTwoSourceValueWithTimeout - 添加时间戳,值:(aaa,1),时间戳:2020-11-12 06:18:10 18:18:10,550 INFO ExecuteWithTimeoutCoProcessFunction - processElement1:处理元素1:(aaa,1) 18:18:10,550 INFO ExecuteWithTimeoutCoProcessFunction - processElement1:2号流还未收到过[aaa],把1号流收到的值[1]保存起来 18:18:10,553 INFO ExecuteWithTimeoutCoProcessFunction - processElement1:创建定时器[2020-11-12 06:18:20],等待2号流接收数据
18:18:15,813 INFO AddTwoSourceValueWithTimeout - 添加时间戳,值:(aaa,2),时间戳:2020-11-12 06:18:15
18:18:15,887 INFO ExecuteWithTimeoutCoProcessFunction - processElement2:处理元素2:(aaa,2)
18:18:15,887 INFO ExecuteWithTimeoutCoProcessFunction - processElement2:1号流收到过[aaa],值是[1],现在把两个值相加后输出
(aaa,3)
18:18:15,888 INFO ExecuteWithTimeoutCoProcessFunction - processElement2:[aaa]的新元素已输出到下游,删除定时器[2020-11-12 06:18:20]
验证(超时的操作)
18:23:37,393 INFO AddTwoSourceValueWithTimeout - 添加时间戳,值:(aaa,1),时间戳:2020-11-12 06:23:37 18:23:37,417 INFO ExecuteWithTimeoutCoProcessFunction - processElement1:处理元素1:(aaa,1) 18:23:37,417 INFO ExecuteWithTimeoutCoProcessFunction - processElement1:2号流还未收到过[aaa],把1号流收到的值[1]保存起来 18:23:37,417 INFO ExecuteWithTimeoutCoProcessFunction - processElement1:创建定时器[2020-11-12 06:23:47],等待2号流接收数据 18:23:47,398 INFO ExecuteWithTimeoutCoProcessFunction - [aaa]的定时器[2020-11-12 06:23:47]被触发了 18:23:47,399 INFO ExecuteWithTimeoutCoProcessFunction - 只有1号流收到过[aaa],值为[1] source1 side, key [aaa], value [1]
|
|
来自: python_lover > 《待分类》