【Spark常用算子】的更多相关文章

算子分为 value-transform、key-value-transform、action 三种。f 是输入给算子的函数,比如 lambda x: x**2。常用算子:keys:取 pair RDD 的 key 部分;values:取 pair RDD 的 value 部分;map:f 作用于每个元素;flatMap:f 作用于每个元素,输出 list,然后对 list 压平;mapValues:f 作用于 pair RDD 的 value 部分;flatMapValues:f 作用于 pair RDD 的 value 部分,…
package com.test; import java.util.ArrayList; import java.util.List; import java.util.Map; import org.apache.spark.Partitioner; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD;…
package com.test; import java.util.ArrayList; import java.util.Arrays; import java.util.Iterator; import java.util.List; import java.util.Map; import org.apache.spark.SparkConf; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.ap…
import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.VoidFunction;import java.util.Arrays;import java.util.List; /** * union 算子: * 取两个RD…
import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext; import java.util.Arrays;import java.util.List; /** * takeSample 算子: * 先 sample 再 take * 第一个参数:是否可以重复 * 第二个参数:返回take(n) * 第三个…
import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaPairRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.Function2;import org.apache.spark.api.java.function.VoidFunction;import scala.T…
import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.VoidFunction; import java.util.Arrays;import java.util.List; /** *sampleoperator(wi…
import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.Function; import java.util.Arrays;import java.util.List; /** * saveAsTextFile 算子: *…
import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.Function2;import org.apache.spark.api.java.function.VoidFunction; import java.util.…
import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.api.java.function.Function2; import java.util.Arrays;import java.util.List; /** * reduce(fun) 算子: * 每…