HiBench成长笔记——(5) HiBench-Spark-SQL-Scan源码分析
run.sh
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
current_dir=`dirname "$0"`
current_dir=`cd "$current_dir"; pwd`
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/sql/scan.conf
. "${root_dir}/bin/functions/load_bench_config.sh"
enter_bench ScalaSparkScan ${workload_config} ${current_dir}
show_bannar start
# prepare SQL
HIVEBENCH_SQL_FILE=${WORKLOAD_RESULT_FOLDER}/rankings_uservisits_scan.hive
prepare_sql_scan ${HIVEBENCH_SQL_FILE}
START_TIME=`timestamp`
rmr_hdfs $OUTPUT_HDFS
run_spark_job com.intel.hibench.sparkbench.sql.ScalaSparkSQLBench ScalaScan ${HIVEBENCH_SQL_FILE}
END_TIME=`timestamp`
SIZE=`dir_size $OUTPUT_HDFS`
gen_report ${START_TIME} ${END_TIME} ${SIZE:-}
show_bannar finish
leave_bench
workload_functions.sh
function run_spark_job() {
LIB_JARS=
while (($#)); do
if [ "$1" = "--jars" ]; then
LIB_JARS="--jars $2"
continue
fi
break
done
CLS=$
shift
export_withlog SPARKBENCH_PROPERTIES_FILES
YARN_OPTS=""
if [[ "$SPARK_MASTER" == yarn-* ]]; then
export_withlog HADOOP_CONF_DIR
YARN_OPTS="--num-executors ${YARN_NUM_EXECUTORS}"
if [[ -n "${YARN_EXECUTOR_CORES:-}" ]]; then
YARN_OPTS="${YARN_OPTS} --executor-cores ${YARN_EXECUTOR_CORES}"
fi
if [[ -n "${SPARK_YARN_EXECUTOR_MEMORY:-}" ]]; then
YARN_OPTS="${YARN_OPTS} --executor-memory ${SPARK_YARN_EXECUTOR_MEMORY}"
fi
if [[ -n "${SPAKR_YARN_DRIVER_MEMORY:-}" ]]; then
YARN_OPTS="${YARN_OPTS} --driver-memory ${SPARK_YARN_DRIVER_MEMORY}"
fi
fi
if [[ "$CLS" == *.py ]]; then
LIB_JARS="$LIB_JARS --jars ${SPARKBENCH_JAR}"
SUBMIT_CMD="${SPARK_HOME}/bin/spark-submit ${LIB_JARS} --properties-file ${SPARK_PROP_CONF} --master ${SPARK_MASTER} ${YARN_OPTS} ${CLS} $@"
else
SUBMIT_CMD="${SPARK_HOME}/bin/spark-submit ${LIB_JARS} --properties-file ${SPARK_PROP_CONF} --class ${CLS} --master ${SPARK_MASTER} ${YARN_OPTS} ${SPARKBENCH_JAR} $@"
fi
echo -e "${BGreen}Submit Spark job: ${Green}${SUBMIT_CMD}${Color_Off}"
MONITOR_PID=`start_monitor`
execute_withlog ${SUBMIT_CMD}
result=$?
stop_monitor ${MONITOR_PID}
]
then
echo -e "${BRed}ERROR${Color_Off}: Spark job ${BYellow}${CLS}${Color_Off} failed to run successfully."
echo -e "${BBlue}Hint${Color_Off}: You can goto ${BYellow}${WORKLOAD_RESULT_FOLDER}/bench.log${Color_Off} to check for detailed log.\nOpening log tail for you:\n"
tail ${WORKLOAD_RESULT_FOLDER}/bench.log
exit $result
fi
}
ScalaSparkSQLBench.scala
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intel.hibench.sparkbench.sql
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext
/*
* ported from HiBench's hive bench
*/
object ScalaSparkSQLBench{
def main(args: Array[String]){
if (args.length < 2){
System.err.println(
s"Usage: $ScalaSparkSQLBench <workload name> <SQL sciprt file>"
)
System.exit(1)
}
val workload_name = args(0)
val sql_file = args(1)
val sparkConf = new SparkConf().setAppName(workload_name)
val sc = new SparkContext(sparkConf)
val hc = new HiveContext(sc)
val _sql = scala.io.Source.fromFile(sql_file).mkString
_sql.split(';').foreach { x =>
if (x.trim.nonEmpty)
hc.sql(x)
}
sc.stop()
}
}
HiveData.java
package HiBench;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleInputs;
import org.apache.hadoop.mapred.lib.NLineInputFormat;
public class HiveData {
private static final Log log = LogFactory.getLog(HiveData.class.getName());
private static final String RANKINGS = "rankings";
private static final String USERVISITS = "uservisits";
public static final String uagentf = "user_agents";
public static final String countryf = "country_codes";
public static final String searchkeyf = "search_keys";
private DataOptions options;
private long visits;
// client side delim
private String cdelim = ",";
private int chashsize = 150 * 1024 * 1024;
private Dummy dummy;
HiveData(DataOptions options) {
this.options = options;
parseArgs(options.getRemainArgs());
}
private void parseArgs(String[] args) {
for (int i=0; i<args.length; i++) {
if ("-v".equals(args[i])) {
visits = Long.parseLong(args[++i]);
} else if ("-d".equals(args[i])) {
cdelim = args[++i];
} else {
DataOptions.printUsage("Unknown hive data arguments -- " + args[i] + "!!!");
}
}
if (chashsize > options.getNumPages()) {
chashsize = (int) options.getNumPages();
}
}
private void setRankingsOptions(JobConf job) throws URISyntaxException {
job.setLong("pages", options.getNumPages());
job.setLong("slotpages", options.getNumSlotPages());
job.set("delimiter", cdelim);
job.setInt("hashsize", chashsize);
Utils.shareLinkZipfCore(options, job);
}
private void setVisitsOptions(JobConf job) {
job.setInt("slots", options.getNumMaps());
job.setLong("pages", options.getNumPages());
job.setLong("visits", visits);
job.set("delimiter", cdelim);
}
public static class DummyToRankingsMapper extends MapReduceBase implements
Mapper<LongWritable, Text, LongWritable, JoinBytesInt> {
private static final Log log = LogFactory.getLog(DummyToRankingsMapper.class.getName());
private HtmlCore generator;
private long pages, slotpages;
private boolean outset;
private OutputCollector<LongWritable, JoinBytesInt> myout;
private JoinBytesInt uitem, ritem;
private short[] hash;
private HashMap<Integer, Integer> hm;
private int hashsize;
private void getOptions(JobConf job) {
pages = job.getLong("pages", 0);
slotpages = job.getLong("slotpages", 0);
hashsize = job.getInt("hashsize", 0);
}
public void configure(JobConf job) {
getOptions(job);
try {
generator = new HtmlCore(job);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
outset = false;
myout = null;
uitem = new JoinBytesInt();
uitem.url = new byte[HtmlCore.getMaxUrlLength()];
ritem = new JoinBytesInt();
ritem.refs = 1;
hash = new short[hashsize];
hm = new HashMap<Integer, Integer>();
}
public void map(LongWritable key, Text value, OutputCollector<LongWritable, JoinBytesInt> output,
Reporter reporter) throws IOException {
if (!outset) {
myout = output;
outset = true;
}
int slotId = Integer.parseInt(value.toString().trim());
generator.fireRandom(slotId);
long[] range = HtmlCore.getPageRange(slotId, pages, slotpages);
/**
* For output collect
*/
for (long i=range[0]; i<range[1]; i++) {
key.set(i);
generator.nextUrlJoinBytesInt(uitem);
output.collect(key, uitem);
long[] linkids = generator.genPureLinkIds();
for (int j=0; j<linkids.length; j++) {
long uid = linkids[j];
if (uid < hashsize) {
int iid = (int) uid;
if (hash[iid]>=0) {
if (hash[iid]==HtmlCore.MAX_SHORT) {
hm.put(iid, (int) (hash[iid]) + 1);
hash[iid] = -1;
} else {
hash[iid]++;
}
} else {
hm.put(iid, hm.get(iid) + 1);
}
} else {
key.set(uid);
output.collect(key, ritem);
}
}
if (0==(i % 10000)) {
log.info("still running: " + (i - range[0]) + " of " + slotpages);
}
}
}
@Override
public void close ()
{
try {
LongWritable k = new LongWritable();
for (int i=0; i<hash.length; i++) {
if (hash[i] > 0) {
k.set(i);
ritem.refs = hash[i];
myout.collect(k, ritem);
} else if (hash[i] < 0) {
k.set(i);
ritem.refs = hm.get(i);
myout.collect(k, ritem);
}
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
public static class JoinBytesIntCombiner extends MapReduceBase implements
Reducer<LongWritable, JoinBytesInt, LongWritable, JoinBytesInt> {
// Log log = null;
JoinBytesInt item;
@Override
public void configure (JobConf job)
{
item = new JoinBytesInt();
// log = LogFactory.getLog(JoinBytesIntCombiner.class.getName());
}
@Override
public void reduce(LongWritable key, Iterator<JoinBytesInt> values,
OutputCollector<LongWritable, JoinBytesInt> output, Reporter reporter) throws IOException {
item.clear();
// StringBuffer sb = new StringBuffer("Combine: " + v.toString());
while (values.hasNext()) {
item.add(values.next());
// sb.append("-> " + v.toString());
}
output.collect(key, item);
// log.info(sb);
}
}
public static class GenerateRankingsReducer extends MapReduceBase implements
Reducer<LongWritable, JoinBytesInt, LongWritable, Text> {
private static final Log log = LogFactory.getLog(GenerateRankingsReducer.class.getName());
private Random rand;
private int errors, missed;
private JoinBytesInt v;
private int pid;
// job side delimiter
private String delim;
// private String missedids;
public void configure (JobConf job)
{
delim = job.get("delimiter");
pid = job.getInt("mapred.task.partition", 0);
rand = new Random(pid + 1);
v = new JoinBytesInt();
errors = 0;
missed = 0;
// missedids = "";
}
public void close ()
{
log.info("pid: " + pid + ", " + errors + " erros, " + missed + " missed");
}
@Override
public void reduce(LongWritable key, Iterator<JoinBytesInt> values,
OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
v.clear();
while (values.hasNext()) {
v.add(values.next());
}
if (0!=v.ulen) {
if (v.refs > 0) {
Text value = new Text(
new String(v.url) +
delim +
v.refs +
delim +
(rand.nextInt(99) + 1)
);
output.collect(
key, value);
reporter.incrCounter(HiBench.Counters.BYTES_DATA_GENERATED, 8+value.getLength());
} else {
missed++;
}
} else {
errors++;
}
}
}
private void createRankingsTableDirectly() throws IOException, URISyntaxException {
log.info("Creating table rankings...");
Path fout = new Path(options.getResultPath(), RANKINGS);
JobConf job = new JobConf(HiveData.class);
String jobname = "Create rankings";
/** TODO: change another more effective way as this operation may cause
* about 2 min delay (originally ~15min in total)
*/
setRankingsOptions(job);
job.setJobName(jobname);
job.set("mapred.reduce.slowstart.completed.maps", "0.3");
job.set("mapreduce.job.reduce.slowstart.completedmaps", "0.3");
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(JoinBytesInt.class);
job.setJarByClass(DummyToRankingsMapper.class);
job.setJarByClass(JoinBytesIntCombiner.class);
job.setJarByClass(GenerateRankingsReducer.class);
job.setMapperClass(DummyToRankingsMapper.class);
job.setCombinerClass(JoinBytesIntCombiner.class);
job.setReducerClass(GenerateRankingsReducer.class);
if (options.getNumReds() > 0) {
job.setNumReduceTasks(options.getNumReds());
} else {
job.setNumReduceTasks(Utils.getMaxNumReds());
}
job.setInputFormat(NLineInputFormat.class);
FileInputFormat.setInputPaths(job, dummy.getPath());
job.set("mapred.map.output.compression.type", "BLOCK");
job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK");
MapFileOutputFormat.setCompressOutput(job, true);
// MapFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.LzoCodec.class);
MapFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.DefaultCodec.class);
if (options.isSequenceOut()) {
job.setOutputFormat(SequenceFileOutputFormat.class);
} else {
job.setOutputFormat(TextOutputFormat.class);
}
if (null != options.getCodecClass()) {
job.set("mapred.output.compression.type","BLOCK");
job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK");
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
}
FileOutputFormat.setOutputPath(job, fout);
log.info("Running Job: " +jobname);
log.info("Pages file " + dummy.getPath() + " as input");
log.info("Rankings file " + fout + " as output");
JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);
}
/***
* Mapper to randomly create user visits. In map step, only the target
* urls of user visits are created, the rest content of visits will be
* created in reduce step
* @author lyi2
*
*/
public static class DummyToAccessNoMapper extends MapReduceBase implements
Mapper<LongWritable, Text, LongWritable, JoinBytesInt> {
private JoinBytesInt vitem;
private long pages;
private long slots;
private long visits;
// job side delimiter
private String delim;
private Visit visit;
public void configure (JobConf job)
{
try {
pages = job.getLong("pages", 0);
slots = job.getLong("slots", 0);
visits = job.getLong("visits", 0);
delim = job.get("delimiter");
visit = new Visit(DistributedCache.getLocalCacheFiles(job),
delim, pages);
vitem = new JoinBytesInt();
vitem.refs = 1;
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<LongWritable, JoinBytesInt> output, Reporter reporter)
throws IOException {
int slotId = Integer.parseInt(value.toString().trim());
visit.fireRandom(slotId);
for (long i=slotId; i<=visits;) {
// simply setting url id is fine in map step
key.set(visit.nextUrlId());
output.collect(key, vitem);
i = i + slots;
}
}
}
public static class SequenceRankingsToUrlsMapper extends MapReduceBase implements
Mapper<LongWritable, Text, LongWritable, JoinBytesInt> {
public JoinBytesInt uitem;
public void configure(JobConf job) {
uitem = new JoinBytesInt();
// getBasicOptions(job);
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<LongWritable, JoinBytesInt> output, Reporter reporter) throws IOException {
uitem.url= value.toString().split(",")[0].getBytes();
uitem.ulen = (byte) uitem.url.length;
output.collect(key, uitem);
}
}
public static class TextRankingsToUrlsMapper extends MapReduceBase implements
Mapper<LongWritable, Text, LongWritable, JoinBytesInt> {
public JoinBytesInt uitem;
public void configure(JobConf job) {
uitem = new JoinBytesInt();
// getBasicOptions(job);
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<LongWritable, JoinBytesInt> output, Reporter reporter) throws IOException {
String[] items = value.toString().split("[,\t]");
key.set(Long.parseLong(items[0]));
uitem.url= items[1].getBytes();
uitem.ulen = (byte) uitem.url.length;
output.collect(key, uitem);
}
}
public static class CreateUserVisitsReducer extends MapReduceBase implements
Reducer<LongWritable, JoinBytesInt, LongWritable, Text> {
private static final Log log = LogFactory.getLog(CreateUserVisitsReducer.class.getName());
private long pages;
private Visit visit;
private int errors, missed;
private JoinBytesInt vitem;
// job side delimiter
private String delim;
private int pid;
public void configure (JobConf job)
{
try {
pages = job.getLong("pages", 0);
delim = job.get("delimiter");
pid = job.getInt("mapred.task.partition", 0);
visit = new Visit(DistributedCache.getLocalCacheFiles(job),
delim, pages);
visit.fireRandom(pid + 1);
vitem = new JoinBytesInt();
errors = 0;
missed = 0;
} catch (IOException e) {
e.printStackTrace();
}
}
public void close ()
{
log.info("pid: " + pid + ", " + errors + " erros, " + missed + " missed");
}
/**
* Reduce: to sum up the record sizes (of slots) one by one so that to determine the
* corresponding start point to hold the records for each slot.
*/
@Override
public void reduce(LongWritable key, Iterator<JoinBytesInt> values,
OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
vitem.clear();
// StringBuffer sb = new StringBuffer("Reduce: " + v.toString());
while (values.hasNext()) {
vitem.add(values.next());
// sb.append("-> " + v.toString());
}
// log.info(sb);
if (0!=vitem.ulen) {
if (vitem.refs > 0) {
for (int i=0; i<vitem.refs; i++) {
Text value = new Text(visit.nextAccess(new String(vitem.url)));
output.collect(key, value);
reporter.incrCounter(HiBench.Counters.BYTES_DATA_GENERATED, 8+value.getLength());
}
} else {
missed++;
}
} else {
errors++;
}
}
}
private void createUserVisitsTableDirectly() throws IOException, URISyntaxException {
log.info("Creating user visits...");
Path rankings = new Path(options.getResultPath(), RANKINGS);
Path fout = new Path(options.getResultPath(), USERVISITS);
JobConf job = new JobConf(HiveData.class);
String jobname = "Create uservisits";
job.setJobName(jobname);
setVisitsOptions(job);
/***
* Set distributed cache file for table generation,
* cache files include:
* 1. user agents
* 2. country code and language code
* 3. search keys
*/
Path uagentPath = new Path(options.getWorkPath(), uagentf);
DistributedCache.addCacheFile(uagentPath.toUri(), job);
Path countryPath = new Path(options.getWorkPath(), countryf);
DistributedCache.addCacheFile(countryPath.toUri(), job);
Path searchkeyPath = new Path(options.getWorkPath(), searchkeyf);
DistributedCache.addCacheFile(searchkeyPath.toUri(), job);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(JoinBytesInt.class);
MultipleInputs.addInputPath(job, dummy.getPath(),
NLineInputFormat.class, DummyToAccessNoMapper.class);
if (options.isSequenceOut()) {
MultipleInputs.addInputPath(job, rankings,
SequenceFileInputFormat.class, SequenceRankingsToUrlsMapper.class);
} else {
MultipleInputs.addInputPath(job, rankings,
TextInputFormat.class, TextRankingsToUrlsMapper.class);
}
job.setCombinerClass(JoinBytesIntCombiner.class);
job.setReducerClass(CreateUserVisitsReducer.class);
if (options.getNumReds() > 0) {
job.setNumReduceTasks(options.getNumReds());
} else {
job.setNumReduceTasks(Utils.getMaxNumReds());
}
// job.setNumReduceTasks(options.slots/2);
if (options.isSequenceOut()) {
job.setOutputFormat(SequenceFileOutputFormat.class);
} else {
job.setOutputFormat(TextOutputFormat.class);
}
if (null != options.getCodecClass()) {
job.set("mapred.output.compression.type","BLOCK");
job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK");
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
}
FileOutputFormat.setOutputPath(job, fout);
log.info("Running Job: " +jobname);
log.info("Dummy file " + dummy.getPath() + " as input");
log.info("Rankings file " + rankings + " as input");
log.info("Ouput file " + fout);
JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);
}
public void generate() throws Exception {
log.info("Generating hive data files...");
init();
createRankingsTableDirectly();
createUserVisitsTableDirectly();
close();
}
public void loadFiles() throws IOException {
RawData.createSearchKeys(new Path(options.getWorkPath(), searchkeyf));
RawData.createUserAgents(new Path(options.getWorkPath(), uagentf));
RawData.createCCodes(new Path(options.getWorkPath(), countryf));
}
private void init() throws IOException {
log.info("Initializing hive date generator...");
Utils.checkHdfsPath(options.getResultPath(), true);
Utils.checkHdfsPath(options.getWorkPath(), true);
loadFiles();
Utils.serialLinkZipf(options);
dummy = new Dummy(options.getWorkPath(), options.getNumMaps());
}
public void close() throws IOException {
log.info("Closing hive data generator...");
Utils.checkHdfsPath(options.getWorkPath());
}
}
HiBench成长笔记——(5) HiBench-Spark-SQL-Scan源码分析的更多相关文章
- 第八篇:Spark SQL Catalyst源码分析之UDF
/** Spark SQL源码分析系列文章*/ 在SQL的世界里,除了官方提供的常用的处理函数之外,一般都会提供可扩展的对外自定义函数接口,这已经成为一种事实的标准. 在前面Spark SQL源码分析 ...
- 第五篇:Spark SQL Catalyst源码分析之Optimizer
/** Spark SQL源码分析系列文章*/ 前几篇文章介绍了Spark SQL的Catalyst的核心运行流程.SqlParser,和Analyzer 以及核心类库TreeNode,本文将详细讲解 ...
- 第六篇:Spark SQL Catalyst源码分析之Physical Plan
/** Spark SQL源码分析系列文章*/ 前面几篇文章主要介绍的是spark sql包里的的spark sql执行流程,以及Catalyst包内的SqlParser,Analyzer和Optim ...
- 第四篇:Spark SQL Catalyst源码分析之TreeNode Library
/** Spark SQL源码分析系列文章*/ 前几篇文章介绍了Spark SQL的Catalyst的核心运行流程.SqlParser,和Analyzer,本来打算直接写Optimizer的,但是发现 ...
- 第三篇:Spark SQL Catalyst源码分析之Analyzer
/** Spark SQL源码分析系列文章*/ 前面几篇文章讲解了Spark SQL的核心执行流程和Spark SQL的Catalyst框架的Sql Parser是怎样接受用户输入sql,经过解析生成 ...
- 第二篇:Spark SQL Catalyst源码分析之SqlParser
/** Spark SQL源码分析系列文章*/ Spark SQL的核心执行流程我们已经分析完毕,可以参见Spark SQL核心执行流程,下面我们来分析执行流程中各个核心组件的工作职责. 本文先从入口 ...
- 【原】Spark中Client源码分析(二)
继续前一篇的内容.前一篇内容为: Spark中Client源码分析(一)http://www.cnblogs.com/yourarebest/p/5313006.html DriverClient中的 ...
- 【原】Spark中Master源码分析(二)
继续上一篇的内容.上一篇的内容为: Spark中Master源码分析(一) http://www.cnblogs.com/yourarebest/p/5312965.html 4.receive方法, ...
- 【原】 Spark中Worker源码分析(二)
继续前一篇的内容.前一篇内容为: Spark中Worker源码分析(一)http://www.cnblogs.com/yourarebest/p/5300202.html 4.receive方法, r ...
- Spark Scheduler模块源码分析之TaskScheduler和SchedulerBackend
本文是Scheduler模块源码分析的第二篇,第一篇Spark Scheduler模块源码分析之DAGScheduler主要分析了DAGScheduler.本文接下来结合Spark-1.6.0的源码继 ...
随机推荐
- video兼容ie,ckplayer网页播放器
<!DOCTYPE html> <html> <head> <meta charset="UTF-8"> <title> ...
- 「JSOI2011」棒棒糖
「JSOI2011」棒棒糖 传送门 双倍经验 考虑主席树做法. 对于当前的主席树节点,如果 \(\le mid\) 的个数足够就往左边走,否则就尝试往右边走,都不行就返回 \(0\). 参考代码: # ...
- Python回收机制
1.小整数对象池 整数在程序中的使用非常广泛,python 为了优化速度,使用了小整数对象池,避免整数频繁申请和销毁和内存空间. Python 对小整数的定义事[-5, 257]这些整数对象的hi提前 ...
- LeetCode 21. Merge Two Sorted Lists(合并两个有序链表)
题意:合并两个有序链表 /** * Definition for singly-linked list. * struct ListNode { * int val; * ListNode *next ...
- vs2013 SQLite数据库增删改查
代码:Database2.7z Database.h: #pragma once #include<string> #include<vector> #include<W ...
- 【转】 android之如何在两个activity之间传递handler_利用broadcast广播机制
原文:http://blog.csdn.net/jason0539/article/details/18075293 这算是如何在两个activity之间传递handler的解决方案二了,解决方案一见 ...
- vue重置data数据
可以通过this.$data获取当前状态下的data,通过this.$options.data()获取该组件初始状态下的data. 然后只要使用Object.assign(this.$data, th ...
- [经验] SpringBoot 远程连接 Linux 上的 Redis
开发环境: ---------- springboot 2.X ---------- Linux Ubuntu 18.0.04 关于怎么在 Ubuntu 上安装 Linux , 网上的教程一大堆, 这 ...
- 31 反射方式给类的属性赋值 和 对象赋值(clone)
1.配置类 package com.da.tool.util.configuration.reflect; /** */ public class JobInfo { private String j ...
- Tomcat 8 Invalid character found in the request target. The valid characters are defined in RFC 3986
终极解决方案: Invalid character found in the request target. The valid characters are defined in RFC 3986 ...