HiBench Learning Notes (5): HiBench-Spark-SQL-Scan Source Code Analysis
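This note walks the call chain of the Spark SQL Scan workload: run.sh sets up the workload configuration and the generated SQL script, run_spark_job in workload_functions.sh assembles and submits the spark-submit command, ScalaSparkSQLBench.scala executes the SQL script statement by statement through a HiveContext, and HiveData.java is the MapReduce data generator that produces the rankings and uservisits tables the scan query reads.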
run.sh
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
current_dir=`dirname "$0"`
current_dir=`cd "$current_dir"; pwd`
root_dir=${current_dir}/../../../../../
workload_config=${root_dir}/conf/workloads/sql/scan.conf
. "${root_dir}/bin/functions/load_bench_config.sh"
enter_bench ScalaSparkScan ${workload_config} ${current_dir}
show_bannar start
# prepare SQL
HIVEBENCH_SQL_FILE=${WORKLOAD_RESULT_FOLDER}/rankings_uservisits_scan.hive
prepare_sql_scan ${HIVEBENCH_SQL_FILE}
START_TIME=`timestamp`
rmr_hdfs $OUTPUT_HDFS
run_spark_job com.intel.hibench.sparkbench.sql.ScalaSparkSQLBench ScalaScan ${HIVEBENCH_SQL_FILE}
END_TIME=`timestamp`
SIZE=`dir_size $OUTPUT_HDFS`
gen_report ${START_TIME} ${END_TIME} ${SIZE:-}
show_bannar finish
leave_bench
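run.sh itself contains no SQL; prepare_sql_scan (defined in workload_functions.sh) writes the Hive statements into rankings_uservisits_scan.hive, and the Spark job then runs that file statement by statement. The Scala sketch below shows roughly what that execution amounts to for the Scan workload. The object name ScanSketch, the uservisits column list, and the HDFS locations are assumptions based on a typical HiBench scan configuration, not copied from the generated file.
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext

// Sketch only: the Scan workload declares an external table over the generated
// uservisits data and copies it into a second external table with a full scan.
// The column list and HDFS locations below are assumed, not taken from HiBench.
object ScanSketch {
  def main(args: Array[String]): Unit = {
    val sc = new SparkContext(new SparkConf().setAppName("ScanSketch"))
    val hc = new HiveContext(sc)

    val columns = "sourceIP STRING, destURL STRING, visitDate STRING, adRevenue DOUBLE, " +
      "userAgent STRING, countryCode STRING, languageCode STRING, searchWord STRING, duration INT"

    Seq(
      s"CREATE EXTERNAL TABLE IF NOT EXISTS uservisits ($columns) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/HiBench/Scan/Input/uservisits'",
      s"CREATE EXTERNAL TABLE IF NOT EXISTS uservisits_copy ($columns) " +
        "ROW FORMAT DELIMITED FIELDS TERMINATED BY ',' LOCATION '/HiBench/Scan/Output/uservisits_copy'",
      "INSERT OVERWRITE TABLE uservisits_copy SELECT * FROM uservisits"
    ).foreach(hc.sql)

    sc.stop()
  }
}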
workload_functions.sh
function run_spark_job() {
LIB_JARS=
while (($#)); do
if [ "$1" = "--jars" ]; then
LIB_JARS="--jars $2"
continue
fi
break
done
CLS=$1
shift
export_withlog SPARKBENCH_PROPERTIES_FILES
YARN_OPTS=""
if [[ "$SPARK_MASTER" == yarn-* ]]; then
export_withlog HADOOP_CONF_DIR
YARN_OPTS="--num-executors ${YARN_NUM_EXECUTORS}"
if [[ -n "${YARN_EXECUTOR_CORES:-}" ]]; then
YARN_OPTS="${YARN_OPTS} --executor-cores ${YARN_EXECUTOR_CORES}"
fi
if [[ -n "${SPARK_YARN_EXECUTOR_MEMORY:-}" ]]; then
YARN_OPTS="${YARN_OPTS} --executor-memory ${SPARK_YARN_EXECUTOR_MEMORY}"
fi
if [[ -n "${SPAKR_YARN_DRIVER_MEMORY:-}" ]]; then
YARN_OPTS="${YARN_OPTS} --driver-memory ${SPARK_YARN_DRIVER_MEMORY}"
fi
fi
if [[ "$CLS" == *.py ]]; then
LIB_JARS="$LIB_JARS --jars ${SPARKBENCH_JAR}"
SUBMIT_CMD="${SPARK_HOME}/bin/spark-submit ${LIB_JARS} --properties-file ${SPARK_PROP_CONF} --master ${SPARK_MASTER} ${YARN_OPTS} ${CLS} $@"
else
SUBMIT_CMD="${SPARK_HOME}/bin/spark-submit ${LIB_JARS} --properties-file ${SPARK_PROP_CONF} --class ${CLS} --master ${SPARK_MASTER} ${YARN_OPTS} ${SPARKBENCH_JAR} $@"
fi
echo -e "${BGreen}Submit Spark job: ${Green}${SUBMIT_CMD}${Color_Off}"
MONITOR_PID=`start_monitor`
execute_withlog ${SUBMIT_CMD}
result=$?
stop_monitor ${MONITOR_PID}
if [ $result -ne 0 ]
then
echo -e "${BRed}ERROR${Color_Off}: Spark job ${BYellow}${CLS}${Color_Off} failed to run successfully."
echo -e "${BBlue}Hint${Color_Off}: You can goto ${BYellow}${WORKLOAD_RESULT_FOLDER}/bench.log${Color_Off} to check for detailed log.\nOpening log tail for you:\n"
tail ${WORKLOAD_RESULT_FOLDER}/bench.log
exit $result
fi
}
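For the Scan workload, run.sh calls this function as run_spark_job com.intel.hibench.sparkbench.sql.ScalaSparkSQLBench ScalaScan ${HIVEBENCH_SQL_FILE}, so SUBMIT_CMD expands to roughly spark-submit --properties-file <spark conf> --class com.intel.hibench.sparkbench.sql.ScalaSparkSQLBench --master ${SPARK_MASTER} [YARN executor/driver options] <sparkbench assembly jar> ScalaScan .../rankings_uservisits_scan.hive; the exact jar and property-file paths come from the HiBench configuration loaded earlier.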
ScalaSparkSQLBench.scala
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.intel.hibench.sparkbench.sql
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.sql.hive.HiveContext
/*
* ported from HiBench's hive bench
*/
object ScalaSparkSQLBench{
def main(args: Array[String]){
if (args.length < 2){
System.err.println(
s"Usage: $ScalaSparkSQLBench <workload name> <SQL sciprt file>"
)
System.exit(1)
}
val workload_name = args(0)
val sql_file = args(1)
val sparkConf = new SparkConf().setAppName(workload_name)
val sc = new SparkContext(sparkConf)
val hc = new HiveContext(sc)
val _sql = scala.io.Source.fromFile(sql_file).mkString
_sql.split(';').foreach { x =>
if (x.trim.nonEmpty)
hc.sql(x)
}
sc.stop()
}
}
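The driver above is written against the Spark 1.x API (SparkContext plus HiveContext). On Spark 2.x and later, HiveContext is deprecated; a roughly equivalent driver — a sketch for comparison, not part of the HiBench source tree — would go through SparkSession with Hive support enabled:
import org.apache.spark.sql.SparkSession

// Sketch only: same behaviour as ScalaSparkSQLBench, written against the Spark 2.x+ API.
object ScalaSparkSQLBench2x {
  def main(args: Array[String]): Unit = {
    if (args.length < 2) {
      System.err.println("Usage: ScalaSparkSQLBench2x <workload name> <SQL script file>")
      System.exit(1)
    }
    val workloadName = args(0)
    val sqlFile = args(1)
    val spark = SparkSession.builder()
      .appName(workloadName)
      .enableHiveSupport()   // needed so spark.sql() can reach the Hive metastore tables
      .getOrCreate()
    val script = scala.io.Source.fromFile(sqlFile).mkString
    script.split(';').map(_.trim).filter(_.nonEmpty).foreach(spark.sql)
    spark.stop()
  }
}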
HiveData.java
package HiBench;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.filecache.DistributedCache;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapFileOutputFormat;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hadoop.mapred.SequenceFileOutputFormat;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;
import org.apache.hadoop.mapred.lib.MultipleInputs;
import org.apache.hadoop.mapred.lib.NLineInputFormat;
public class HiveData {
private static final Log log = LogFactory.getLog(HiveData.class.getName());
private static final String RANKINGS = "rankings";
private static final String USERVISITS = "uservisits";
public static final String uagentf = "user_agents";
public static final String countryf = "country_codes";
public static final String searchkeyf = "search_keys";
private DataOptions options;
private long visits;
// client side delim
private String cdelim = ",";
private int chashsize = 150 * 1024 * 1024;
private Dummy dummy;
HiveData(DataOptions options) {
this.options = options;
parseArgs(options.getRemainArgs());
}
private void parseArgs(String[] args) {
for (int i=0; i<args.length; i++) {
if ("-v".equals(args[i])) {
visits = Long.parseLong(args[++i]);
} else if ("-d".equals(args[i])) {
cdelim = args[++i];
} else {
DataOptions.printUsage("Unknown hive data arguments -- " + args[i] + "!!!");
}
}
if (chashsize > options.getNumPages()) {
chashsize = (int) options.getNumPages();
}
}
private void setRankingsOptions(JobConf job) throws URISyntaxException {
job.setLong("pages", options.getNumPages());
job.setLong("slotpages", options.getNumSlotPages());
job.set("delimiter", cdelim);
job.setInt("hashsize", chashsize);
Utils.shareLinkZipfCore(options, job);
}
private void setVisitsOptions(JobConf job) {
job.setInt("slots", options.getNumMaps());
job.setLong("pages", options.getNumPages());
job.setLong("visits", visits);
job.set("delimiter", cdelim);
}
public static class DummyToRankingsMapper extends MapReduceBase implements
Mapper<LongWritable, Text, LongWritable, JoinBytesInt> {
private static final Log log = LogFactory.getLog(DummyToRankingsMapper.class.getName());
private HtmlCore generator;
private long pages, slotpages;
private boolean outset;
private OutputCollector<LongWritable, JoinBytesInt> myout;
private JoinBytesInt uitem, ritem;
private short[] hash;
private HashMap<Integer, Integer> hm;
private int hashsize;
private void getOptions(JobConf job) {
pages = job.getLong("pages", 0);
slotpages = job.getLong("slotpages", 0);
hashsize = job.getInt("hashsize", 0);
}
public void configure(JobConf job) {
getOptions(job);
try {
generator = new HtmlCore(job);
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
outset = false;
myout = null;
uitem = new JoinBytesInt();
uitem.url = new byte[HtmlCore.getMaxUrlLength()];
ritem = new JoinBytesInt();
ritem.refs = 1;
hash = new short[hashsize];
hm = new HashMap<Integer, Integer>();
}
public void map(LongWritable key, Text value, OutputCollector<LongWritable, JoinBytesInt> output,
Reporter reporter) throws IOException {
if (!outset) {
myout = output;
outset = true;
}
int slotId = Integer.parseInt(value.toString().trim());
generator.fireRandom(slotId);
long[] range = HtmlCore.getPageRange(slotId, pages, slotpages);
/**
* For output collect
*/
for (long i=range[0]; i<range[1]; i++) {
key.set(i);
generator.nextUrlJoinBytesInt(uitem);
output.collect(key, uitem);
long[] linkids = generator.genPureLinkIds();
for (int j=0; j<linkids.length; j++) {
long uid = linkids[j];
if (uid < hashsize) {
int iid = (int) uid;
if (hash[iid]>=0) {
if (hash[iid]==HtmlCore.MAX_SHORT) {
hm.put(iid, (int) (hash[iid]) + 1);
hash[iid] = -1;
} else {
hash[iid]++;
}
} else {
hm.put(iid, hm.get(iid) + 1);
}
} else {
key.set(uid);
output.collect(key, ritem);
}
}
if (0==(i % 10000)) {
log.info("still running: " + (i - range[0]) + " of " + slotpages);
}
}
}
@Override
public void close ()
{
try {
LongWritable k = new LongWritable();
for (int i=0; i<hash.length; i++) {
if (hash[i] > 0) {
k.set(i);
ritem.refs = hash[i];
myout.collect(k, ritem);
} else if (hash[i] < 0) {
k.set(i);
ritem.refs = hm.get(i);
myout.collect(k, ritem);
}
}
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
}
public static class JoinBytesIntCombiner extends MapReduceBase implements
Reducer<LongWritable, JoinBytesInt, LongWritable, JoinBytesInt> {
// Log log = null;
JoinBytesInt item;
@Override
public void configure (JobConf job)
{
item = new JoinBytesInt();
// log = LogFactory.getLog(JoinBytesIntCombiner.class.getName());
}
@Override
public void reduce(LongWritable key, Iterator<JoinBytesInt> values,
OutputCollector<LongWritable, JoinBytesInt> output, Reporter reporter) throws IOException {
item.clear();
// StringBuffer sb = new StringBuffer("Combine: " + v.toString());
while (values.hasNext()) {
item.add(values.next());
// sb.append("-> " + v.toString());
}
output.collect(key, item);
// log.info(sb);
}
}
public static class GenerateRankingsReducer extends MapReduceBase implements
Reducer<LongWritable, JoinBytesInt, LongWritable, Text> {
private static final Log log = LogFactory.getLog(GenerateRankingsReducer.class.getName());
private Random rand;
private int errors, missed;
private JoinBytesInt v;
private int pid;
// job side delimiter
private String delim;
// private String missedids;
public void configure (JobConf job)
{
delim = job.get("delimiter");
pid = job.getInt("mapred.task.partition", 0);
rand = new Random(pid + 1);
v = new JoinBytesInt();
errors = 0;
missed = 0;
// missedids = "";
}
public void close ()
{
log.info("pid: " + pid + ", " + errors + " erros, " + missed + " missed");
}
@Override
public void reduce(LongWritable key, Iterator<JoinBytesInt> values,
OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
v.clear();
while (values.hasNext()) {
v.add(values.next());
}
if (0!=v.ulen) {
if (v.refs > 0) {
Text value = new Text(
new String(v.url) +
delim +
v.refs +
delim +
(rand.nextInt(99) + 1)
);
output.collect(
key, value);
reporter.incrCounter(HiBench.Counters.BYTES_DATA_GENERATED, 8+value.getLength());
} else {
missed++;
}
} else {
errors++;
}
}
}
private void createRankingsTableDirectly() throws IOException, URISyntaxException {
log.info("Creating table rankings...");
Path fout = new Path(options.getResultPath(), RANKINGS);
JobConf job = new JobConf(HiveData.class);
String jobname = "Create rankings";
/** TODO: switch to a more efficient approach, as this operation may add
 * about 2 min of delay (originally ~15 min in total)
 */
setRankingsOptions(job);
job.setJobName(jobname);
job.set("mapred.reduce.slowstart.completed.maps", "0.3");
job.set("mapreduce.job.reduce.slowstart.completedmaps", "0.3");
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(JoinBytesInt.class);
job.setJarByClass(DummyToRankingsMapper.class);
job.setJarByClass(JoinBytesIntCombiner.class);
job.setJarByClass(GenerateRankingsReducer.class);
job.setMapperClass(DummyToRankingsMapper.class);
job.setCombinerClass(JoinBytesIntCombiner.class);
job.setReducerClass(GenerateRankingsReducer.class);
if (options.getNumReds() > 0) {
job.setNumReduceTasks(options.getNumReds());
} else {
job.setNumReduceTasks(Utils.getMaxNumReds());
}
job.setInputFormat(NLineInputFormat.class);
FileInputFormat.setInputPaths(job, dummy.getPath());
job.set("mapred.map.output.compression.type", "BLOCK");
job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK");
MapFileOutputFormat.setCompressOutput(job, true);
// MapFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.LzoCodec.class);
MapFileOutputFormat.setOutputCompressorClass(job, org.apache.hadoop.io.compress.DefaultCodec.class);
if (options.isSequenceOut()) {
job.setOutputFormat(SequenceFileOutputFormat.class);
} else {
job.setOutputFormat(TextOutputFormat.class);
}
if (null != options.getCodecClass()) {
job.set("mapred.output.compression.type","BLOCK");
job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK");
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
}
FileOutputFormat.setOutputPath(job, fout);
log.info("Running Job: " +jobname);
log.info("Pages file " + dummy.getPath() + " as input");
log.info("Rankings file " + fout + " as output");
JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);
}
/***
 * Mapper to randomly create user visits. In the map step, only the target
 * urls of the user visits are created; the remaining fields of each visit
 * are filled in during the reduce step.
 * @author lyi2
 */
public static class DummyToAccessNoMapper extends MapReduceBase implements
Mapper<LongWritable, Text, LongWritable, JoinBytesInt> {
private JoinBytesInt vitem;
private long pages;
private long slots;
private long visits;
// job side delimiter
private String delim;
private Visit visit;
public void configure (JobConf job)
{
try {
pages = job.getLong("pages", 0);
slots = job.getLong("slots", 0);
visits = job.getLong("visits", 0);
delim = job.get("delimiter");
visit = new Visit(DistributedCache.getLocalCacheFiles(job),
delim, pages);
vitem = new JoinBytesInt();
vitem.refs = 1;
} catch (IOException e) {
e.printStackTrace();
}
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<LongWritable, JoinBytesInt> output, Reporter reporter)
throws IOException {
int slotId = Integer.parseInt(value.toString().trim());
visit.fireRandom(slotId);
for (long i=slotId; i<=visits;) {
// simply setting url id is fine in map step
key.set(visit.nextUrlId());
output.collect(key, vitem);
i = i + slots;
}
}
}
public static class SequenceRankingsToUrlsMapper extends MapReduceBase implements
Mapper<LongWritable, Text, LongWritable, JoinBytesInt> {
public JoinBytesInt uitem;
public void configure(JobConf job) {
uitem = new JoinBytesInt();
// getBasicOptions(job);
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<LongWritable, JoinBytesInt> output, Reporter reporter) throws IOException {
uitem.url= value.toString().split(",")[0].getBytes();
uitem.ulen = (byte) uitem.url.length;
output.collect(key, uitem);
}
}
public static class TextRankingsToUrlsMapper extends MapReduceBase implements
Mapper<LongWritable, Text, LongWritable, JoinBytesInt> {
public JoinBytesInt uitem;
public void configure(JobConf job) {
uitem = new JoinBytesInt();
// getBasicOptions(job);
}
@Override
public void map(LongWritable key, Text value,
OutputCollector<LongWritable, JoinBytesInt> output, Reporter reporter) throws IOException {
String[] items = value.toString().split("[,\t]");
key.set(Long.parseLong(items[0]));
uitem.url= items[1].getBytes();
uitem.ulen = (byte) uitem.url.length;
output.collect(key, uitem);
}
}
public static class CreateUserVisitsReducer extends MapReduceBase implements
Reducer<LongWritable, JoinBytesInt, LongWritable, Text> {
private static final Log log = LogFactory.getLog(CreateUserVisitsReducer.class.getName());
private long pages;
private Visit visit;
private int errors, missed;
private JoinBytesInt vitem;
// job side delimiter
private String delim;
private int pid;
public void configure (JobConf job)
{
try {
pages = job.getLong("pages", 0);
delim = job.get("delimiter");
pid = job.getInt("mapred.task.partition", 0);
visit = new Visit(DistributedCache.getLocalCacheFiles(job),
delim, pages);
visit.fireRandom(pid + 1);
vitem = new JoinBytesInt();
errors = 0;
missed = 0;
} catch (IOException e) {
e.printStackTrace();
}
}
public void close ()
{
log.info("pid: " + pid + ", " + errors + " erros, " + missed + " missed");
}
/**
 * Reduce: merge the url item with the visit counts collected for each page id,
 * then emit one complete user-visit record per reference via Visit.nextAccess().
 */
@Override
public void reduce(LongWritable key, Iterator<JoinBytesInt> values,
OutputCollector<LongWritable, Text> output, Reporter reporter) throws IOException {
vitem.clear();
// StringBuffer sb = new StringBuffer("Reduce: " + v.toString());
while (values.hasNext()) {
vitem.add(values.next());
// sb.append("-> " + v.toString());
}
// log.info(sb);
if (0!=vitem.ulen) {
if (vitem.refs > 0) {
for (int i=0; i<vitem.refs; i++) {
Text value = new Text(visit.nextAccess(new String(vitem.url)));
output.collect(key, value);
reporter.incrCounter(HiBench.Counters.BYTES_DATA_GENERATED, 8+value.getLength());
}
} else {
missed++;
}
} else {
errors++;
}
}
}
private void createUserVisitsTableDirectly() throws IOException, URISyntaxException {
log.info("Creating user visits...");
Path rankings = new Path(options.getResultPath(), RANKINGS);
Path fout = new Path(options.getResultPath(), USERVISITS);
JobConf job = new JobConf(HiveData.class);
String jobname = "Create uservisits";
job.setJobName(jobname);
setVisitsOptions(job);
/***
* Set distributed cache file for table generation,
* cache files include:
* 1. user agents
* 2. country code and language code
* 3. search keys
*/
Path uagentPath = new Path(options.getWorkPath(), uagentf);
DistributedCache.addCacheFile(uagentPath.toUri(), job);
Path countryPath = new Path(options.getWorkPath(), countryf);
DistributedCache.addCacheFile(countryPath.toUri(), job);
Path searchkeyPath = new Path(options.getWorkPath(), searchkeyf);
DistributedCache.addCacheFile(searchkeyPath.toUri(), job);
job.setOutputKeyClass(LongWritable.class);
job.setOutputValueClass(Text.class);
job.setMapOutputKeyClass(LongWritable.class);
job.setMapOutputValueClass(JoinBytesInt.class);
MultipleInputs.addInputPath(job, dummy.getPath(),
NLineInputFormat.class, DummyToAccessNoMapper.class);
if (options.isSequenceOut()) {
MultipleInputs.addInputPath(job, rankings,
SequenceFileInputFormat.class, SequenceRankingsToUrlsMapper.class);
} else {
MultipleInputs.addInputPath(job, rankings,
TextInputFormat.class, TextRankingsToUrlsMapper.class);
}
job.setCombinerClass(JoinBytesIntCombiner.class);
job.setReducerClass(CreateUserVisitsReducer.class);
if (options.getNumReds() > 0) {
job.setNumReduceTasks(options.getNumReds());
} else {
job.setNumReduceTasks(Utils.getMaxNumReds());
}
// job.setNumReduceTasks(options.slots/2);
if (options.isSequenceOut()) {
job.setOutputFormat(SequenceFileOutputFormat.class);
} else {
job.setOutputFormat(TextOutputFormat.class);
}
if (null != options.getCodecClass()) {
job.set("mapred.output.compression.type","BLOCK");
job.set("mapreduce.output.fileoutputformat.compress.type","BLOCK");
FileOutputFormat.setCompressOutput(job, true);
FileOutputFormat.setOutputCompressorClass(job, options.getCodecClass());
}
FileOutputFormat.setOutputPath(job, fout);
log.info("Running Job: " +jobname);
log.info("Dummy file " + dummy.getPath() + " as input");
log.info("Rankings file " + rankings + " as input");
log.info("Ouput file " + fout);
JobClient.runJob(job);
log.info("Finished Running Job: " + jobname);
}
public void generate() throws Exception {
log.info("Generating hive data files...");
init();
createRankingsTableDirectly();
createUserVisitsTableDirectly();
close();
}
public void loadFiles() throws IOException {
RawData.createSearchKeys(new Path(options.getWorkPath(), searchkeyf));
RawData.createUserAgents(new Path(options.getWorkPath(), uagentf));
RawData.createCCodes(new Path(options.getWorkPath(), countryf));
}
private void init() throws IOException {
log.info("Initializing hive date generator...");
Utils.checkHdfsPath(options.getResultPath(), true);
Utils.checkHdfsPath(options.getWorkPath(), true);
loadFiles();
Utils.serialLinkZipf(options);
dummy = new Dummy(options.getWorkPath(), options.getNumMaps());
}
public void close() throws IOException {
log.info("Closing hive data generator...");
Utils.checkHdfsPath(options.getWorkPath());
}
}
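Taken together, HiveData.generate() leaves two delimited tables under the result path: rankings, written by GenerateRankingsReducer as url, reference count and a random duration, and uservisits, whose rows are filled in by Visit.nextAccess() in CreateUserVisitsReducer. The row layouts below are a sketch assuming the standard HiBench web-analytics schema; the case class names and field names are illustrative assumptions, since the actual DDL lives in the generated .hive script.
// Assumed row layouts of the generated tables; field names follow the usual
// HiBench/AMPLab schema and are not taken from HiveData.java itself.
case class Ranking(pageURL: String, pageRank: Int, avgDuration: Int)

case class UserVisit(
  sourceIP: String, destURL: String, visitDate: String, adRevenue: Double,
  userAgent: String, countryCode: String, languageCode: String,
  searchWord: String, duration: Int)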