os.environ

赋值

ssh://root@192.168.2.51:22/usr/bin/python -u /home/data/tmp_test/trunk/personas/tmp_spark_mongo_relset_javahome.py
JAVA_HOME is not set
Traceback (most recent call last):
File "/home/data/tmp_test/trunk/personas/tmp_spark_mongo_relset_javahome.py", line 16, in <module>
.appName("PythonPi")\
File "/root/.local/lib/python3.6/site-packages/pyspark/sql/session.py", line 169, in getOrCreate
sc = SparkContext.getOrCreate(sparkConf)
File "/root/.local/lib/python3.6/site-packages/pyspark/context.py", line 334, in getOrCreate
SparkContext(conf=conf or SparkConf())
File "/root/.local/lib/python3.6/site-packages/pyspark/context.py", line 115, in __init__
SparkContext._ensure_initialized(self, gateway=gateway, conf=conf)
File "/root/.local/lib/python3.6/site-packages/pyspark/context.py", line 283, in _ensure_initialized
SparkContext._gateway = gateway or launch_gateway(conf)
File "/root/.local/lib/python3.6/site-packages/pyspark/java_gateway.py", line 95, in launch_gateway
raise Exception("Java gateway process exited before sending the driver its port number")
Exception: Java gateway process exited before sending the driver its port number

  


查源码

#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# import atexit
import os
import sys
import select
import signal
import shlex
import socket
import platform
from subprocess import Popen, PIPE if sys.version >= '':
xrange = range from py4j.java_gateway import java_import, JavaGateway, GatewayClient
from pyspark.find_spark_home import _find_spark_home
from pyspark.serializers import read_int def launch_gateway(conf=None):
"""
launch jvm gateway
:param conf: spark configuration passed to spark-submit
:return:
"""
if "PYSPARK_GATEWAY_PORT" in os.environ:
gateway_port = int(os.environ["PYSPARK_GATEWAY_PORT"])
else:
SPARK_HOME = _find_spark_home()
# Launch the Py4j gateway using Spark's run command so that we pick up the
# proper classpath and settings from spark-env.sh
on_windows = platform.system() == "Windows"
script = "./bin/spark-submit.cmd" if on_windows else "./bin/spark-submit"
command = [os.path.join(SPARK_HOME, script)]
if conf:
for k, v in conf.getAll():
command += ['--conf', '%s=%s' % (k, v)]
submit_args = os.environ.get("PYSPARK_SUBMIT_ARGS", "pyspark-shell")
if os.environ.get("SPARK_TESTING"):
submit_args = ' '.join([
"--conf spark.ui.enabled=false",
submit_args
])
command = command + shlex.split(submit_args) # Start a socket that will be used by PythonGatewayServer to communicate its port to us
callback_socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
callback_socket.bind(('127.0.0.1', ))
callback_socket.listen()
callback_host, callback_port = callback_socket.getsockname()
env = dict(os.environ)
env['_PYSPARK_DRIVER_CALLBACK_HOST'] = callback_host
env['_PYSPARK_DRIVER_CALLBACK_PORT'] = str(callback_port) # Launch the Java gateway.
# We open a pipe to stdin so that the Java gateway can die when the pipe is broken
if not on_windows:
# Don't send ctrl-c / SIGINT to the Java gateway:
def preexec_func():
signal.signal(signal.SIGINT, signal.SIG_IGN)
proc = Popen(command, stdin=PIPE, preexec_fn=preexec_func, env=env)
else:
# preexec_fn not supported on Windows
proc = Popen(command, stdin=PIPE, env=env) gateway_port = None
# We use select() here in order to avoid blocking indefinitely if the subprocess dies
# before connecting
while gateway_port is None and proc.poll() is None:
timeout = # (seconds)
readable, _, _ = select.select([callback_socket], [], [], timeout)
if callback_socket in readable:
gateway_connection = callback_socket.accept()[]
# Determine which ephemeral port the server started on:
gateway_port = read_int(gateway_connection.makefile(mode="rb"))
gateway_connection.close()
callback_socket.close()
if gateway_port is None:
raise Exception("Java gateway process exited before sending the driver its port number") # In Windows, ensure the Java child processes do not linger after Python has exited.
# In UNIX-based systems, the child process can kill itself on broken pipe (i.e. when
# the parent process' stdin sends an EOF). In Windows, however, this is not possible
# because java.lang.Process reads directly from the parent process' stdin, contending
# with any opportunity to read an EOF from the parent. Note that this is only best
# effort and will not take effect if the python process is violently terminated.
if on_windows:
# In Windows, the child process here is "spark-submit.cmd", not the JVM itself
# (because the UNIX "exec" command is not available). This means we cannot simply
# call proc.kill(), which kills only the "spark-submit.cmd" process but not the
# JVMs. Instead, we use "taskkill" with the tree-kill option "/t" to terminate all
# child processes in the tree (http://technet.microsoft.com/en-us/library/bb491009.aspx)
def killChild():
Popen(["cmd", "/c", "taskkill", "/f", "/t", "/pid", str(proc.pid)])
atexit.register(killChild) # Connect to the gateway
gateway = JavaGateway(GatewayClient(port=gateway_port), auto_convert=True) # Import the classes used by PySpark
java_import(gateway.jvm, "org.apache.spark.SparkConf")
java_import(gateway.jvm, "org.apache.spark.api.java.*")
java_import(gateway.jvm, "org.apache.spark.api.python.*")
java_import(gateway.jvm, "org.apache.spark.ml.python.*")
java_import(gateway.jvm, "org.apache.spark.mllib.api.python.*")
# TODO(davies): move into sql
java_import(gateway.jvm, "org.apache.spark.sql.*")
java_import(gateway.jvm, "org.apache.spark.sql.hive.*")
java_import(gateway.jvm, "scala.Tuple2") return gateway

解决pyspark-linux-windowsIDE JAVA_HOME not set的更多相关文章

  1. 解决Kali Linux没有声音

    解决Kali Linux没有声音   Kali Linux系统默认状态下,root用户是无法使用声卡的,也就没有声音.启用的方法如下: (1)在终端执行命令:systemctl --user enab ...

  2. 解决redhat linux下IP地址可以ping通,域名无法ping通问题

    解决redhat linux下IP地址可以ping通,域名无法ping通 在/etc/resolv.conf中添点东西 格式如下: nameserver xxx.xxx.xxx.xxx nameser ...

  3. 解决虚拟机linux端mysql数据库无法远程访问

    解决虚拟机linux端mysql数据库无法远程访问 1. 在控制台执行 mysql -u root -p mysql,CentOS系统提示输入数据库root用户的密码,输入完成后即进入mysql控制台 ...

  4. 解决在Linux下安装Oracle时的中文乱码问题

    本帖最后由 TsengYia 于 2012-2-22 17:06 编辑 解决在Linux下安装Oracle时的中文乱码问题 操作系统:Red Hat Enterprise Linux 6.1数据库:O ...

  5. 解决在linux环境下面不显示验证码的问题

    详见:http://blog.yemou.net/article/query/info/tytfjhfascvhzxcyt235 解决在linux环境下面不显示验证码的问题1.tomcat      ...

  6. 解决遇到Linux网络配置,从熟悉网络配置文件入手

    如果接触过Linux,网络配置是一个比较棘手的问题.但是Linux是文件为基础来构建的系统,包括我们windows中设备,Linux也视为文件.所以只要我们明白文件的作用.就能对Linux更加的熟悉, ...

  7. 终于解决了Linux下运行OCCI程序一直报Error while trying to retrieve text for error ORA-01804错误

    终于解决了Linux下运行OCCI程序一直报Error while trying to retrieve text for error ORA-01804错误 http://blog.csdn.net ...

  8. 快速解决Ubuntu/linux 环境下QT生成没有可执行文件(application/x-executable)

    快速解决Ubuntu/linux 环境下QT生成没有可执行文件(application/x-executable)(转载)   问题描述 与windows环境下不同,linux选择debug构建时并不 ...

  9. tomcat(不仅仅是tomcat)通过熵池解决在linux启动应用慢

    tomcat启动过程中报错 -Jul- ::] org.apache.catalina.startup.HostConfig.deployDirectory Deploying web applica ...

  10. 360:且用且珍惜!解决虚拟机linux启动缓慢以及ssh端卡顿的问题!

    优化软件以及杀毒软件想必大家都是用过的,小编自用的第一台电脑自带安装的是金山毒霸,随着时间的偏移渐渐用过小红伞,卡巴斯基,优化大师,鲁大师到后来的360优化杀毒套装,优化软件给大家带来了方便,尤其是上 ...

随机推荐

  1. 【Luogu】P3847调整队形(DP)

    题目链接 DP果真是考思维啊 增加一个数的操作等价于删掉那个不和谐的数的操作. 所以1.2操作可以忽略. 剩下3.4操作,则可以设计f[i][j]是将区间[i,j]变成回文序列需要的操作数. if(a ...

  2. spring中quartz的使用。【转http://www.cnblogs.com/kay/archive/2007/11/02/947372.html】

    注:从spring3到spring4改变 org.springframework.scheduling.quartz.CronTriggerBean org.springframework.sched ...

  3. Aragorn's Story(hdu3966)

    题意:给一棵树,并给定各个点权的值,然后有3种操作: I C1 C2 K: 把C1与C2的路径上的所有点权值加上K D C1 C2 K:把C1与C2的路径上的所有点权值减去K Q C:查询节点编号为C ...

  4. Mac VMware Fusion Centos7 静态ip配置

    一直没用mac装过虚拟机,最近因为一些原因不得不装一个,但是被这个静态ip配置把头都搞痛了(这里吐槽一下百度,我前几页都看了几遍,搜索关键字就是我现在的标题,结果都是一些抄抄抄并且不管用的攻略,最后使 ...

  5. Linux crontab 定时任务Demo

    # 查看定时任务 crontab -l #编辑定时任务crontab -e # 同步时间 */ * * * * /usr/sbin/ntpdate -u 127.0.0.1 # 定时删除文件 */ * ...

  6. Codeforces 552E Vanya and Brackets(枚举 + 表达式计算)

    题目链接 Vanya and Brackets 题目大意是给出一个只由1-9的数.乘号和加号组成的表达式,若要在这个表达式中加上一对括号,求加上括号的表达式的最大值. 我们发现,左括号的位置肯定是最左 ...

  7. 算法笔记字符串处理问题H:编排字符串(2064)

    题目描述 请输入字符串,最多输入4 个字符串,要求后输入的字符串排在前面,例如 输入:EricZ 输出:1=EricZ 输入:David 输出:1=David 2=EricZ 输入:Peter 输出: ...

  8. Spring实战Day6

    3.4 bean的作用域 Spring中bean的作用域 单例(Singleton):在整个应用中,只创建bean的一个实例. 原型(Prototype):每次注入或者通过Spring应用上下文获取的 ...

  9. MFC中的双缓冲技术(解决绘图闪烁问题)

    转自 MFC绘图不闪烁——双缓冲技术[转] 在VC/MFC用CDC绘图时,频繁的刷新,屏幕会出现闪烁的现象,CPU时间占用率相当高,绘图效率极低,很容易出现程序崩溃. 所谓双缓冲技术,下面是百度百科的 ...

  10. 【从零学习openCV】IOS7下的openCV开发起步(Xcode5.1.1&amp;openCV2.49)

    前言: 开发IOS7已经有一月的时间了.近期在准备推研的事,有点想往CV方向发展.于是開始自学openCV. 关注CSDN已经非常久了.也从非常多博主那学到了非常多知识,于是我也从这周开启自己的blo ...