SpringBoot应用调用Linkis进行任务调度执行SQL,进行数据质量分析
基于Linkis的Rest-API调用任务
官网示例:“https://linkis.apache.org/zh-CN/docs/1.3.2/api/linkis-task-operator”

结合SpringBoot集成
准备工作:SpringBoot-web应用:封装好支持cookie的restClient就行
封装RestTemplate
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.config.Registry;
import org.apache.http.config.RegistryBuilder;
import org.apache.http.conn.socket.ConnectionSocketFactory;
import org.apache.http.conn.socket.PlainConnectionSocketFactory;
import org.apache.http.conn.ssl.SSLConnectionSocketFactory;
import org.apache.http.impl.client.HttpClientBuilder;
import org.apache.http.impl.conn.PoolingHttpClientConnectionManager;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Configuration;
import org.springframework.http.client.ClientHttpRequestFactory;
import org.springframework.http.client.HttpComponentsClientHttpRequestFactory;
import org.springframework.web.client.RestTemplate;
/**
* @author 白嫖老郭
* @createTime 2024-02-26
* @description RestTemplate封装对象的配置与注入
*/
@Configuration
public class RestClientConfig {
@Value("${restTemplate.thread.maxTotal:200}")
private int maxTotal;
@Value("${restTemplate.thread.maxPerRoute:100}")
private int maxPerRoute;
@Value("${restTemplate.request.socketTimeout:10000}")
private int socketTimeout;
@Value("${restTemplate.request.connectTimeout:2000}")
private int connectTimeout;
@Value("${restTemplate.request.connectionRequestTimeout:2000}")
private int connectionRequestTimeout;
@Bean
public RestTemplate restTemplate() {
RestTemplate restTemplate = new RestTemplate(clientHttpRequestFactory());
return restTemplate;
}
private ClientHttpRequestFactory clientHttpRequestFactory() {
HttpComponentsClientHttpRequestFactory factory =
new HttpComponentsClientHttpRequestFactory();
factory.setHttpClient(httpClient());
return factory;
}
private HttpClient httpClient() {
Registry<ConnectionSocketFactory> registry = RegistryBuilder.<ConnectionSocketFactory>create()
.register("http", PlainConnectionSocketFactory.getSocketFactory())
.register("https", SSLConnectionSocketFactory.getSocketFactory())
.build();
PoolingHttpClientConnectionManager connectionManager = new PoolingHttpClientConnectionManager(registry);
// Set max total connection
connectionManager.setMaxTotal(maxTotal);
// Set max route connection
connectionManager.setDefaultMaxPerRoute(maxPerRoute);
RequestConfig requestConfig = RequestConfig.custom()
.setSocketTimeout(socketTimeout)
.setConnectTimeout(connectTimeout)
.setConnectionRequestTimeout(connectionRequestTimeout)
.build();
return HttpClientBuilder.create().setDefaultRequestConfig(requestConfig)
.setConnectionManager(connectionManager)
.build();
}
}
编写封装调用Linkis任务的接口
import com.alibaba.fastjson.JSONObject;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.lang3.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.http.ResponseEntity;
import org.springframework.web.bind.annotation.GetMapping;
import org.springframework.web.bind.annotation.RestController;
import org.springframework.web.client.RestTemplate;
import javax.servlet.http.HttpServletRequest;
import javax.servlet.http.HttpServletResponse;
import java.util.HashMap;
/**
 * REST controller that drives Linkis through its REST API to run data-quality SQL on Spark.
 *
 * <p>Endpoints, in the order a client normally uses them:
 * <ol>
 *   <li>{@code /executeSql} - log in and submit one of the sample queries</li>
 *   <li>{@code /getStatus} - poll the execution status</li>
 *   <li>{@code /getResult} - read the first result file of a finished task</li>
 *   <li>{@code /getExecuteLog} - read the execution log</li>
 *   <li>{@code /killExecuteTask} - terminate a task</li>
 * </ol>
 *
 * <p>NOTE(review): the follow-up calls carry no explicit credentials, so they presumably rely on
 * the injected {@link RestTemplate} keeping the session cookie obtained by the login call -
 * confirm against the HttpClient configuration.
 */
@RestController
public class RestControllerLinkis {
    private static final Logger logger = LoggerFactory.getLogger(RestControllerLinkis.class);

    private static final String SQL_SIMPLE_ONE = "select count(*) from default.student where (name='zhangsan') and (sex is null)";
    private static final String SQL_SIMPLE_TWO = "select count(*) from default.student where (name='lisi') and (sex is null)";
    private static final String SQL_SIMPLE_THREE = "select * from default.student where (name = 'zhangsan' or name = 'lisi') and (sex is null)";

    /** Pause between two status polls, in milliseconds. */
    private static final long STATUS_POLL_INTERVAL_MS = 6000L;
    /** Maximum number of re-polls per /getStatus request, so a stuck task cannot pin a request thread forever. */
    private static final int MAX_STATUS_POLLS = 100;

    // Injected RestTemplate instance shared by all calls.
    @Autowired
    private RestTemplate restTemplate;

    @Value("${linkis.url:http://10.130.1.37:8188}")
    private String linkisUrl;

    // Credentials are overridable through configuration; the defaults keep the previous behaviour.
    @Value("${linkis.username:hadoop}")
    private String linkisUserName;

    @Value("${linkis.password:f45b9a1af}")
    private String linkisPassword;

    /**
     * (1) Logs in and submits a task.
     *
     * <p>The request parameter {@code sql} selects the sample query: {@code one}, {@code two} or
     * {@code three}. A missing, blank or unknown value falls back to query one, so an empty
     * statement is never submitted.
     *
     * @param httpServletRequest request carrying the optional {@code sql} parameter
     * @param httpServletResponse unused
     * @return the response of the execute call, returned unchanged
     * @throws Exception if a remote call fails
     */
    @GetMapping("/executeSql")
    public ResponseEntity<JSONObject> loginAndExecuteSql(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws Exception {
        String executeSql = resolveSql(httpServletRequest.getParameter("sql"));

        logger.info("============================================================");
        ResponseEntity<JSONObject> login = login(restTemplate);
        JSONObject loginBody = login == null ? null : login.getBody();
        logger.info(loginBody == null ? "null" : loginBody.toJSONString());
        logger.info("============================================================");

        ResponseEntity<JSONObject> responseEntity = executeSql(restTemplate, executeSql);
        // The task id may be absent (for example when the submission was rejected); log it null-safely.
        JSONObject data = dataOf(responseEntity);
        Long taskID = data == null ? null : data.getLong("taskID");
        logger.info("========================TASKID====================================");
        logger.info(String.valueOf(taskID));
        return responseEntity;
    }

    /**
     * (3) Fetches the execution result.
     *
     * <p>Looks up the task's result location in the job history and opens the file
     * {@code _0.dolphin} below it.
     *
     * @param httpServletRequest request carrying the required {@code taskID} parameter
     * @param httpServletResponse unused
     * @return the open-file response; HTTP 400 when {@code taskID} is missing; HTTP 404 when the
     *         job history holds no result location for the task
     * @throws Exception if a remote call fails
     */
    @GetMapping("/getResult")
    public ResponseEntity<JSONObject> getResult(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws Exception {
        String taskID = httpServletRequest.getParameter("taskID");
        if (StringUtils.isBlank(taskID)) {
            return errorResponse(HttpStatus.SC_BAD_REQUEST, "Missing required parameter: taskID");
        }
        // Resolve where the task stored its result.
        String resultLocation = getFIleURL(restTemplate, taskID);
        if (StringUtils.isBlank(resultLocation)) {
            // Without this guard the request would ask for the literal path "null/_0.dolphin".
            return errorResponse(HttpStatus.SC_NOT_FOUND, "No result location available for taskID " + taskID);
        }
        ResponseEntity<JSONObject> resResp = openResultFile(restTemplate, resultLocation);
        logBody(resResp);
        return resResp;
    }

    /**
     * (4) Fetches the execution log (from line 0, at most 500 lines).
     *
     * @param httpServletRequest request carrying the required {@code execID} parameter
     * @param httpServletResponse unused
     * @return the log response; HTTP 400 when {@code execID} is missing
     * @throws Exception if the remote call fails
     */
    @GetMapping("/getExecuteLog")
    public ResponseEntity<JSONObject> getExecuteLog(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws Exception {
        String execID = httpServletRequest.getParameter("execID");
        if (StringUtils.isBlank(execID)) {
            return errorResponse(HttpStatus.SC_BAD_REQUEST, "Missing required parameter: execID");
        }
        return getAndLog(linkisUrl + "/api/rest_j/v1/entrance/{execID}/log?fromLine=0&size=500", execID);
    }

    /**
     * (5) Terminates a task.
     *
     * @param httpServletRequest request carrying the required {@code execID} parameter
     * @param httpServletResponse unused
     * @return the kill response; HTTP 400 when {@code execID} is missing
     * @throws Exception if the remote call fails
     */
    @GetMapping("/killExecuteTask")
    public ResponseEntity<JSONObject> killExecuteTask(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws Exception {
        String execID = httpServletRequest.getParameter("execID");
        if (StringUtils.isBlank(execID)) {
            return errorResponse(HttpStatus.SC_BAD_REQUEST, "Missing required parameter: execID");
        }
        return getAndLog(linkisUrl + "/api/rest_j/v1/entrance/{execID}/kill", execID);
    }

    /**
     * (2) Polls the execution status until the task reaches a terminal state.
     *
     * <p>The status is checked immediately and then every {@value #STATUS_POLL_INTERVAL_MS} ms,
     * for at most {@value #MAX_STATUS_POLLS} additional polls. Polling stops early when the
     * status becomes {@code Succeed}, {@code Failed}, {@code Cancelled} or {@code Timeout}, when
     * a response carries no status, or when the thread is interrupted. If the task is still
     * running when the limit is reached, the latest response is returned so the client can call
     * again.
     *
     * @param httpServletRequest request carrying the required {@code execID} parameter
     * @param httpServletResponse unused
     * @return the most recent status response; HTTP 400 when {@code execID} is missing
     * @throws Exception if a remote call fails
     */
    @GetMapping("/getStatus")
    public ResponseEntity<JSONObject> getStatus(HttpServletRequest httpServletRequest, HttpServletResponse httpServletResponse) throws Exception {
        String execID = httpServletRequest.getParameter("execID");
        if (StringUtils.isBlank(execID)) {
            return errorResponse(HttpStatus.SC_BAD_REQUEST, "Missing required parameter: execID");
        }
        String statusUrl = linkisUrl + "/api/rest_j/v1/entrance/{execID}/status";
        ResponseEntity<JSONObject> statusResp = restTemplate.getForEntity(statusUrl, JSONObject.class, execID);
        String status = statusOf(statusResp);

        int polls = 0;
        while (isOk(statusResp) && status != null && !isTerminalStatus(status) && polls < MAX_STATUS_POLLS) {
            try {
                Thread.sleep(STATUS_POLL_INTERVAL_MS);
            } catch (InterruptedException e) {
                // Restore the flag and hand back what we have instead of swallowing the interrupt.
                Thread.currentThread().interrupt();
                break;
            }
            statusResp = restTemplate.getForEntity(statusUrl, JSONObject.class, execID);
            status = statusOf(statusResp);
            polls++;
        }

        if ("Succeed".equals(status)) {
            logger.info("=========================================状态执行成功=========================================");
        } else if (status != null && !isTerminalStatus(status)) {
            logger.warn("Task {} is still in status {} after {} polls; returning the latest status", execID, status, polls);
        }
        return statusResp;
    }

    /**
     * Logs in to Linkis with the configured credentials.
     *
     * @param restClient client used for the call
     * @return the login response
     */
    private ResponseEntity<JSONObject> login(RestTemplate restClient) {
        JSONObject postData = new JSONObject();
        postData.put("userName", linkisUserName);
        postData.put("password", linkisPassword);
        String loginUrl = linkisUrl + "/api/rest_j/v1/user/login";
        return restClient.postForEntity(loginUrl, postData, JSONObject.class);
    }

    /**
     * Submits a SQL statement to the Linkis entrance for execution on Spark.
     *
     * @param restClient client used for the call
     * @param sql SQL code to execute
     * @return the execute response
     */
    private ResponseEntity<JSONObject> executeSql(RestTemplate restClient, String sql) {
        String url = "/api/rest_j/v1/entrance/execute";
        JSONObject map = new JSONObject();
        map.put("method", url);
        map.put("params", new HashMap<>()); // runtime parameters for the service; required, but may be empty
        map.put("executeApplicationName", "spark"); // execution engine; this example submits to Spark
        map.put("executionCode", sql);
        map.put("runType", "sql"); // required; for Spark this selects the language, here SQL
        // No scriptPath parameter: the code is sent inline rather than as a script file.
        return restClient.postForEntity(linkisUrl + url, map, JSONObject.class);
    }

    /**
     * Reads the result location of a task from the job history.
     *
     * @param restClient client used for the call
     * @param taskID id of the task to look up
     * @return the {@code resultLocation} of the task, or {@code null} when the call did not
     *         return HTTP 200 or the response does not contain one
     */
    private String getFIleURL(RestTemplate restClient, String taskID) {
        String historyUrl = linkisUrl + "/api/rest_j/v1/jobhistory/{taskID}/get";
        ResponseEntity<JSONObject> hisResp = restClient.getForEntity(historyUrl, JSONObject.class, taskID);
        // Validate before touching the body; the previous code logged the body first and could NPE.
        if (!isOk(hisResp) || hisResp.getBody() == null) {
            return null;
        }
        JSONObject body = hisResp.getBody();
        logger.info(body.toJSONString());
        JSONObject data = body.getJSONObject("data");
        JSONObject task = data == null ? null : data.getJSONObject("task");
        return task == null ? null : task.getString("resultLocation");
    }

    /**
     * Opens the first result file below a result location and logs its content.
     *
     * @param restClient client used for the call
     * @param resultLocation directory that holds the result files
     * @return {@code resultLocation}, unchanged
     */
    private String getRestFileMsg(RestTemplate restClient, String resultLocation) {
        logBody(openResultFile(restClient, resultLocation));
        logger.info("ending==========================");
        return resultLocation;
    }

    /** Requests the file {@code _0.dolphin} below {@code resultLocation} from the Linkis filesystem API. */
    private ResponseEntity<JSONObject> openResultFile(RestTemplate restClient, String resultLocation) {
        // The path is passed as a URI variable so characters in it cannot break the URI template.
        String resUrl = linkisUrl + "/api/rest_j/v1/filesystem/openFile?path={path}";
        return restClient.getForEntity(resUrl, JSONObject.class, resultLocation + "/_0.dolphin");
    }

    /** Performs a GET against the URI template, logs the body of a successful response and returns the response. */
    private ResponseEntity<JSONObject> getAndLog(String urlTemplate, Object... uriVariables) {
        ResponseEntity<JSONObject> resResp = restTemplate.getForEntity(urlTemplate, JSONObject.class, uriVariables);
        logBody(resResp);
        return resResp;
    }

    /** Maps the {@code sql} request parameter to a sample query; blank or unknown keys select query one. */
    private static String resolveSql(String key) {
        if (StringUtils.isBlank(key)) {
            return SQL_SIMPLE_ONE;
        }
        switch (key) {
            case "two":
                return SQL_SIMPLE_TWO;
            case "three":
                return SQL_SIMPLE_THREE;
            case "one":
            default:
                return SQL_SIMPLE_ONE;
        }
    }

    /** Returns whether the response exists and carries HTTP 200. */
    private static boolean isOk(ResponseEntity<JSONObject> response) {
        return response != null && response.getStatusCode().value() == HttpStatus.SC_OK;
    }

    /** Returns the {@code data} object of a response body, or {@code null} when response, body or data is absent. */
    private static JSONObject dataOf(ResponseEntity<JSONObject> response) {
        if (response == null || response.getBody() == null) {
            return null;
        }
        return response.getBody().getJSONObject("data");
    }

    /** Returns {@code data.status} of a status response, or {@code null} when it is absent. */
    private static String statusOf(ResponseEntity<JSONObject> response) {
        JSONObject data = dataOf(response);
        return data == null ? null : data.getString("status");
    }

    /** Returns whether the status is one after which the task will not change state any more. */
    private static boolean isTerminalStatus(String status) {
        return "Succeed".equals(status)
                || "Failed".equals(status)
                || "Cancelled".equals(status)
                || "Timeout".equals(status);
    }

    /** Logs the JSON body of a successful, non-empty response. */
    private static void logBody(ResponseEntity<JSONObject> response) {
        if (isOk(response) && response.getBody() != null) {
            logger.info(response.getBody().toJSONString());
        }
    }

    /** Builds a locally generated error response with a {@code message} field. */
    private static ResponseEntity<JSONObject> errorResponse(int httpStatus, String message) {
        JSONObject body = new JSONObject();
        body.put("message", message);
        return ResponseEntity.status(httpStatus).body(body);
    }
}
测试验证
执行任务

查询任务状态

查询任务结果

查询任务日志

到Linkis管理台查询任务

。。。。。。SayGoodBye......
SpringBoot应用调用Linkis进行任务调度执行SQL,进行数据质量分析的更多相关文章
- SpringBoot 设置服务一启动就执行、初始化数据
定义一个类实现ApplicationRunner接口,然后Override这个ApplicationRunner接口的run方法 @Component public class TaskRunner ...
- NHibernate直接执行SQL进行插入
有时候,需要用NHibernate直接执行SQL进行数据insert或update. 怎么写呢?简单一点的,可以直接拼凑出来的SQL,这样写: using NHibernate; StringBuil ...
- AngularJS SQL 获取数据
使用PHP从MySQL中获取数据: <!DOCTYPE html> <html> <head> <meta charset="utf-8" ...
- Oracle sql共享池$sqlarea分析SQL资源使用情况
遇到需要排查一个系统使用sql的情况,可以通过查询Oracle的$sql.$ssssion.$sqlarea进行统计排查 排查时可以先看一下$sql和$session的基本信息 select * fr ...
- 详解SQL Server数据修复命令DBCC的使用
严重级别为 21 表示可能存在数据损坏. 可能的原因包括损坏的页链.损坏的 IAM 或该对象的 sys.objects目录视图中存在无效条目. 这些错误通常由硬件或磁盘设备驱动程序故障而引起. MS ...
- springboot中使用mybatis显示执行sql
springboot 中使用mybatis显示执行sql的配置,在properties中添加如下 logging.你的包名=debug 2018-11-27 16:35:43.044 [DubboSe ...
- spring-boot启动自动执行sql文件失效 解决办法
在springboot1.5及以前的版本,要执行sql文件只需在applicaion文件里指定sql文件的位置即可.但是到了springboot2.x版本, 如果只是这样做的话springboot不会 ...
- Mybaits 源码解析 (六)----- 全网最详细:Select 语句的执行过程分析(上篇)(Mapper方法是如何调用到XML中的SQL的?)
上一篇我们分析了Mapper接口代理类的生成,本篇接着分析是如何调用到XML中的SQL 我们回顾一下MapperMethod 的execute方法 public Object execute(SqlS ...
- Mybaits 源码解析 (七)----- Select 语句的执行过程分析(下篇)(Mapper方法是如何调用到XML中的SQL的?)全网最详细,没有之一
我们上篇文章讲到了查询方法里面的doQuery方法,这里面就是调用JDBC的API了,其中的逻辑比较复杂,我们这边文章来讲,先看看我们上篇文章分析的地方 SimpleExecutor public & ...
- shell调用sqlplus批量执行sql文件
在最近的工作中,经常需要批量执行一些DML, DDL, PL/SQL语句或导入一些Function, Procedure.因为support的国家比较多,常常需要一个登陆到一个国家的数据库上执行完成后 ...
随机推荐
- Django_使用汇总(1)
使用django(4.1.5) 搭建股票信息后台,显示股票信息: Stock -> models.py class Stock(models.Model): symbol = models.Ch ...
- [软件工具使用记录] windows离线ollama部署本地模型并配置continue实现离线代码补全
qwen2.5coder发布之后,觉得差不多可以实现离线模型辅助编程了,所以尝试在公司内网部署模型,配合vsocde插件continue实现代码提示.聊天功能. 目前使用qwen2.5coder的32 ...
- 使用Emgu.CV开发视频播放器简述
OpenCV是大名鼎鼎的视觉处理库,其对应的c#版本为Emgu.CV.本人采用Emgu.CV开发了一款视频播放软件,可对本地视频文件和rstp在线视频流播放,还具有对视频局部区域放大功能.虽然功能比较 ...
- C# Windows Media Player 播放playlist 歌单
using AxWMPLib; using System; using System.Collections.Generic; using System.Linq; using System.Text ...
- git撤销远已经push到程服务器上某次提交
git撤销远已经push到程服务器上某次提交 问题: 不小心把一次错误的代码push到远程服务器上的分支上,或者没有merge强制将本地的方法push到git服务器上. 解决方法: 输入 git lo ...
- 无网环境Docker Rpm离线安装
总体思路:找一台可以联网的linux,下载docker的RPM依赖包而不进行安装(yum localinstall),将所有依赖的rpm环境打包好,再在无网环境中解压逐一安装(rpm: --force ...
- DataV过滤器
人才库: return data.filter(function (item) { return item.职级 === ''; }) 区县分析: //一级指标 const t = Object. ...
- 局域网 yum仓库
有时候在局域网环境中,每台机器上挂载本地镜像充当yum仓库太麻烦. 可以选择局域网一台服务器生成yum仓库,局域网其他服务器通过http协议访问这台服务器的yum仓库. 一.准备 两台虚拟机,其中 A ...
- deepseek-v3 论文阅读
模型结构 MLA(Multi-Head Latent Attention) 主要作用是在保证效果的基础上, 利用低秩压缩的原理优化kvCache, 加速推理, 同时节省训练显存. 先回忆下MHA, 在 ...
- 解决Webstorm Nodejs console.log("这是中文") 控制台乱码
设置文件编码 自定义vm选项文件 添加 文件最后一行添加 -Dfile.encoding=UTF-8 3.修改注册表 Windows+R --> regedit --> 计算机\HKEY_ ...