tpot从elastic search拉攻击数据之三：用于拉取的Java程序

package download;

import org.json.JSONArray;

import java.io.*;

import java.net.URL;

import java.net.URLConnection;

import java.nio.Buffer;

import java.text.SimpleDateFormat;

import java.util.*;

import org.json.JSONObject;

public class Downloader {

    /**
     * Command-line entry point: runs one complete download pass and prints the
     * number of records that were collected.
     *
     * @param args ignored
     * @throws IOException propagated from {@link #getresult()}
     */
    public static void main(String[] args) throws IOException {
        // Configer.configProxy() was left disabled in the original (presumably optional proxy setup).
        final int fetchedCount = getresult().size();
        System.out.println("爬取完成，条数：" + fetchedCount);
    }

    // Name of the Elasticsearch index to query; empty until getIndexStr() assigns "logstash-" + the current date.
    public static String indexstr = "";

    // Settings read from downloader.properties by the static initializer below; remains null if that load throws IOException.
    public static Properties p;

    /**
     * Loads a properties resource through the class loader of {@code Downloader}.
     *
     * @param filename resource name, resolved by {@code ClassLoader.getResourceAsStream}
     * @return a new {@code Properties} instance populated from the resource
     * @throws FileNotFoundException if the class loader cannot find the resource
     * @throws IOException if the resource cannot be read or closed
     */
    public static Properties loadPropertiesFromFile(String filename) throws IOException {
        // Local is deliberately not named "p" so it cannot be confused with the static field of that name.
        Properties loaded = new Properties();
        // try-with-resources closes the stream on every path; a null resource is simply skipped on close.
        try (InputStream input = Downloader.class.getClassLoader().getResourceAsStream(filename)) {
            if (input == null) {
                // getResourceAsStream signals "not found" with null; fail with a descriptive
                // IOException instead of the NullPointerException Properties.load(null) would raise.
                throw new FileNotFoundException("classpath resource not found: " + filename);
            }
            loaded.load(input);
        }
        return loaded;
    }

    // Runs once when the class is initialised: reads downloader.properties into the static field p.
    static {

        try {

            p = loadPropertiesFromFile("downloader.properties");

        } catch (IOException e) {

            // The failure is only reported (message on stdout plus stack trace); p is not assigned here,
            // so it stays null and any later p.getProperty(...) call will throw NullPointerException.
            System.out.println("downloader.properties读取失败");

            e.printStackTrace();

        }

    }

    /**
     * Opens the whitelist file as a classpath resource. The resource name is taken
     * from the {@code white_list_file} entry of {@link #p}.
     *
     * <p>The original also sketched (commented out) an alternative that resolves the
     * resource's filesystem path via {@code classLoader.getResource(...).getPath()}
     * instead of opening a stream.
     *
     * @return the resource stream as returned by {@code ClassLoader.getResourceAsStream};
     *         the caller is responsible for closing it
     */
    public static InputStream get_whitelist_inputstream() {
        final String whitelistResource = p.getProperty("white_list_file");
        return Downloader.class.getClassLoader().getResourceAsStream(whitelistResource);
    }

    /**
     * Builds one alternation regex from the whitelist file: every non-blank line
     * becomes a parenthesised alternative, joined with {@code |}
     * (for example {@code (a)|(b)|(c)}).
     *
     * <p>Lines are used verbatim apart from trimming surrounding whitespace; no regex
     * or JSON escaping is applied here.
     *
     * @return the combined pattern, or an empty string when the file has no non-blank lines
     * @throws FileNotFoundException if the whitelist resource cannot be found
     * @throws IOException if the whitelist cannot be read
     */
    public static String get_whitelist_regex() throws IOException {
        StringBuilder regex = new StringBuilder();
        // Closing the underlying stream in try-with-resources releases it even when readLine() fails.
        try (InputStream whitelistStream = get_whitelist_inputstream()) {
            if (whitelistStream == null) {
                // A missing resource previously surfaced as a NullPointerException inside InputStreamReader.
                throw new FileNotFoundException("whitelist resource not found on the classpath");
            }
            // Explicit charset so the result does not depend on the platform default.
            BufferedReader reader = new BufferedReader(new InputStreamReader(whitelistStream, "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                String pattern = line.trim();
                if (pattern.isEmpty()) {
                    // Blank lines (e.g. a trailing newline) would otherwise add an empty "()" alternative.
                    continue;
                }
                if (regex.length() > 0) {
                    regex.append('|');
                }
                regex.append('(').append(pattern).append(')');
            }
        }
        return regex.toString();
    }

    /**
     * Sends {@code param} as the body of a request to {@code url} and returns the response body.
     *
     * <p>Connect timeout is 5 s and read timeout is 15 s. The body is written and the
     * response is read as UTF-8. Response lines are concatenated without their line
     * terminators.
     *
     * @param url    target URL
     * @param param  request body; {@code null} sends an empty body
     * @param header extra request headers, may be {@code null}; these are applied after the
     *               built-in defaults (accept, connection, user-agent) so a caller can override them
     * @return the response body with line terminators removed
     * @throws IOException if connecting, writing the body, or reading the response fails
     */
    public static String post(String url, String param, Map<String, String> header) throws IOException {
        URLConnection conn = new URL(url).openConnection();
        conn.setConnectTimeout(5000);
        conn.setReadTimeout(15000);

        // Defaults first: setRequestProperty overwrites, so applying the caller's headers
        // afterwards lets them win instead of being silently replaced.
        conn.setRequestProperty("accept", "*/*");
        conn.setRequestProperty("connection", "Keep-Alive");
        conn.setRequestProperty("user-agent",
                "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
        if (header != null) {
            for (Map.Entry<String, String> entry : header.entrySet()) {
                conn.setRequestProperty(entry.getKey(), entry.getValue());
            }
        }

        // Both flags are required for a request that sends a body and reads a response.
        conn.setDoOutput(true);
        conn.setDoInput(true);

        // A Writer with an explicit charset replaces PrintWriter, which used the platform
        // charset and swallowed write errors. Closing it flushes the body.
        try (Writer out = new OutputStreamWriter(conn.getOutputStream(), "UTF-8")) {
            if (param != null) {
                out.write(param);
            }
        }

        StringBuilder result = new StringBuilder();
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = in.readLine()) != null) {
                result.append(line);
            }
        }
        return result.toString();
    }

    /**
     * Fetches {@code url} and returns the response body.
     *
     * <p>Connect and read timeouts are both 5 s. The response is decoded as UTF-8
     * (matching {@code post}) and its lines are concatenated without line terminators.
     *
     * @param url URL to fetch
     * @return the response body with line terminators removed
     * @throws IOException if connecting or reading fails
     */
    public static String get(String url) throws IOException {
        URLConnection connection = new URL(url).openConnection();
        connection.setRequestProperty("accept", "*/*");
        connection.setRequestProperty("connection", "Keep-Alive");
        connection.setRequestProperty("user-agent", "Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.1;SV1)");
        connection.setConnectTimeout(5000);
        connection.setReadTimeout(5000);
        connection.connect();

        StringBuilder body = new StringBuilder();
        // try-with-resources closes the reader even when readLine() throws; the explicit
        // charset keeps decoding independent of the platform default.
        try (BufferedReader in = new BufferedReader(
                new InputStreamReader(connection.getInputStream(), "UTF-8"))) {
            String line;
            while ((line = in.readLine()) != null) {
                body.append(line);
            }
        }
        return body.toString();
    }

    /**
     * Sets {@link #indexstr} to today's daily index name, {@code logstash-yyyy.MM.dd}.
     *
     * <p>The date is taken in UTC with {@code Locale.ROOT}. Logstash derives the date
     * part of its default daily index name from the event {@code @timestamp}, which is
     * UTC, so formatting in the JVM's local zone would point at the wrong (possibly
     * not yet existing) index for part of each day. {@code Locale.ROOT} avoids
     * default locales that select a non-Gregorian calendar.
     * NOTE(review): assumes the target cluster uses Logstash's default UTC-based index naming.
     */
    public static void getIndexStr() {
        SimpleDateFormat dayFormat = new SimpleDateFormat("yyyy.MM.dd", Locale.ROOT);
        dayFormat.setTimeZone(TimeZone.getTimeZone("UTC"));
        indexstr = "logstash-" + dayFormat.format(new Date());
        // For local testing the index can be pinned instead, e.g. indexstr = "tpot_test";
    }

    /**
     * Refreshes {@link #indexstr} and returns the key set of the {@code mappings}
     * object that the {@code _mapping} endpoint reports for that index.
     *
     * @return the keys found under {@code <index>.mappings} in the mapping response
     * @throws IOException if the mapping request fails
     */
    public static Set<String> getAttackTypeSet() throws IOException {
        getIndexStr();

        final String host = p.getProperty("els.host");
        final String mappingUrl = host + "/" + indexstr + "/" + "_mapping?pretty=true";
        System.out.println("【getting all types today】>>" + mappingUrl);

        // Response shape relied on here: { "<index>": { "mappings": { "<type>": ... } } }
        final JSONObject mappingResponse = new JSONObject(get(mappingUrl));
        return mappingResponse
                .getJSONObject(indexstr)
                .getJSONObject("mappings")
                .keySet();
    }

    /**
     * Downloads every {@code src_ip} recorded today, for each mapping type of the
     * current index except {@code _default_} and {@code Syslog}, excluding addresses
     * that match the whitelist regex.
     *
     * <p>Each type is read with a scroll search: the first request opens the scroll
     * and returns the first batch of {@code els.batch_size} hits; the remaining
     * {@code ceil(total / batch_size) - 1} batches are fetched from
     * {@code /_search/scroll}, always sending the most recently returned
     * {@code _scroll_id}.
     *
     * @return one record per hit that has a {@code src_ip}; name = the IP, source name = the type
     * @throws IOException if a request fails or the whitelist cannot be read
     */
    public static LinkedList<NearRealtimeIntelligence> getresult() throws IOException {
        LinkedList<NearRealtimeIntelligence> result = new LinkedList<NearRealtimeIntelligence>();
        Set<String> attacktypeset = getAttackTypeSet();

        String host = p.getProperty("els.host");
        String batchSize = p.getProperty("els.batch_size");
        String scrollKeepAlive = p.getProperty("scroll_timegap");
        // The bodies sent below are JSON; declare that explicitly rather than relying on the
        // form-encoded default that URLConnection applies to requests with a body.
        Map<String, String> jsonHeader = Collections.singletonMap("Content-Type", "application/json");

        // NOTE: the whitelist regex is spliced into the JSON text verbatim, so the whitelist
        // file must already be JSON-safe (backslashes and quotes escaped).
        String param = "{\n" +
                "  \"query\": {\n" +
                "    \"bool\": {\n" +
                "      \"must_not\": [\n" +
                "        {\n" +
                "          \"regexp\":{\n" +
                "            \"src_ip\":\"" + get_whitelist_regex() + "\"\n" +
                "          }\n" +
                "        }\n" +
                "      ]\n" +
                "    }\n" +
                "  },\"size\":" + batchSize + "\n" +
                "}";

        for (String attacktype : attacktypeset) {
            // Skip the two types that are ignored: _default_ and Syslog.
            if (attacktype.equals("_default_") || attacktype.equals("Syslog")) {
                continue;
            }
            System.out.println("【getting "+attacktype+" data】");

            String req = host + "/" + indexstr + "/" + attacktype + "/_search?scroll=" + scrollKeepAlive;
            System.out.println("posting url>>" + req);
            JSONObject page = new JSONObject(post(req, param, jsonHeader));

            int total = page.getJSONObject("hits").getInt("total");
            int pages = (int) Math.ceil(total / Double.parseDouble(batchSize));
            System.out.println("数据条数："+total + " 页数：" + pages);
            String scroll_id = page.getString("_scroll_id");

            // First batch: the original echoed each IP of this batch to stdout; kept as-is.
            collectSourceIps(page, attacktype, result, true);

            String scrollUrl = host + "/_search/scroll";
            for (int i = 1; i < pages; i++) {
                String param_scroll = "{\n" +
                        "  \"scroll\":\"" + scrollKeepAlive + "\",\n" +
                        "  \"scroll_id\":\"" + scroll_id + "\"\n" +
                        "}";
                page = new JSONObject(post(scrollUrl, param_scroll, jsonHeader));
                // The scroll id may change between requests; only the latest one should be sent.
                scroll_id = page.optString("_scroll_id", scroll_id);
                collectSourceIps(page, attacktype, result, false);
            }
        }
        return result;
    }

    /** Appends one record to {@code sink} for every hit in {@code response} whose {@code _source} has a {@code src_ip}. */
    private static void collectSourceIps(JSONObject response, String attacktype,
                                         List<NearRealtimeIntelligence> sink, boolean echo) {
        JSONArray hits = response.getJSONObject("hits").getJSONArray("hits");
        for (int j = 0; j < hits.length(); j++) {
            JSONObject source = hits.getJSONObject(j).getJSONObject("_source");
            if (!source.has("src_ip")) {
                continue;
            }
            String src_ip = source.getString("src_ip");
            if (echo) {
                System.out.println(src_ip);
            }
            NearRealtimeIntelligence adata = new NearRealtimeIntelligence();
            adata.setName(src_ip);
            adata.setSourceName(attacktype);
            sink.add(adata);
        }
    }

}

拉取过程中，注意：

1、在请求参数中用 must_not + regexp 过滤掉白名单中的 src_ip，并通过 size 和 scroll 设置每批大小、分页读取

url: http://xxx.xxx.xxx.xxx:8000/logstash-2018.07.30/Honeytrap/_search?scroll=3m
String param = "{\n" +

                "  \"query\": {\n" +

                "    \"bool\": {\n" +

                "      \"must_not\": [\n" +

                "        {\n" +

                "          \"regexp\":{\n" +

                "            \"src_ip\":\"" + get_whitelist_regex() + "\"\n" +

                "          }\n" +

                "        }\n" +

                "      ]\n" +

                "    }\n" +

                "  },\"size\":" + p.getProperty("els.batch_size") + "\n" +

                "}";

2、读取白名单文件（从classpath获取inputstream后按行读取）

获得inputstream

ClassLoader classLoader=Downloader.class.getClassLoader();

InputStream whitelist_inputstream=classLoader.getResourceAsStream(p.getProperty("white_list_file"));

使用inputstream按行读

BufferedReader whitelist_reader=new BufferedReader(new InputStreamReader(whitelist_inputstream));

String line=null;

while((line=whitelist_reader.readLine())!=null){

}

3、读取properties配置文件

Properties p = new Properties();

InputStream input = Downloader.class.getClassLoader().getResourceAsStream(filename);

p.load(input);

4、解析json字符串

JSONObject res_json = new JSONObject(res);

JSONObject all_hits = res_json.getJSONObject("hits");

JSONArray docu_array = all_hits.getJSONArray("hits");

tpot从elastic search拉攻击数据之三用于拉取的java程序的更多相关文章

tpot从elastic search拉攻击数据之一找本地数据端口
前面,我们已经在ubuntu服务器上部署好了tpot,并启动进行数据捕获可以通过64297端口登陆到kibana可视化平台查看捕获到攻击的情况. 现在要拉取攻击数据了,但是该怎么拉呢? 看了一上午的 ...
tpot从elastic search拉攻击数据之二配置端口映射
虽然知道了本地的数据接口位置,但是我们需要的是从远程拉取数据,所以我们需要更改es的ip端口为0.0.0.0:xxxx. 直接修改下图的elasticsearch.yml配置文件,结果发现无效. 这是 ...
用mescroll实现无限上拉增加数据，下拉刷新数据 (学习笔记)
最近自己做一个web app需要用到上拉查询下页数据,网上看了很多很多帖子,发现并不能快速的套用,总是会出现各种问题无法使用,于是无奈自己跑去看了官方api文档,终于做了出来,至此做个笔记,以后用到可 ...
Transform数据权限浅析2之利用Java完成权限设置
一:项目背景 1.1:cognos的两种建模工具为了更好的满足客户的需求,提升报表展现的效率,一种建模工具已经不能满足报表开发和展现的需要.Cognos除了给我们提供了一种基于关系型数据库的建模工具 ...
Elastic Search快速上手（2）：将数据存入ES
前言在上手使用前,需要先了解一些基本的概念. 推荐可以到 https://www.elastic.co/guide/cn/elasticsearch/guide/current/index.htm ...
SQL数据同步到ELK（二）- Elastic Search 安装
开篇废话没错,前面扯了一堆SQL SERVER,其实我连Elastic Search根本没动手玩过(是不是与时代有点脱节了?),那今天我就准备尝试安装一个ELK的简单集群出来(这个集群是使用我的小米 ...
elastic search查询命令集合
Technorati 标签: elastic search,query,commands 基本查询:最简单的查询方式 query:{"term":{"title" ...
elastic search 学习笔记
Elastic search在数据分析的应用中相当于一个数据库的搜索引擎. 跟MySQL类似,它有自己的查询语言,只不过不是关系型数据库,属于NoSQL. 可以根据索引从分布式服务器文件系统中快速存取 ...
分库分表后跨分片查询与Elastic Search
携程酒店订单Elastic Search实战:http://www.lvesu.com/blog/main/cms-610.html 为什么分库分表后不建议跨分片查询:https://www.jian ...

随机推荐

atitit.软件设计模式大的总结attialx总结
atitit.软件设计模式大的总结attialx总结 1. 设计模式的历史3 2. 设计模式的数量(253个)3 3. 设计模式的结构4 3.1. 应用场景and条件Context4 3.2. Pro ...
匿名内部类 , Iterable<T> 和 Iterator<T>
package generic; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; i ...
twemproxy源码分析2——守护进程的创建
twemproxy源码中关于守护进程的创建实现得比较标准,先贴出代码来,然后结合一些资料来分析和列举一些实现守护进程的常用方法,不过不得不说twemproxy的实现确实是不错的,注释都写在了代码中,直 ...
neo4j使用笔记
#coding:utf- __author__ = 'similarface' # 安装驱动:pip install neo4j-driver from neo4j.v1 import GraphDa ...
java printf long
System.out.printf("%d\n", 1000000000000000000L);
C# 时间格式 yyyy/mm/dd
今天遇到个问题在C#中将日期格式设置为yyyy/MM/dd,我是这样写的: DateTime.Now.ToString("yyyy/MM/dd"); 可是获取到的日期还是显示yyy ...
bzoj2440 完全平方数莫比乌斯值+容斥+二分
莫比乌斯值+容斥+二分 /** 题目:bzoj2440 完全平方数链接:http://www.lydsy.com/JudgeOnline/problem.php?id=2440 题意:求第k个小x数 ...
01 SQL核心语句
在 sqlplus 中可以使用 ; 或 / 来执行一条 sql 语句, 但是 / 必须要换行, 个人感觉只有那些脚本啊什么的最后一般使用 / , 其他的都使用; 核心语句 SELECT oracle ...
安装Node.js以及Hexo
安装前提安装 Hexo 相当简单.然而在安装前,您必须检查电脑中是否已安装下列应用程序: Node.js 如何在Ubuntu上安装最新版本的Node.js https://hexo.io/zh-cn ...
JAVA源文件中是否可以包括多个类,有什么限制
JAVA源文件中是否可以包括多个类,有什么限制解答:一个java源文件中可以包含多个类,每个源文件中至多有一个public类,如果有的话,那么源文件的名字必须与之相同.如果源文件中没有public类 ...

tpot从elastic search拉攻击数据之三 用于拉取的java程序

tpot从elastic search拉攻击数据之三 用于拉取的java程序的更多相关文章

随机推荐

热门专题

tpot从elastic search拉攻击数据之三用于拉取的java程序

tpot从elastic search拉攻击数据之三用于拉取的java程序的更多相关文章