代码如下。测试发现,是否对输入数据进行归一化/标准化对结果没有影响(孤立森林每次只在单个特征的最小值与最大值之间随机选取切分点,对各特征做线性缩放不会改变样本在该特征上的相对位置,因此在相同随机种子下划分结果不变):

import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler


def parse_line(s):
    """Parse one line of the feature dump into ``(domain, features)``.

    A line looks like ``(u'abfxsc.com', (24, 1, 4, 0.45, ...))``: the first
    comma-separated field is the domain name and every following field is a
    numeric feature.

    Wrapping characters (parentheses, brackets, quotes, whitespace) are
    stripped from the ends of each field only, and the ``u`` string prefix is
    dropped only when it sits directly in front of the opening quote of the
    name.  Deleting every ``u'`` in the line would also eat the last letter
    of any name ending in ``u`` (``u'yandex.ru'`` would become ``yandex.r``).

    Raises:
        ValueError: if a feature field cannot be converted to ``float``.
    """
    fields = [field.strip(" \t\r\n()[]'\"") for field in s.split(",")]
    name = fields[0]
    # The closing quote is already stripped; only the "u'" opener can remain.
    if name[:2] in ("u'", 'u"'):
        name = name[2:]
    features = [float(field) for field in fields[1:]]
    return (name, features)


def get_data(path="feature.dat"):
    """Load every record from the feature file at *path*.

    Returns a list of ``(domain, features)`` tuples, one per non-blank line.
    Blank lines are skipped so they do not turn into empty feature rows.
    """
    with open(path) as f:
        return [parse_line(line) for line in f if line.strip()]


def train(collected_data, scaler=None):
    """Fit an isolation forest and report the records it flags as outliers.

    Args:
        collected_data: sequence of ``(domain, features)`` records as
            returned by ``get_data``.
        scaler: optional transformer exposing ``fit_transform`` (for example
            ``StandardScaler()`` or ``MinMaxScaler()``) applied to the
            features before fitting.  With ``None`` the raw features are
            used.

    Returns:
        The set of domain names predicted as anomalous.  Each flagged record
        is also printed as ``bad domains: <record>``.
    """
    bad_domains = set()
    if not collected_data:
        # Nothing to fit; fitting an empty sample matrix would raise.
        return bad_domains

    input_data = [record[1] for record in collected_data]
    if scaler is not None:
        input_data = scaler.fit_transform(input_data)

    # Fixed seed so repeated runs flag the same domains.
    rng = np.random.RandomState(42)
    clf = IsolationForest(max_samples="auto", random_state=rng)
    clf.fit(input_data)

    for record, label in zip(collected_data, clf.predict(input_data)):
        if label == -1:  # predict() labels outliers -1 and inliers 1
            # Single-argument form prints the same text on Python 2 and 3.
            print("bad domains: %s" % (record,))
            bad_domains.add(record[0])
    return bad_domains


if __name__ == "__main__":
    train(get_data())

输出样例:

bad domains: ('openvpn.', [81.0, 5.0, 3.0, 14.0, 0.1728395061728395, 27.493827160493826, 32.76543209876543, 3.2857142857142856, 18.214285714285715, 3.0714285714285716, 3.255427209766844, 0.04938271604938271, 0.0, 0.3950617283950617, 0.12345679012345678, 0.00224517287831163])
bad domains: ('mobily.com.sa', [16.0, 1.0, 4.0, 12.0, 0.75, 47.3125, 108.8125, 1.0, 5.333333333333333, 0.0, 1.9166666666666667, 0.6875, 0.0, 0.375, 0.375, 0.0066050198150594455])
bad domains: ('vcl2728.com', [40.0, 2.0, 10.0, 27.0, 0.675, 67.125, 462.85, 3.3333333333333335, 28.555555555555557, 3.3703703703703702, 3.111111111111111, 0.025, 0.0, 0.0, 0.0, 0.00186219739292365])
bad domains: ('vkcache.com', [598.0, 1.0, 2.0, 528.0, 0.882943143812709, 47.0, 161.65886287625418, 1.0, 6.0, 0.005681818181818182, 2.453875312427234, 0.22909698996655517, 0.0, 0.11371237458193979, 0.0033444816053511705, 0.00017789795773144525])
bad domains: ('nsconcreteblock.info', [18.0, 2.0, 4.0, 18.0, 1.0, 87.0, 43.5, 1.0, 37.0, 5.0, 3.823329582775343, 1.0, 0.0, 0.0, 0.0, 0.0031928480204342275])
bad domains: ('topcdn.org', [52.0, 2.0, 4.0, 13.0, 0.25, 80.92307692307692, 56.38461538461539, 1.0, 40.92307692307692, 0.0, 4.176988788169356, 0.5, 0.0, 0.28846153846153844, 0.21153846153846154, 0.001188212927756654])
bad domains: ('bilibiligame.net', [6472.0, 165.0, 17.0, 32.0, 0.004944375772558714, 46.542954264524106, 88.28522867737948, 1.0, 18.65625, 2.84375, 3.4818361348887463, 0.9610630407911002, 0.0, 0.2376390605686032, 0.0004635352286773795, 1.659883277007961e-05])
bad domains: ('vip.', [2183.0, 386.0, 30.0, 32.0, 0.014658726523133303, 34.78515803939533, 23.834631241410904, 1.9375, 9.6875, 0.0, 2.83937270784057, 0.9436555199267064, 0.0, 0.09894640403114979, 0.011452130096197893, 6.58449220396123e-05])
bad domains: ('ixigua.com', [2707.0, 133.0, 29.0, 17.0, 0.006280014776505356, 33.71222755818249, 123.10749907646841, 1.0, 4.647058823529412, 0.8823529411764706, 1.9781718484300252, 0.9759881787957149, 0.0, 0.28075360177318065, 0.01699298115995567, 5.478911668986072e-05])
bad domains: ('expressvpn.', [890.0, 31.0, 36.0, 165.0, 0.1853932584269663, 41.89887640449438, 0.0, 1.0363636363636364, 11.224242424242425, 0.05454545454545454, 3.0592421535372565, 0.5325842696629214, 0.0, 0.0, 0.0, 0.00013408420488066506])

输入数据样例(已经提取了特征):

(u'abfxsc.com', (24, 1, 4, 11, 0.4583333333333333, 48.0, 56.041666666666664, 1.0, 8.0, 0.0, 3.0, 0.5, 0.0, 0.20833333333333334, 0.08333333333333333, 0.004340277777777778))
(u'dqdkws.cn', (71, 2, 7, 50, 0.704225352112676, 45.0, 79.859154929577471, 1.0, 6.0, 0.0, 2.4132632507067329, 0.5915492957746479, 0.0, 0.0, 0.0, 0.0015649452269170579))
(u'tcdnvod.com', (701, 51, 17, 40, 0.05706134094151213, 55.266761768901567, 56.370898716119832, 3.1749999999999998, 17.399999999999999, 0.125, 3.4810606143066232, 0.9714693295292439, 0.0, 0.39514978601997147, 0.0442225392296719, 0.00012905890248309329))
(u'0937jyg.com', (68, 4, 7, 19, 0.27941176470588236, 46.25, 67.529411764705884, 1.0, 5.3684210526315788, 0.0, 2.2469056830015672, 0.6323529411764706, 0.0, 0.0, 0.0, 0.001589825119236884))
(u'jcloud-cdn.com', (61, 3, 3, 11, 0.18032786885245902, 67.278688524590166, 66.311475409836063, 4.5454545454545459, 24.363636363636363, 0.18181818181818182, 3.5244668708659161, 0.4262295081967213, 0.0, 0.08196721311475409, 0.03278688524590164, 0.0012183235867446393))
(u'omacloud.com', (545, 8, 20, 29, 0.05321100917431193, 46.315596330275227, 30.722935779816513, 1.9655172413793103, 17.793103448275861, 0.0, 3.3836270422458083, 1.0, 0.0, 0.10825688073394496, 0.022018348623853212, 0.00019808256081134618))
(u'serverss.top', (144, 1, 15, 22, 0.1527777777777778, 46.604166666666664, 50.145833333333336, 1.0, 4.5909090909090908, 0.0, 2.1594720075625, 0.5277777777777778, 0.0, 0.2777777777777778, 0.06944444444444445, 0.00074504544777231408))
(u'ctripgslb.com', (601, 9, 10, 34, 0.056572379367720464, 60.512479201331118, 157.12479201331115, 3.0588235294117645, 17.911764705882351, 0.91176470588235292, 3.3912394967901913, 0.8585690515806988, 0.0, 0.3594009983361065, 0.016638935108153077, 0.00013748350197976243))
(u'kas-labs.com', (54, 2, 8, 15, 0.2777777777777778, 55.888888888888886, 142.37037037037038, 1.0, 12.466666666666667, 1.6000000000000001, 3.0989151803147923, 0.5, 0.0, 0.09259259259259259, 0.09259259259259259, 0.0016567263088137839))
(u'mccdnglb.com', (365, 4, 6, 21, 0.057534246575342465, 51.161643835616438, 98.161643835616445, 3.5238095238095237, 18.428571428571427, 0.19047619047619047, 3.4116298602195974, 0.989041095890411, 0.0, 0.16164383561643836, 0.01643835616438356, 0.00026775195458926852))
(u'localhost.', (28, 4, 3, 10, 0.35714285714285715, 41.142857142857146, 172.35714285714286, 1.8999999999999999, 10.9, 1.8999999999999999, 2.3999999999999999, 0.14285714285714285, 0.0, 0.0, 0.0, 0.004340277777777778))
(u'xdy-cdn.cn', (473, 5, 2, 50, 0.10570824524312897, 54.780126849894295, 46.545454545454547, 3.0, 14.74, 0.0, 3.1343677127142864, 0.5750528541226215, 0.0, 0.0, 0.0, 0.00019296823742811933))
(u'labkas.com', (24, 2, 6, 10, 0.4166666666666667, 56.666666666666664, 66.833333333333329, 2.0, 17.399999999999999, 1.7, 3.6751008468322333, 0.08333333333333333, 0.0, 0.0, 0.0, 0.0036764705882352941))
(u'site.', (62, 5, 22, 14, 0.22580645161290322, 43.322580645161288, 50.774193548387096, 1.9285714285714286, 11.785714285714286, 0.21428571428571427, 3.0365341332026929, 0.5806451612903226, 0.0, 0.11290322580645161, 0.06451612903225806, 0.0018615040953090098))
(u'ft25882.com', (39, 2, 5, 20, 0.5128205128205128, 49.0, 92.871794871794876, 1.0, 8.0, 0.0, 3.0, 0.5384615384615384, 0.0, 0.3076923076923077, 0.05128205128205128, 0.0026164311878597592))
(u'douyuyuba.com', (232, 4, 7, 115, 0.4956896551724138, 62.650862068965516, 97.504310344827587, 2.0, 21.530434782608694, 0.97391304347826091, 3.4599350912323117, 0.5560344827586207, 0.0, 0.25, 0.008620689655172414, 0.00034399724802201581))
(u'win.', (334, 7, 39, 23, 0.0688622754491018, 42.604790419161674, 60.008982035928142, 1.8695652173913044, 13.217391304347826, 0.21739130434782608, 2.9398183078690807, 0.7904191616766467, 0.0, 0.3772455089820359, 0.041916167664670656, 0.00035137034434293746))
(u'affise.com', (73, 3, 10, 10, 0.136986301369863, 49.246575342465754, 146.56164383561645, 1.0, 8.5, 0.0, 2.5368841208873407, 0.6027397260273972, 0.0, 0.273972602739726, 0.0547945205479452, 0.0013908205841446453))
(u'stripcdn.com', (46, 3, 8, 17, 0.3695652173913043, 44.043478260869563, 160.54347826086956, 1.0, 3.8823529411764706, 0.52941176470588236, 1.8718920798583554, 0.391304347826087, 0.0, 0.10869565217391304, 0.10869565217391304, 0.0024679170779861796))
(u'doonoo.cn', (198, 1, 8, 19, 0.09595959595959595, 42.111111111111114, 66.060606060606062, 1.0, 3.1052631578947367, 0.0, 1.6286506585399816, 0.5, 0.0, 0.2222222222222222, 0.025252525252525252, 0.00059966418805468941))
(u'nii.ac.jp', (34, 3, 8, 16, 0.47058823529411764, 43.029411764705884, 34.529411764705884, 1.3125, 7.3125, 0.1875, 2.4667777025215347, 0.4411764705882353, 0.0, 0.08823529411764706, 0.08823529411764706, 0.0034176349965823649))
(u'78dm.net', (41, 5, 6, 11, 0.2682926829268293, 39.146341463414636, 66.634146341463421, 1.0, 3.3636363636363638, 0.18181818181818182, 1.3510446035661767, 0.7317073170731707, 0.0, 0.3170731707317073, 0.04878048780487805, 0.0031152647975077881))
(u'gosuncdn.com', (587, 5, 36, 40, 0.06814310051107325, 53.325383304940374, 204.61328790459967, 3.25, 15.699999999999999, 0.0, 3.3370338393801235, 0.5724020442930153, 0.0, 0.09540034071550256, 0.010221465076660987, 0.00015973420228739378))
(u'gfnormal04aj.com', (68, 2, 2, 33, 0.4852941176470588, 62.0, 58.970588235294116, 1.0, 16.0, 0.0, 3.4444634232339926, 0.5147058823529411, 0.0, 0.25, 0.058823529411764705, 0.0011859582542694497))
(u'mediatoday.co.kr', (13, 1, 3, 12, 0.9230769230769231, 50.46153846153846, 100.61538461538461, 1.0, 4.583333333333333, 0.0, 1.7623953076615158, 1.0, 0.0, 0.23076923076923078, 0.23076923076923078, 0.007621951219512195))
(u'qinsx.cn', (127, 4, 8, 14, 0.11023622047244094, 29.811023622047244, 51.362204724409452, 1.0, 1.9285714285714286, 0.0, 0.9285714285714286, 0.5905511811023622, 0.0, 0.30708661417322836, 0.06299212598425197, 0.0013206550449022716))

参考:http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html#sklearn.ensemble.IsolationForest

使用isolation forest进行dns网络流量异常检测的更多相关文章

  1. 5-Spark高级数据分析-第五章 基于K均值聚类的网络流量异常检测

    据我们所知,有‘已知的已知’,有些事,我们知道我们知道:我们也知道,有 ‘已知的未知’,也就是说,有些事,我们现在知道我们不知道.但是,同样存在‘不知的不知’——有些事,我们不知道我们不知道. 上一章 ...

  2. 基于PySpark的网络服务异常检测系统 阶段总结(二)

    在上篇博文中介绍了网络服务异常检测的大概,本篇将详细介绍SVDD和Isolation Forest这两种算法 1. SVDD算法 SVDD的英文全称是Support Vector Data Descr ...

  3. alluxio网络流量异常分析【转】

    1. 介绍 2. 准备工作 2.1 tcpdump 2.2 winshark 2.3 安装iftop 2.4 alluxio网络通信相关的端口 3.iftop 锁定消耗流量最大的端口 4. dump数 ...

  4. activeMQ消费消息时网络流量异常大的问题

    http://www.cnblogs.com/baibaluo/archive/2012/12/24/2748468.html#2590289 公司有一个应用,多个线程从activeMQ中取消息,随着 ...

  5. 基于PySpark的网络服务异常检测系统 (四) Mysql与SparkSQL对接同步数据 kmeans算法计算预测异常

    基于Django Restframework和Spark的异常检测系统,数据库为MySQL.Redis, 消息队列为Celery,分析服务为Spark SQL和Spark Mllib,使用kmeans ...

  6. 网络KPI异常检测之时序分解算法

    时间序列数据伴随着我们的生活和工作.从牙牙学语时的“1, 2, 3, 4, 5, ……”到房价的走势变化,从金融领域的刷卡记录到运维领域的核心网性能指标.时间序列中的规律能加深我们对事物和场景的认识, ...

  7. Python机器学习笔记 异常点检测算法——Isolation Forest

    Isolation,意为孤立/隔离,是名词,其动词为isolate,forest是森林,合起来就是“孤立森林”了,也有叫“独异森林”,好像并没有统一的中文叫法.可能大家都习惯用其英文的名字isolat ...

  8. isolation forest进行异常点检测

    一.简介 孤立森林(Isolation Forest)是另外一种高效的异常检测算法,它和随机森林类似,但每次选择划分属性和划分点(值)时都是随机的,而不是根据信息增益或者基尼指数来选择.在建树过程中, ...

  9. (转)isolation forest进行异常点检测

    原文链接:https://www.cnblogs.com/gczr/p/9156971.html 一.简介 孤立森林(Isolation Forest)是另外一种高效的异常检测算法,它和随机森林类似, ...

随机推荐

  1. [AngularJS]Chapter 5 与服务器交互

    第八章有关于缓存的东西. [通过$http交互] 传统的AJAX请求如下 var xmlhttp = new XMLHttpRequest(); xmlhttp.onreadystatechange ...

  2. [ReactVR] Add Shapes Using 3D Primitives in React VR

    React VR ships with a handful of 3D primitives. We'll import primitives like <Sphere/>, <Box ...

  3. ABAP FIELD-SYMBOLS 有大作用- 将没有可改參数的增强出口变得也能改主程序的值了

    看下图代码: report  z_xul_test2 中 定义了 全局变量 G_DATA1 , 分别调用了 z_xul_tes1 中的 form  和 function zbapi_test , 这两 ...

  4. 黑马day14 过滤器概述&生命周期&运行过程

    过滤器:当訪问一个web资源的时候,过滤器就在你訪问这个web资源的前进行拦截...在放行过后...filter过滤器也能够做一些其它的事情. 编写过滤器的步骤: 1.写一个过滤器类实现filter接 ...

  5. Oracle 数据泵使用详解--精华版

    数据泵使用EXPDP和IMPDP时应该注意的事项: EXP和IMP是客户端工具程序,它们既可以在客户端使用,也可以在服务端使用. EXPDP和IMPDP是服务端的工具程序,他们只能在ORACLE服务端 ...

  6. 图论之tarjan缩点

    缩点,就是把一张有向有环图中的环缩成一个个点,形成一个有向无环图. 首先我介绍一下为什么这题要缩点(有人肯定觉得这是放屁,这不就是缩点的模板题吗?但我们不能这么想,考试的时候不会有人告诉你打什么板上去 ...

  7. inline元素和inline-block元素的4px空白间距解决方案

    实在不想写了,要吐了,看到一篇讲的比较全的文章,直接粘链接了 inline元素和inline-block元素的4px空白间距解决方案 出自脚本之家

  8. HDU 1010 Tempter of the Bone【DFS】

    学习剪枝的第一篇@_@学习别人的剪枝,一剪就是两天@_@---- 参看的这篇--http://blog.csdn.net/libin56842/article/details/8962512自己的小体 ...

  9. SQL之子查询

    子查询概念:把一个查询的结果在另一个查询中使用就叫做子查询 1.子查询作为条件时 当我们使用子查询作为条件时,若子查询返回值为多个,则会报以下错误: "子查询返回的值不止一个.当子查询跟随在 ...

  10. ActiveMQ学习笔记(11)----ActiveMQ的动态网络连接

    1. 多播协议multicast ActiveMQ使用Multicast协议将一个Service和其他的Broker是我Service里连接起来.IP Multicast是一个被用于网络中传输数据到其 ...