代码如下,测试发现,是否对输入数据进行归一化/标准化对于结果没有影响:

import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler


def _clean_field(text):
    """Strip record decoration (brackets, quotes, ``u`` prefix) from one field."""
    for ch in "()[]":
        text = text.replace(ch, "")
    text = text.strip()
    # Only drop the Python 2 unicode prefix at the very start of the field.
    # A blanket replace of "u'" also eats the last letter of any value that
    # ends in "u" (e.g. u'yandex.ru' would become "yandex.r").
    if text.startswith("u'"):
        text = text[1:]
    return text.replace("'", "")


def parse_line(s):
    """Parse one feature record into ``(domain, [float, ...])``.

    A record looks like ``(u'abfxsc.com', (24, 1, 0.45, ...))``. The text
    before the first comma is the domain; every following comma-separated
    field is a feature value. Quotes, parentheses and square brackets are
    treated as decoration and removed.

    Args:
        s: One line of the feature file.

    Returns:
        A ``(domain, features)`` tuple; ``features`` is a list of floats and
        is empty when the line contains no comma.

    Raises:
        ValueError: If a feature field cannot be converted to ``float``.
    """
    fields = s.split(",")
    name = _clean_field(fields[0])
    return (name, [float(_clean_field(field)) for field in fields[1:]])


def get_data(path="feature.dat"):
    """Read the feature file and return a list of parsed records.

    Args:
        path: File to read; defaults to ``feature.dat`` in the working
            directory.

    Returns:
        A list of ``(domain, features)`` tuples, one per non-blank line.
    """
    with open(path) as f:
        # Blank lines would yield records with no features, which makes the
        # training matrix ragged, so they are skipped here.
        return [parse_line(line) for line in f if line.strip()]


def train(collected_data, scaler=None):
    """Fit an Isolation Forest on the records and report the outliers.

    The model is fitted and evaluated on the same data. Every record that
    ``predict`` labels ``-1`` is printed and its domain collected.

    Args:
        collected_data: Sequence of ``(domain, features)`` records as
            produced by ``get_data``.
        scaler: Optional preprocessing object exposing ``fit_transform``
            (for example ``StandardScaler()`` or ``MinMaxScaler()``). When
            given, the features are scaled before fitting. Per the note
            accompanying this script, scaling was observed not to change
            the result.

    Returns:
        The set of domains flagged as outliers (empty for empty input).
    """
    if not collected_data:
        return set()

    input_data = [record[1] for record in collected_data]
    if scaler is not None:
        input_data = scaler.fit_transform(input_data)

    # Fixed seed so repeated runs flag the same domains.
    rng = np.random.RandomState(42)
    # Other configurations that were tried:
    #   IsolationForest(max_samples=10*2, random_state=rng)
    #   IsolationForest(max_features=5)
    clf = IsolationForest(max_samples="auto", random_state=rng)
    clf.fit(input_data)
    pred_y = clf.predict(input_data)

    bad_domains = set()
    for record, label in zip(collected_data, pred_y):
        if label == -1:
            # Single-argument form prints the same text on Python 2 and 3.
            print("bad domains: %s" % (record,))
            bad_domains.add(record[0])
    return bad_domains


if __name__ == "__main__":
    dat = get_data()
    train(dat)

输出样例:

bad domains: ('openvpn.', [81.0, 5.0, 3.0, 14.0, 0.1728395061728395, 27.493827160493826, 32.76543209876543, 3.2857142857142856, 18.214285714285715, 3.0714285714285716, 3.255427209766844, 0.04938271604938271, 0.0, 0.3950617283950617, 0.12345679012345678, 0.00224517287831163])
bad domains: ('mobily.com.sa', [16.0, 1.0, 4.0, 12.0, 0.75, 47.3125, 108.8125, 1.0, 5.333333333333333, 0.0, 1.9166666666666667, 0.6875, 0.0, 0.375, 0.375, 0.0066050198150594455])
bad domains: ('vcl2728.com', [40.0, 2.0, 10.0, 27.0, 0.675, 67.125, 462.85, 3.3333333333333335, 28.555555555555557, 3.3703703703703702, 3.111111111111111, 0.025, 0.0, 0.0, 0.0, 0.00186219739292365])
bad domains: ('vkcache.com', [598.0, 1.0, 2.0, 528.0, 0.882943143812709, 47.0, 161.65886287625418, 1.0, 6.0, 0.005681818181818182, 2.453875312427234, 0.22909698996655517, 0.0, 0.11371237458193979, 0.0033444816053511705, 0.00017789795773144525])
bad domains: ('nsconcreteblock.info', [18.0, 2.0, 4.0, 18.0, 1.0, 87.0, 43.5, 1.0, 37.0, 5.0, 3.823329582775343, 1.0, 0.0, 0.0, 0.0, 0.0031928480204342275])
bad domains: ('topcdn.org', [52.0, 2.0, 4.0, 13.0, 0.25, 80.92307692307692, 56.38461538461539, 1.0, 40.92307692307692, 0.0, 4.176988788169356, 0.5, 0.0, 0.28846153846153844, 0.21153846153846154, 0.001188212927756654])
bad domains: ('bilibiligame.net', [6472.0, 165.0, 17.0, 32.0, 0.004944375772558714, 46.542954264524106, 88.28522867737948, 1.0, 18.65625, 2.84375, 3.4818361348887463, 0.9610630407911002, 0.0, 0.2376390605686032, 0.0004635352286773795, 1.659883277007961e-05])
bad domains: ('vip.', [2183.0, 386.0, 30.0, 32.0, 0.014658726523133303, 34.78515803939533, 23.834631241410904, 1.9375, 9.6875, 0.0, 2.83937270784057, 0.9436555199267064, 0.0, 0.09894640403114979, 0.011452130096197893, 6.58449220396123e-05])
bad domains: ('ixigua.com', [2707.0, 133.0, 29.0, 17.0, 0.006280014776505356, 33.71222755818249, 123.10749907646841, 1.0, 4.647058823529412, 0.8823529411764706, 1.9781718484300252, 0.9759881787957149, 0.0, 0.28075360177318065, 0.01699298115995567, 5.478911668986072e-05])
bad domains: ('expressvpn.', [890.0, 31.0, 36.0, 165.0, 0.1853932584269663, 41.89887640449438, 0.0, 1.0363636363636364, 11.224242424242425, 0.05454545454545454, 3.0592421535372565, 0.5325842696629214, 0.0, 0.0, 0.0, 0.00013408420488066506])

输入数据样例(已经提取了特征):

(u'abfxsc.com', (24, 1, 4, 11, 0.4583333333333333, 48.0, 56.041666666666664, 1.0, 8.0, 0.0, 3.0, 0.5, 0.0, 0.20833333333333334, 0.08333333333333333, 0.004340277777777778))
(u'dqdkws.cn', (71, 2, 7, 50, 0.704225352112676, 45.0, 79.859154929577471, 1.0, 6.0, 0.0, 2.4132632507067329, 0.5915492957746479, 0.0, 0.0, 0.0, 0.0015649452269170579))
(u'tcdnvod.com', (701, 51, 17, 40, 0.05706134094151213, 55.266761768901567, 56.370898716119832, 3.1749999999999998, 17.399999999999999, 0.125, 3.4810606143066232, 0.9714693295292439, 0.0, 0.39514978601997147, 0.0442225392296719, 0.00012905890248309329))
(u'0937jyg.com', (68, 4, 7, 19, 0.27941176470588236, 46.25, 67.529411764705884, 1.0, 5.3684210526315788, 0.0, 2.2469056830015672, 0.6323529411764706, 0.0, 0.0, 0.0, 0.001589825119236884))
(u'jcloud-cdn.com', (61, 3, 3, 11, 0.18032786885245902, 67.278688524590166, 66.311475409836063, 4.5454545454545459, 24.363636363636363, 0.18181818181818182, 3.5244668708659161, 0.4262295081967213, 0.0, 0.08196721311475409, 0.03278688524590164, 0.0012183235867446393))
(u'omacloud.com', (545, 8, 20, 29, 0.05321100917431193, 46.315596330275227, 30.722935779816513, 1.9655172413793103, 17.793103448275861, 0.0, 3.3836270422458083, 1.0, 0.0, 0.10825688073394496, 0.022018348623853212, 0.00019808256081134618))
(u'serverss.top', (144, 1, 15, 22, 0.1527777777777778, 46.604166666666664, 50.145833333333336, 1.0, 4.5909090909090908, 0.0, 2.1594720075625, 0.5277777777777778, 0.0, 0.2777777777777778, 0.06944444444444445, 0.00074504544777231408))
(u'ctripgslb.com', (601, 9, 10, 34, 0.056572379367720464, 60.512479201331118, 157.12479201331115, 3.0588235294117645, 17.911764705882351, 0.91176470588235292, 3.3912394967901913, 0.8585690515806988, 0.0, 0.3594009983361065, 0.016638935108153077, 0.00013748350197976243))
(u'kas-labs.com', (54, 2, 8, 15, 0.2777777777777778, 55.888888888888886, 142.37037037037038, 1.0, 12.466666666666667, 1.6000000000000001, 3.0989151803147923, 0.5, 0.0, 0.09259259259259259, 0.09259259259259259, 0.0016567263088137839))
(u'mccdnglb.com', (365, 4, 6, 21, 0.057534246575342465, 51.161643835616438, 98.161643835616445, 3.5238095238095237, 18.428571428571427, 0.19047619047619047, 3.4116298602195974, 0.989041095890411, 0.0, 0.16164383561643836, 0.01643835616438356, 0.00026775195458926852))
(u'localhost.', (28, 4, 3, 10, 0.35714285714285715, 41.142857142857146, 172.35714285714286, 1.8999999999999999, 10.9, 1.8999999999999999, 2.3999999999999999, 0.14285714285714285, 0.0, 0.0, 0.0, 0.004340277777777778))
(u'xdy-cdn.cn', (473, 5, 2, 50, 0.10570824524312897, 54.780126849894295, 46.545454545454547, 3.0, 14.74, 0.0, 3.1343677127142864, 0.5750528541226215, 0.0, 0.0, 0.0, 0.00019296823742811933))
(u'labkas.com', (24, 2, 6, 10, 0.4166666666666667, 56.666666666666664, 66.833333333333329, 2.0, 17.399999999999999, 1.7, 3.6751008468322333, 0.08333333333333333, 0.0, 0.0, 0.0, 0.0036764705882352941))
(u'site.', (62, 5, 22, 14, 0.22580645161290322, 43.322580645161288, 50.774193548387096, 1.9285714285714286, 11.785714285714286, 0.21428571428571427, 3.0365341332026929, 0.5806451612903226, 0.0, 0.11290322580645161, 0.06451612903225806, 0.0018615040953090098))
(u'ft25882.com', (39, 2, 5, 20, 0.5128205128205128, 49.0, 92.871794871794876, 1.0, 8.0, 0.0, 3.0, 0.5384615384615384, 0.0, 0.3076923076923077, 0.05128205128205128, 0.0026164311878597592))
(u'douyuyuba.com', (232, 4, 7, 115, 0.4956896551724138, 62.650862068965516, 97.504310344827587, 2.0, 21.530434782608694, 0.97391304347826091, 3.4599350912323117, 0.5560344827586207, 0.0, 0.25, 0.008620689655172414, 0.00034399724802201581))
(u'win.', (334, 7, 39, 23, 0.0688622754491018, 42.604790419161674, 60.008982035928142, 1.8695652173913044, 13.217391304347826, 0.21739130434782608, 2.9398183078690807, 0.7904191616766467, 0.0, 0.3772455089820359, 0.041916167664670656, 0.00035137034434293746))
(u'affise.com', (73, 3, 10, 10, 0.136986301369863, 49.246575342465754, 146.56164383561645, 1.0, 8.5, 0.0, 2.5368841208873407, 0.6027397260273972, 0.0, 0.273972602739726, 0.0547945205479452, 0.0013908205841446453))
(u'stripcdn.com', (46, 3, 8, 17, 0.3695652173913043, 44.043478260869563, 160.54347826086956, 1.0, 3.8823529411764706, 0.52941176470588236, 1.8718920798583554, 0.391304347826087, 0.0, 0.10869565217391304, 0.10869565217391304, 0.0024679170779861796))
(u'doonoo.cn', (198, 1, 8, 19, 0.09595959595959595, 42.111111111111114, 66.060606060606062, 1.0, 3.1052631578947367, 0.0, 1.6286506585399816, 0.5, 0.0, 0.2222222222222222, 0.025252525252525252, 0.00059966418805468941))
(u'nii.ac.jp', (34, 3, 8, 16, 0.47058823529411764, 43.029411764705884, 34.529411764705884, 1.3125, 7.3125, 0.1875, 2.4667777025215347, 0.4411764705882353, 0.0, 0.08823529411764706, 0.08823529411764706, 0.0034176349965823649))
(u'78dm.net', (41, 5, 6, 11, 0.2682926829268293, 39.146341463414636, 66.634146341463421, 1.0, 3.3636363636363638, 0.18181818181818182, 1.3510446035661767, 0.7317073170731707, 0.0, 0.3170731707317073, 0.04878048780487805, 0.0031152647975077881))
(u'gosuncdn.com', (587, 5, 36, 40, 0.06814310051107325, 53.325383304940374, 204.61328790459967, 3.25, 15.699999999999999, 0.0, 3.3370338393801235, 0.5724020442930153, 0.0, 0.09540034071550256, 0.010221465076660987, 0.00015973420228739378))
(u'gfnormal04aj.com', (68, 2, 2, 33, 0.4852941176470588, 62.0, 58.970588235294116, 1.0, 16.0, 0.0, 3.4444634232339926, 0.5147058823529411, 0.0, 0.25, 0.058823529411764705, 0.0011859582542694497))
(u'mediatoday.co.kr', (13, 1, 3, 12, 0.9230769230769231, 50.46153846153846, 100.61538461538461, 1.0, 4.583333333333333, 0.0, 1.7623953076615158, 1.0, 0.0, 0.23076923076923078, 0.23076923076923078, 0.007621951219512195))
(u'qinsx.cn', (127, 4, 8, 14, 0.11023622047244094, 29.811023622047244, 51.362204724409452, 1.0, 1.9285714285714286, 0.0, 0.9285714285714286, 0.5905511811023622, 0.0, 0.30708661417322836, 0.06299212598425197, 0.0013206550449022716))

参考:http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html#sklearn.ensemble.IsolationForest

使用isolation forest进行dns网络流量异常检测的更多相关文章

  1. 5-Spark高级数据分析-第五章 基于K均值聚类的网络流量异常检测

    据我们所知,有‘已知的已知’,有些事,我们知道我们知道:我们也知道,有 ‘已知的未知’,也就是说,有些事,我们现在知道我们不知道.但是,同样存在‘不知的不知’——有些事,我们不知道我们不知道. 上一章 ...

  2. 基于PySpark的网络服务异常检测系统 阶段总结(二)

    在上篇博文中介绍了网络服务异常检测的大概,本篇将详细介绍SVDD和Isolation Forest这两种算法 1. SVDD算法 SVDD的英文全称是Support Vector Data Descr ...

  3. alluxio网络流量异常分析【转】

    1. 介绍 2. 准备工作 2.1 tcpdump 2.2 wireshark 2.3 安装iftop 2.4 alluxio网络通信相关的端口 3.iftop 锁定消耗流量最大的端口 4. dump数 ...

  4. activeMQ消费消息时网络流量异常大的问题

    http://www.cnblogs.com/baibaluo/archive/2012/12/24/2748468.html#2590289 公司有一个应用,多个线程从activeMQ中取消息,随着 ...

  5. 基于PySpark的网络服务异常检测系统 (四) Mysql与SparkSQL对接同步数据 kmeans算法计算预测异常

    基于Django Restframework和Spark的异常检测系统,数据库为MySQL.Redis, 消息队列为Celery,分析服务为Spark SQL和Spark Mllib,使用kmeans ...

  6. 网络KPI异常检测之时序分解算法

    时间序列数据伴随着我们的生活和工作.从牙牙学语时的“1, 2, 3, 4, 5, ……”到房价的走势变化,从金融领域的刷卡记录到运维领域的核心网性能指标.时间序列中的规律能加深我们对事物和场景的认识, ...

  7. Python机器学习笔记 异常点检测算法——Isolation Forest

    Isolation,意为孤立/隔离,是名词,其动词为isolate,forest是森林,合起来就是“孤立森林”了,也有叫“独异森林”,好像并没有统一的中文叫法.可能大家都习惯用其英文的名字isolat ...

  8. isolation forest进行异常点检测

    一.简介 孤立森林(Isolation Forest)是另外一种高效的异常检测算法,它和随机森林类似,但每次选择划分属性和划分点(值)时都是随机的,而不是根据信息增益或者基尼指数来选择.在建树过程中, ...

  9. (转)isolation forest进行异常点检测

    原文链接:https://www.cnblogs.com/gczr/p/9156971.html 一.简介 孤立森林(Isolation Forest)是另外一种高效的异常检测算法,它和随机森林类似, ...

随机推荐

  1. C#-C#6.0新特性

    来自为知笔记(Wiz)

  2. cogs 466. [NOIP2009] 细胞分裂

    466. [NOIP2009] 细胞分裂 ★★   输入文件:cell.in   输出文件:cell.out   简单对比时间限制:1 s   内存限制:128 MB [问题描述]    Hanks ...

  3. jquery让 readOnly失效的方法

    re.attr("readOnly","true"); re.attr("readOnly",false); 注意 :false不能带引号

  4. HDU Train Problem I (STL_栈)

    Problem Description As the new term comes, the Ignatius Train Station is very busy nowadays. A lot o ...

  5. Azure RBAC(Roles Based Access Control)正式上线了

    期盼已久的Azure RBAC(Roles Based Access Control)正式上线了. 在非常多情况下,客户需要对各种类型的用户加以区分,以便做出适当的授权决定.基于角色的访问控制 (RB ...

  6. bzoj3295: [Cqoi2011]动态逆序对(cdq分治+树状数组)

    3295: [Cqoi2011]动态逆序对 题目:传送门 题解: 刚学完cdq分治,想起来之前有一道是树套树的题目可以用cdq分治来做...尝试一波 还是太弱了...想到了要做两次cdq...然后伏地 ...

  7. Making ViewState More Secure

    Unencrypted view state in ASP.NET 2.0 could leak sensitive information https://www.rapid7.com/db/vul ...

  8. iOS:简单使用UIAlertVIew和UIActionSheet

    做iOS开发的同学想必都用过UIAlertVIew或者UIActionSheet.UIAlertVIew 可以弹出一个出现在屏幕中间的提示视图,给用户展示信息,并让用户自己选择操作,UIActionS ...

  9. 【英雄会】微软题目:几个bing

    今天是元旦,开篇先祝福大家在新的一年心想事成,工作顺利,开心生活每一天 . 看到[英雄会]上出现了微软出的题目:几个bing,题目内容如下: 本届大赛由微软必应词典冠名,必应词典(Bing Dicti ...

  10. Devexpress控件使用一:GridControl

    1.控件及列表展示 1).控件 2).构建表格,用于列表展示 3).gridControl绑定数据 4).调用绑定:BindDataSource(InitDt()); 5).展示列表 2.表格的列配置 ...