代码如下,测试发现,是否对输入数据进行归一化/标准化对于结果没有影响:

import numpy as np
from sklearn.ensemble import IsolationForest
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler def parse_line(s):
s = s.replace("u'", "").replace("'", "").replace("(", "").replace(")", "").replace("[", "").replace("]", "")
s2 = s.split(",")
dat = [float(_) for _ in s2[1:]]
return (s2[0], dat) def get_data():
with open("feature.dat") as f:
lines = f.readlines()
return [parse_line(line) for line in lines] def train(collected_data):
input_data = [c[1] for c in collected_data]
#scaler = StandardScaler().fit(input_data)
#input_data = scaler.transform(input_data) #min_max_scaler = MinMaxScaler()
#input_data = min_max_scaler.fit_transform(input_data)
#print input_data rng = np.random.RandomState(42)
#clf = IsolationForest(max_samples=10*2, random_state=rng)
#clf = IsolationForest(max_features=5)
clf = IsolationForest(max_samples="auto", random_state=rng)
clf.fit(input_data)
pred_y = clf.predict(input_data) bad_domains = set()
for i,y in enumerate(pred_y):
if y == -1:
print "bad domains:", collected_data[i]
bad_domains.add(collected_data[i][0]) if __name__ == "__main__":
dat = get_data()
train(dat)

输出样例:

bad domains: ('openvpn.', [81.0, 5.0, 3.0, 14.0, 0.1728395061728395, 27.493827160493826, 32.76543209876543, 3.2857142857142856, 18.214285714285715, 3.0714285714285716, 3.255427209766844, 0.04938271604938271, 0.0, 0.3950617283950617, 0.12345679012345678, 0.00224517287831163])
bad domains: ('mobily.com.sa', [16.0, 1.0, 4.0, 12.0, 0.75, 47.3125, 108.8125, 1.0, 5.333333333333333, 0.0, 1.9166666666666667, 0.6875, 0.0, 0.375, 0.375, 0.0066050198150594455])
bad domains: ('vcl2728.com', [40.0, 2.0, 10.0, 27.0, 0.675, 67.125, 462.85, 3.3333333333333335, 28.555555555555557, 3.3703703703703702, 3.111111111111111, 0.025, 0.0, 0.0, 0.0, 0.00186219739292365])
bad domains: ('vkcache.com', [598.0, 1.0, 2.0, 528.0, 0.882943143812709, 47.0, 161.65886287625418, 1.0, 6.0, 0.005681818181818182, 2.453875312427234, 0.22909698996655517, 0.0, 0.11371237458193979, 0.0033444816053511705, 0.00017789795773144525])
bad domains: ('nsconcreteblock.info', [18.0, 2.0, 4.0, 18.0, 1.0, 87.0, 43.5, 1.0, 37.0, 5.0, 3.823329582775343, 1.0, 0.0, 0.0, 0.0, 0.0031928480204342275])
bad domains: ('topcdn.org', [52.0, 2.0, 4.0, 13.0, 0.25, 80.92307692307692, 56.38461538461539, 1.0, 40.92307692307692, 0.0, 4.176988788169356, 0.5, 0.0, 0.28846153846153844, 0.21153846153846154, 0.001188212927756654])
bad domains: ('bilibiligame.net', [6472.0, 165.0, 17.0, 32.0, 0.004944375772558714, 46.542954264524106, 88.28522867737948, 1.0, 18.65625, 2.84375, 3.4818361348887463, 0.9610630407911002, 0.0, 0.2376390605686032, 0.0004635352286773795, 1.659883277007961e-05])
bad domains: ('vip.', [2183.0, 386.0, 30.0, 32.0, 0.014658726523133303, 34.78515803939533, 23.834631241410904, 1.9375, 9.6875, 0.0, 2.83937270784057, 0.9436555199267064, 0.0, 0.09894640403114979, 0.011452130096197893, 6.58449220396123e-05])
bad domains: ('ixigua.com', [2707.0, 133.0, 29.0, 17.0, 0.006280014776505356, 33.71222755818249, 123.10749907646841, 1.0, 4.647058823529412, 0.8823529411764706, 1.9781718484300252, 0.9759881787957149, 0.0, 0.28075360177318065, 0.01699298115995567, 5.478911668986072e-05])
bad domains: ('expressvpn.', [890.0, 31.0, 36.0, 165.0, 0.1853932584269663, 41.89887640449438, 0.0, 1.0363636363636364, 11.224242424242425, 0.05454545454545454, 3.0592421535372565, 0.5325842696629214, 0.0, 0.0, 0.0, 0.00013408420488066506])

输入数据样例(已经提取了特征):

(u'abfxsc.com', (24, 1, 4, 11, 0.4583333333333333, 48.0, 56.041666666666664, 1.0, 8.0, 0.0, 3.0, 0.5, 0.0, 0.20833333333333334, 0.08333333333333333, 0.004340277777777778))
(u'dqdkws.cn', (71, 2, 7, 50, 0.704225352112676, 45.0, 79.859154929577471, 1.0, 6.0, 0.0, 2.4132632507067329, 0.5915492957746479, 0.0, 0.0, 0.0, 0.0015649452269170579))
(u'tcdnvod.com', (701, 51, 17, 40, 0.05706134094151213, 55.266761768901567, 56.370898716119832, 3.1749999999999998, 17.399999999999999, 0.125, 3.4810606143066232, 0.9714693295292439, 0.0, 0.39514978601997147, 0.0442225392296719, 0.00012905890248309329))
(u'0937jyg.com', (68, 4, 7, 19, 0.27941176470588236, 46.25, 67.529411764705884, 1.0, 5.3684210526315788, 0.0, 2.2469056830015672, 0.6323529411764706, 0.0, 0.0, 0.0, 0.001589825119236884))
(u'jcloud-cdn.com', (61, 3, 3, 11, 0.18032786885245902, 67.278688524590166, 66.311475409836063, 4.5454545454545459, 24.363636363636363, 0.18181818181818182, 3.5244668708659161, 0.4262295081967213, 0.0, 0.08196721311475409, 0.03278688524590164, 0.0012183235867446393))
(u'omacloud.com', (545, 8, 20, 29, 0.05321100917431193, 46.315596330275227, 30.722935779816513, 1.9655172413793103, 17.793103448275861, 0.0, 3.3836270422458083, 1.0, 0.0, 0.10825688073394496, 0.022018348623853212, 0.00019808256081134618))
(u'serverss.top', (144, 1, 15, 22, 0.1527777777777778, 46.604166666666664, 50.145833333333336, 1.0, 4.5909090909090908, 0.0, 2.1594720075625, 0.5277777777777778, 0.0, 0.2777777777777778, 0.06944444444444445, 0.00074504544777231408))
(u'ctripgslb.com', (601, 9, 10, 34, 0.056572379367720464, 60.512479201331118, 157.12479201331115, 3.0588235294117645, 17.911764705882351, 0.91176470588235292, 3.3912394967901913, 0.8585690515806988, 0.0, 0.3594009983361065, 0.016638935108153077, 0.00013748350197976243))
(u'kas-labs.com', (54, 2, 8, 15, 0.2777777777777778, 55.888888888888886, 142.37037037037038, 1.0, 12.466666666666667, 1.6000000000000001, 3.0989151803147923, 0.5, 0.0, 0.09259259259259259, 0.09259259259259259, 0.0016567263088137839))
(u'mccdnglb.com', (365, 4, 6, 21, 0.057534246575342465, 51.161643835616438, 98.161643835616445, 3.5238095238095237, 18.428571428571427, 0.19047619047619047, 3.4116298602195974, 0.989041095890411, 0.0, 0.16164383561643836, 0.01643835616438356, 0.00026775195458926852))
(u'localhost.', (28, 4, 3, 10, 0.35714285714285715, 41.142857142857146, 172.35714285714286, 1.8999999999999999, 10.9, 1.8999999999999999, 2.3999999999999999, 0.14285714285714285, 0.0, 0.0, 0.0, 0.004340277777777778))
(u'xdy-cdn.cn', (473, 5, 2, 50, 0.10570824524312897, 54.780126849894295, 46.545454545454547, 3.0, 14.74, 0.0, 3.1343677127142864, 0.5750528541226215, 0.0, 0.0, 0.0, 0.00019296823742811933))
(u'labkas.com', (24, 2, 6, 10, 0.4166666666666667, 56.666666666666664, 66.833333333333329, 2.0, 17.399999999999999, 1.7, 3.6751008468322333, 0.08333333333333333, 0.0, 0.0, 0.0, 0.0036764705882352941))
(u'site.', (62, 5, 22, 14, 0.22580645161290322, 43.322580645161288, 50.774193548387096, 1.9285714285714286, 11.785714285714286, 0.21428571428571427, 3.0365341332026929, 0.5806451612903226, 0.0, 0.11290322580645161, 0.06451612903225806, 0.0018615040953090098))
(u'ft25882.com', (39, 2, 5, 20, 0.5128205128205128, 49.0, 92.871794871794876, 1.0, 8.0, 0.0, 3.0, 0.5384615384615384, 0.0, 0.3076923076923077, 0.05128205128205128, 0.0026164311878597592))
(u'douyuyuba.com', (232, 4, 7, 115, 0.4956896551724138, 62.650862068965516, 97.504310344827587, 2.0, 21.530434782608694, 0.97391304347826091, 3.4599350912323117, 0.5560344827586207, 0.0, 0.25, 0.008620689655172414, 0.00034399724802201581))
(u'win.', (334, 7, 39, 23, 0.0688622754491018, 42.604790419161674, 60.008982035928142, 1.8695652173913044, 13.217391304347826, 0.21739130434782608, 2.9398183078690807, 0.7904191616766467, 0.0, 0.3772455089820359, 0.041916167664670656, 0.00035137034434293746))
(u'affise.com', (73, 3, 10, 10, 0.136986301369863, 49.246575342465754, 146.56164383561645, 1.0, 8.5, 0.0, 2.5368841208873407, 0.6027397260273972, 0.0, 0.273972602739726, 0.0547945205479452, 0.0013908205841446453))
(u'stripcdn.com', (46, 3, 8, 17, 0.3695652173913043, 44.043478260869563, 160.54347826086956, 1.0, 3.8823529411764706, 0.52941176470588236, 1.8718920798583554, 0.391304347826087, 0.0, 0.10869565217391304, 0.10869565217391304, 0.0024679170779861796))
(u'doonoo.cn', (198, 1, 8, 19, 0.09595959595959595, 42.111111111111114, 66.060606060606062, 1.0, 3.1052631578947367, 0.0, 1.6286506585399816, 0.5, 0.0, 0.2222222222222222, 0.025252525252525252, 0.00059966418805468941))
(u'nii.ac.jp', (34, 3, 8, 16, 0.47058823529411764, 43.029411764705884, 34.529411764705884, 1.3125, 7.3125, 0.1875, 2.4667777025215347, 0.4411764705882353, 0.0, 0.08823529411764706, 0.08823529411764706, 0.0034176349965823649))
(u'78dm.net', (41, 5, 6, 11, 0.2682926829268293, 39.146341463414636, 66.634146341463421, 1.0, 3.3636363636363638, 0.18181818181818182, 1.3510446035661767, 0.7317073170731707, 0.0, 0.3170731707317073, 0.04878048780487805, 0.0031152647975077881))
(u'gosuncdn.com', (587, 5, 36, 40, 0.06814310051107325, 53.325383304940374, 204.61328790459967, 3.25, 15.699999999999999, 0.0, 3.3370338393801235, 0.5724020442930153, 0.0, 0.09540034071550256, 0.010221465076660987, 0.00015973420228739378))
(u'gfnormal04aj.com', (68, 2, 2, 33, 0.4852941176470588, 62.0, 58.970588235294116, 1.0, 16.0, 0.0, 3.4444634232339926, 0.5147058823529411, 0.0, 0.25, 0.058823529411764705, 0.0011859582542694497))
(u'mediatoday.co.kr', (13, 1, 3, 12, 0.9230769230769231, 50.46153846153846, 100.61538461538461, 1.0, 4.583333333333333, 0.0, 1.7623953076615158, 1.0, 0.0, 0.23076923076923078, 0.23076923076923078, 0.007621951219512195))
(u'qinsx.cn', (127, 4, 8, 14, 0.11023622047244094, 29.811023622047244, 51.362204724409452, 1.0, 1.9285714285714286, 0.0, 0.9285714285714286, 0.5905511811023622, 0.0, 0.30708661417322836, 0.06299212598425197, 0.0013206550449022716))

参考:http://scikit-learn.org/stable/modules/generated/sklearn.ensemble.IsolationForest.html#sklearn.ensemble.IsolationForest

使用isolation forest进行dns网络流量异常检测的更多相关文章

  1. 5-Spark高级数据分析-第五章 基于K均值聚类的网络流量异常检测

    据我们所知,有‘已知的已知’,有些事,我们知道我们知道:我们也知道,有 ‘已知的未知’,也就是说,有些事,我们现在知道我们不知道.但是,同样存在‘不知的不知’——有些事,我们不知道我们不知道. 上一章 ...

  2. 基于PySpark的网络服务异常检测系统 阶段总结(二)

    在上篇博文中介绍了网络服务异常检测的大概,本篇将详细介绍SVDD和Isolation Forest这两种算法 1. SVDD算法 SVDD的英文全称是Support Vector Data Descr ...

  3. alluxio网络流量异常分析【转】

    1. 介绍 2. 准备工作 2.1 tcpdump 2.2 winshark 2.3 安装iftop 2.4 alluxio网络通信相关的端口 3.iftop 锁定消耗流量最大的端口 4. dump数 ...

  4. activeMQ消费消息时网络流量异常大的问题

    http://www.cnblogs.com/baibaluo/archive/2012/12/24/2748468.html#2590289 公司有一个应用,多个线程从activeMQ中取消息,随着 ...

  5. 基于PySpark的网络服务异常检测系统 (四) Mysql与SparkSQL对接同步数据 kmeans算法计算预测异常

    基于Django Restframework和Spark的异常检测系统,数据库为MySQL.Redis, 消息队列为Celery,分析服务为Spark SQL和Spark Mllib,使用kmeans ...

  6. 网络KPI异常检测之时序分解算法

    时间序列数据伴随着我们的生活和工作.从牙牙学语时的“1, 2, 3, 4, 5, ……”到房价的走势变化,从金融领域的刷卡记录到运维领域的核心网性能指标.时间序列中的规律能加深我们对事物和场景的认识, ...

  7. Python机器学习笔记 异常点检测算法——Isolation Forest

    Isolation,意为孤立/隔离,是名词,其动词为isolate,forest是森林,合起来就是“孤立森林”了,也有叫“独异森林”,好像并没有统一的中文叫法.可能大家都习惯用其英文的名字isolat ...

  8. isolation forest进行异常点检测

    一.简介 孤立森林(Isolation Forest)是另外一种高效的异常检测算法,它和随机森林类似,但每次选择划分属性和划分点(值)时都是随机的,而不是根据信息增益或者基尼指数来选择.在建树过程中, ...

  9. (转)isolation forest进行异常点检测

    原文链接:https://www.cnblogs.com/gczr/p/9156971.html 一.简介 孤立森林(Isolation Forest)是另外一种高效的异常检测算法,它和随机森林类似, ...

随机推荐

  1. Android自己定义百度地图缩放图标

    自己定义实现Android百度地图的缩放图标,须要自己定义一个缩放控件,实现效果例如以下: 这里的缩放效果,实现了点击button能够对地图的放大缩小,通过手势放大与缩小也控制缩放图标的可用状态.详细 ...

  2. HDU 4331 Contest 4

    一个很直观的想法是,求出每个点上下左右能到达的最大长度.然后枚举其斜边...没想到过了.... 当然,题解有一个很巧妙的优化,利用树状数组,那个太巧妙了. #include<iostream&g ...

  3. 怎样在Nginxserver中启用Gzip压缩

    原文链接: Enable GZIP Compression on nginx Servers原文日期: 2014年7月16日翻译日期: 2014年7月19日翻译人员: 铁锚 速度决定一切,没有什么比一 ...

  4. (转)<![CDATA[]]>和转义字符

    被<![CDATA[]]>这个标记所包含的内容将表示为纯文本,比如<![CDATA[<]]>表示文本内容“<”. 此标记用于xml文档中,我们先来看看使用转义符的情 ...

  5. 虚拟化技术对照:Xen vs KVM

    恒天云:http://www.hengtianyun.com/download-show-id-68.html 一.说明 本文主要从功能方面和性能方面对Xen和KVM对照分析,分析出其优缺点指导我们恒 ...

  6. vue 路由demo

    <!DOCTYPE html> <html lang="en"> <head> <meta charset="UTF-8&quo ...

  7. iOS七种手势

    // 初始化一个UIimageView UIImageView *imageView = [[UIImageView alloc]initWithFrame:CGRectMake(100, 100, ...

  8. su和sudo的区别与使用,su命令,linux命令

    su和sudo的区别与使用 一.   使用 su 命令临时切换用户身份 1. su 的适用条件和威力 su命令就是切换用户 的工具,怎么理解呢?比如我们以普通用户beinan登录的,但要添加用户任务, ...

  9. WPF学习(一) - XAML

    Window.Grid.TextBox.Button等,都叫元素 xaml文档中,<>用来定义标签,标签可以用来描述元素或元素的属性,如: <Window> <Windo ...

  10. Android TextView加上阴影效果

    <TextView android:id="@+id/test_shadow" android:layout_width="wrap_content" a ...