dga model train and test code
# _*_coding:UTF-8_*_ import operator
import tldextract
import random
import pickle
import os
import tflearn from math import log
from tflearn.data_utils import to_categorical, pad_sequences
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_1d, max_pool_1d
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import batch_normalization
from sklearn.model_selection import train_test_split def get_cnn_model(max_len, volcab_size=None):
if volcab_size is None:
volcab_size = 10240000 # Building convolutional network
network = tflearn.input_data(shape=[None, max_len], name='input')
network = tflearn.embedding(network, input_dim=volcab_size, output_dim=32) network = conv_1d(network, 64, 3, activation='relu', regularizer="L2")
network = max_pool_1d(network, 2)
network = conv_1d(network, 64, 3, activation='relu', regularizer="L2")
network = max_pool_1d(network, 2) network = batch_normalization(network)
network = fully_connected(network, 64, activation='relu')
network = dropout(network, 0.5) network = fully_connected(network, 2, activation='softmax')
sgd = tflearn.SGD(learning_rate=0.1, lr_decay=0.96, decay_step=1000)
network = regression(network, optimizer=sgd, loss='categorical_crossentropy') model = tflearn.DNN(network, tensorboard_verbose=0)
return model def get_data_from(file_name):
ans = []
with open(file_name) as f:
for line in f:
domain_name = line.strip()
ans.append(domain_name)
return ans def get_local_data(tag="labeled"):
white_data = get_data_from(file_name="dga_360_sorted.txt")
black_data = get_data_from(file_name="top-1m.csv")
return black_data, white_data def get_data():
black_x, white_x = get_local_data()
black_y, white_y = [1]*len(black_x), [0]*len(white_x) X = black_x + white_x
labels = black_y + white_y # Generate a dictionary of valid characters
valid_chars = {x:idx+1 for idx, x in enumerate(set(''.join(X)))} max_features = len(valid_chars) + 1
print("max_features:", max_features)
maxlen = max([len(x) for x in X])
print("max_len:", maxlen)
maxlen = min(maxlen, 256) # Convert characters to int and pad
X = [[valid_chars[y] for y in x] for x in X]
X = pad_sequences(X, maxlen=maxlen, value=0.) # Convert labels to 0-1
Y = to_categorical(labels, nb_classes=2) volcab_file = "volcab.pkl"
output = open(volcab_file, 'wb')
# Pickle dictionary using protocol 0.
data = {"valid_chars": valid_chars, "max_len": maxlen, "volcab_size": max_features}
pickle.dump(data, output)
output.close() return X, Y, maxlen, max_features def train_model():
X, Y, max_len, volcab_size = get_data() print("X len:", len(X), "Y len:", len(Y))
trainX, testX, trainY, testY = train_test_split(X, Y, test_size=0.2, random_state=42)
print(trainX[:1])
print(trainY[:1])
print(testX[-1:])
print(testY[-1:]) model = get_cnn_model(max_len, volcab_size)
model.fit(trainX, trainY, validation_set=(testX, testY), show_metric=True, batch_size=1024) filename = 'finalized_model.tflearn'
model.save(filename) model.load(filename)
print("Just review 3 sample data test result:")
result = model.predict(testX[0:3])
print(result) def test_model():
volcab_file = "volcab.pkl"
assert os.path.exists(volcab_file)
pkl_file = open(volcab_file, 'rb')
data = pickle.load(pkl_file)
valid_chars, max_document_length, max_features = data["valid_chars"], data["max_len"], data["volcab_size"] print("max_features:", max_features)
print("max_len:", max_document_length) cnn_model = get_cnn_model(max_document_length, max_features)
filename = 'finalized_model.tflearn'
cnn_model.load(filename)
print("predict domains:")
bls = list() with open("dga_360_sorted.txt") as f:
# with open("todo.txt") as f:
lines = f.readlines()
print("domain_list len:", len(lines))
cnt = 1000
for i in range(0, len(lines), cnt):
lines2 = lines[i:i+cnt]
domain_list = [line.strip() for line in lines2]
#print("domain_list sample:", domain_list[:5]) # Convert characters to int and pad
X = [[valid_chars[y] if y in valid_chars else 0 for y in x] for x in domain_list]
X = pad_sequences(X, maxlen=max_document_length, value=0.) result = cnn_model.predict(X)
for i, domain in enumerate(domain_list):
if result[i][1] > .5: #.95:
#print(lines2[i], domain + " is GDA")
print(lines2[i].strip() + "\t" + domain, result[i][1])
bls.append(domain)
else:
#print(lines2[i], domain )
pass
#print(bls)
print(len(bls) , "dga found!") if __name__ == "__main__":
print("train model...")
train_model()
print("test model...")
test_model()
dga model train and test code的更多相关文章
- 一步步开发自己的博客 .NET版(9、从model first替换成code first 问题记录)
为什么要改用code first 用过code first的基本上都不会再想用回model first或是db first(谁用谁知道).不要问我为什么不一开始就直接使用code first,因为那个 ...
- Pytorch本人疑问(2)model.train()和model.eval()的区别
我们在训练时如果使用了BN层和Dropout层,我们需要对model进行标识: model.train():在训练时使用BN层和Dropout层,对模型进行更改. model.eval():在评价时将 ...
- MVC学习6 学习使用Code First Migrations功能 把Model的更新同步到DB中
参考:http://www.asp.net/mvc/tutorials/mvc-4/getting-started-with-aspnet-mvc4/adding-a-new-field-to-th ...
- EF7 - What Does “Code First Only” Really Mean
这篇文章很有价值,但翻译了一段,实在翻译不下去了,没办法,只能转载了. 英文地址:http://blogs.msdn.com/b/adonet/archive/2014/10/21/ef7-what- ...
- Code First :使用Entity. Framework编程(8) ----转发 收藏
第8章 Code First将走向哪里? So far, this book has covered all of the Code First components that reached the ...
- Code First :使用Entity. Framework编程(7) ----转发 收藏
第7章 高级概念 The Code First modeling functionality that you have seen so far should be enough to get you ...
- Create Entity Data Model
http://www.entityframeworktutorial.net/EntityFramework5/create-dbcontext-in-entity-framework5.aspx 官 ...
- Clean Code – Chapter 6 Objects and Data Structures
Data Abstraction Hiding implementation Data/Object Anti-Symmetry Objects hide their data behind abst ...
- CV code references
转:http://www.sigvc.org/bbs/thread-72-1-1.html 一.特征提取Feature Extraction: SIFT [1] [Demo program][SI ...
随机推荐
- 01-路由跳转 安装less this.$router.replace(path) 解决vue/cli3.0语法报错问题
2==解决vue2.0里面控制台包的一些语法错误. https://www.jianshu.com/p/5e0a1541418b 在build==>webpack.base.conf.j下注释掉 ...
- 【poj3207】Ikki's Story IV - Panda's Trick(2-sat)
传送门 题意: 给出一个圆,圆上有\(n\)个点,依次为\(0,1,\cdots,n-1\). 现在要连接\(m\)对点,每次连接时可以直接从里面连,也可以从外面连. 最后问,连完这\(m\)对点后, ...
- 洛谷 P5596 【XR-4】题
洛谷 P5596 [XR-4]题 洛谷传送门 题目描述 小 X 遇到了一道题: 给定自然数 a,ba,b,求满足下列条件的自然数对 (x,y)(x,y) 的个数: y^2 - x^2 = ax + b ...
- pytest--生成HTML报告
前戏 我们做自动化,测试报告是必不可少的.方便自己查看,也可以供领导查看,一个测试报告就可以说明我们做了哪些事情,pytest-html插件给我们提供了一个很简陋的测试报告,为什么说简陋,因为是真简陋 ...
- 【day09】PHP
一.函数 1. 作用域(Scope) (1)局部变量:变量在声明的代码段中有效 a.动态变量 b.静态变量:static ,用在函数中,当调用函数后内存不释放,能存储变量的最后的值. (2)全局变量: ...
- Paper | Quality assessment of deblocked images
目录 1. 故事 2. 失真变化 3. 方法(PSNR-B) 4. 实验 这篇文章提出了一个PSNR-B指标,旨在衡量 压缩图像的块效应强度 或 去块效应后的残留块效应强度(比较去块效应算法的优劣). ...
- 大话设计模式Python实现-工厂方法模式
工厂方法模式(Factory Method Pattern):定义一个用于创建对象的接口,让子类决定实例化哪一个类,工厂方法使一个类的实例化延时到其子类. #!/usr/bin/env python ...
- 十二、深入理解Java内存模型
深入理解Java内存模型 [1]CPU和缓存的一致性 我们应该都知道,计算机在执行程序的时候,每条指令都是在CPU中执行的,而执行的时候,又免不了要和数据打交道.而计算机上面的数据,是存放在主存当 ...
- ThinkPHP查询数据的时候toArray()报错解决办法
当查找不到数据时toArray()会报错,如图 解决办法:先查找数据,然后加个判断,如果有数据再转化为数组,如果没有数据就给个空值,不想代码继续往下执行就return false;
- LINQ 之 SelectMany
声明:本文为www.cnc6.cn原创,转载时请注明出处,谢谢! 一.第一种用法: public static IEnumerable<TResult> SelectMany<TSo ...