一、目的

  以imagenet2012作为数据集,先用Inception-v3提取图像特征,再以这些特征作为输入训练一个自编码器。

  将上述自编码器作为预训练模型,随后在其编码器与解码器之间加入一个自表示层;训练结束后,把学到的自表示层系数作为相似度矩阵,对imagenet2012的1000个类别进行聚类。

二、预训练

  1.原理

  inception-v3+自编码器

  2.代码 

 # Pre-training script: trains a convolutional auto-encoder on Inception-v3 features.
import tensorflow as tf
import os
import numpy as np
import random
import tensorflow.contrib.slim as slim
import shutil

tf.app.flags.DEFINE_string('model_dir', 'model/inception', 'Inception-v3 pretrain model dir')
tf.app.flags.DEFINE_string('class_list', 'imagenet12/train_class_list.txt', 'ILSVRC2012 image class list')
tf.app.flags.DEFINE_string('img_path', '/media/gpu/bdc7606d-0e3c-4870-9a5d-4926fd9961c0/gpu/Works/imagenet/others/ILSVRC2012_img_train', 'ILSVRC2012 image train path')
tf.app.flags.DEFINE_integer('max_train_steps_pre', 200000, 'max train num')
tf.app.flags.DEFINE_boolean('restore', True, 'wheather restore model and variable from previous saved')
tf.app.flags.DEFINE_string('checkpoint_path', 'model/pre/', 'model saved path')
tf.app.flags.DEFINE_string('feature_train_path','feature_train','ILSVRC2012 train feature save path')
tf.app.flags.DEFINE_integer('large_multi', 100, 'enlarge the feature data')
tf.app.flags.DEFINE_integer('width', 32, 'the width of feature input')
tf.app.flags.DEFINE_integer('inception_out_size', 2048, 'the dim of feature input,inception out dim')
tf.app.flags.DEFINE_integer('train_num_of_every_batch', 2000, 'change the data every 2000 epochs')
FLAGS = tf.app.flags.FLAGS

kernel_num_list = [16, 32, 64]  # number of output channels per encoder layer
kernel_size_list = [[3, 3], [3, 3], [3, 3]]  # kernel size per layer
kernel_stride_list = [2, 2, 2]  # stride per layer
batch_size = 500  # number of classes sampled per batch (one image per class)


def get_inception_graph():
    """Import the frozen Inception-v3 graph into the default graph.

    Reads 'inception-v3.pb' from FLAGS.model_dir.

    Returns:
        The list returned by tf.import_graph_def for the single requested
        element 'pool_3/_reshape:0'; get_inception_output runs it to obtain
        the feature of an image.
    """
    with tf.gfile.FastGFile(os.path.join(FLAGS.model_dir, 'inception-v3.pb'), 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
        inception_out = tf.import_graph_def(graph_def, name='', return_elements=['pool_3/_reshape:0'])
    return inception_out


def create_graph_pre():
    """Build the convolutional auto-encoder and its reconstruction loss.

    Returns:
        (reconstruction_loss, input_placeholder, reconstructed_output).
        The placeholder is named 'inception_holder' and has shape
        [None, FLAGS.width, FLAGS.inception_out_size // FLAGS.width, 1].
    """
    # Integer division: a float dimension is not a valid placeholder shape
    # under Python 3.
    feature_cols = FLAGS.inception_out_size // FLAGS.width
    inception_input = tf.placeholder(tf.float32, [None, FLAGS.width, feature_cols, 1], name='inception_holder')
    with tf.variable_scope('DSC'):
        with tf.variable_scope('encoder'):
            net = slim.conv2d(inception_input, kernel_num_list[0], kernel_size_list[0], stride=kernel_stride_list[0], scope='conv_0')
            net = slim.conv2d(net, kernel_num_list[1], kernel_size_list[1], stride=kernel_stride_list[1], scope='conv_1')
            net = slim.conv2d(net, kernel_num_list[2], kernel_size_list[2], stride=kernel_stride_list[2], scope='conv_2')

        with tf.variable_scope('decoder'):
            # The decoder mirrors the encoder, layer by layer.
            net = slim.conv2d_transpose(net, kernel_num_list[1], kernel_size_list[2], stride=kernel_stride_list[2], scope='deconv_2')
            net = slim.conv2d_transpose(net, kernel_num_list[0], kernel_size_list[1], stride=kernel_stride_list[1], scope='deconv_1')
            net = slim.conv2d_transpose(net, 1, kernel_size_list[0], stride=kernel_stride_list[0], scope='deconv_0')

    restruct_loss = tf.losses.mean_squared_error(net, inception_input)
    return restruct_loss, inception_input, net


def get_inception_output(sess, img, txt_name, inception_out, save):
    """Compute the Inception-v3 feature of one image.

    Args:
        sess: session bound to the graph that contains the Inception graph.
        img: path of the JPEG image.
        txt_name: path the feature is written to when `save` is truthy.
        inception_out: value returned by get_inception_graph().
        save: whether to write the feature to `txt_name`.

    Returns:
        The feature reshaped to (FLAGS.width, -1).
    """
    # Close the image file deterministically instead of leaking the handle.
    with tf.gfile.FastGFile(img, 'rb') as f:
        image_data = f.read()
    output = sess.run(inception_out, feed_dict={'DecodeJpeg/contents:0': image_data})
    output = np.squeeze(output).reshape(FLAGS.width, -1)
    if save:
        np.savetxt(txt_name, output, fmt='%.6f')
    return output


def get_inception_batch(sess, inception_out, save=True):
    """Build one input batch for the auto-encoder.

    For each of the first `batch_size` classes in FLAGS.class_list one random
    image is chosen. Its feature is loaded from the cache under
    FLAGS.feature_train_path when present, otherwise it is extracted with
    Inception-v3 (and cached when `save` is truthy).

    Returns:
        Array of the stacked features multiplied by FLAGS.large_multi.
    """
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)[0:batch_size]
    batch = []
    for item in class_list:
        class_img_path = os.path.join(FLAGS.img_path, item)
        img_name = random.choice(os.listdir(class_img_path))
        # splitext yields '<name>.txt' for any extension; the former
        # img_name[:-4] + 'txt' only did so for 4-letter ones such as '.JPEG'.
        txt_rel = os.path.join(item, os.path.splitext(img_name)[0] + '.txt')
        txt_name = os.path.join(FLAGS.feature_train_path, txt_rel)
        if os.path.exists(txt_name):
            print('%s Found!' % txt_rel)
            batch_i = np.loadtxt(txt_name)
        else:
            if save:
                # The cache directory is only needed when the feature is written.
                dir_name = os.path.join(FLAGS.feature_train_path, item)
                if not os.path.exists(dir_name):
                    os.makedirs(dir_name)
            img = os.path.join(class_img_path, img_name)
            batch_i = get_inception_output(sess, img, txt_name, inception_out, save=save)
        batch.append(batch_i)
    return np.array(batch) * FLAGS.large_multi


def reconstruct(sess, net, img_inception):
    """Run the auto-encoder on `img_inception` to verify the reconstruction.

    Returns:
        (reconstruction, mean squared error between input and reconstruction).
    """
    output = sess.run([net], feed_dict={'inception_holder:0': img_inception})
    img_inception = np.squeeze(img_inception)
    output = np.squeeze(np.array(output))
    test_loss = np.square(img_inception - output)
    # Divide by the real element count instead of a hard-coded 32 * 64.
    return output, test_loss.sum() / test_loss.size


def interface_pre():
    """Pre-train the auto-encoder, checkpointing every 500 steps."""
    total_loss, inception_input, net = create_graph_pre()

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(1e-3, global_step, decay_steps=100, decay_rate=0.98, staircase=True)
    # global_step must be handed to minimize(); otherwise it is never
    # incremented and the learning rate never decays.
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss, global_step=global_step)

    saver = tf.train.Saver(max_to_keep=3)

    with tf.Session() as sess:
        ckpt = tf.train.latest_checkpoint(FLAGS.checkpoint_path) if FLAGS.restore else None
        if ckpt is not None:
            print('continue training from previous checkpoint')
            # Checkpoints are saved as '<checkpoint_path>-<step>'.
            pre_step = int(ckpt.rsplit('-', 1)[-1])
            saver.restore(sess, ckpt)
        else:
            if FLAGS.restore:
                # First run with restore enabled: nothing to resume from.
                print('no checkpoint found in %s, training from scratch' % FLAGS.checkpoint_path)
            elif os.path.exists(FLAGS.checkpoint_path):
                # remove previous model
                shutil.rmtree(FLAGS.checkpoint_path)
            if not os.path.exists(FLAGS.checkpoint_path):
                os.makedirs(FLAGS.checkpoint_path)
            sess.run(tf.global_variables_initializer())
            pre_step = 0

        inception_out = get_inception_graph()
        feature_cols = FLAGS.inception_out_size // FLAGS.width

        for step in range(FLAGS.max_train_steps_pre):
            if step % FLAGS.train_num_of_every_batch == 0:
                # Draw a fresh batch of images periodically.
                inception_output = get_inception_batch(sess, inception_out, save=False)
                inception_output = inception_output.reshape(-1, inception_output.shape[1], inception_output.shape[2], 1)
            # Permute by the actual batch length so a class list shorter than
            # batch_size cannot cause an out-of-range index.
            perm = np.random.permutation(len(inception_output))
            inception_output = inception_output[perm]

            _, loss_value = sess.run([train_op, total_loss], feed_dict={'inception_holder:0': inception_output})
            if step % 100 == 0:
                print("step %d :total_loss= %f" % (step, loss_value))
            if step % 500 == 0 and step > 0:
                # save model; the meta graph is only written with the first save
                write_meta_graph = step <= 500
                all_step = pre_step + step
                saver.save(sess, FLAGS.checkpoint_path, global_step=all_step, write_meta_graph=write_meta_graph)
                # Check the reconstruction on a fixed test image.
                img_inception = get_inception_output(sess, 'cropped_panda.jpg', 'cropped_panda.txt', inception_out, False)
                img_out, test_loss = reconstruct(
                    sess, net, FLAGS.large_multi * img_inception.reshape(-1, FLAGS.width, feature_cols, 1))
                print("test loss= %.5f" % test_loss)


if __name__ == '__main__':
    interface_pre()

三、训练

  1.原理

  以imagenet2012各类别在inception-v3特征上的类平均向量作为输入来训练模型,将学到的自表示系数作为聚类的输入,从而得到聚类结果并进行可视化。

  2.代码  

 # Training script: adds a self-expression layer to the pre-trained auto-encoder and clusters the classes.
import tensorflow as tf
import os
import numpy as np
import random
import tensorflow.contrib.slim as slim
import tensorflow.contrib.slim.nets as nets
import shutil
from scipy.sparse import coo_matrix
from sklearn.cluster import spectral_clustering
from scipy.sparse.linalg import svds
from sklearn import cluster
from sklearn.preprocessing import normalize

tf.app.flags.DEFINE_string('class_list', '../imagenet12/train_class_list.txt', 'ILSVRC2012 image class list')
tf.app.flags.DEFINE_string('img_path', '/media/gpu/bdc7606d-0e3c-4870-9a5d-4926fd9961c0/gpu/Works/imagenet/others/ILSVRC2012_img_train', 'ILSVRC2012 image train path')
tf.app.flags.DEFINE_integer('max_train_steps', 200000, 'max train num')
tf.app.flags.DEFINE_boolean('restore', False, 'wheather restore model and variable from previous saved')
tf.app.flags.DEFINE_string('pretrain_path', '../model/pre/', 'pretrain model path')
tf.app.flags.DEFINE_string('train_path', 'model/train/', 'train model path')
tf.app.flags.DEFINE_string('Coef_path','Coef/','save path of self_express xishu')
tf.app.flags.DEFINE_integer('large_multi', 100, '')
tf.app.flags.DEFINE_integer('width', 32, '')
tf.app.flags.DEFINE_integer('inception_out_size', 2048, '')
tf.app.flags.DEFINE_float('self_express_loss_weight',1,'')
tf.app.flags.DEFINE_float('regularizer_loss_weight',0.01,'')
tf.app.flags.DEFINE_integer('train_num_of_every_batch', 5000, '')
tf.app.flags.DEFINE_string('cluster_path','cluster','cluster result path')
tf.app.flags.DEFINE_string('data_path','avg_train_vector','imagenet2012 average feature path')
FLAGS = tf.app.flags.FLAGS

kernel_num_list = [16, 32, 64]
kernel_size_list = [[3, 3], [3, 3], [3, 3]]
kernel_stride_list = [2, 2, 2]
batch_size = 1000  # number of class-average vectors fed at once; also the size of Coef
learn_rate = 0.001


def create_graph_pre():
    """Build the auto-encoder with a self-expression layer in the middle.

    The encoder output is flattened to [batch_size, -1], multiplied by the
    trainable [batch_size, batch_size] matrix `Coef`, reshaped back and
    decoded.

    Returns:
        (net, loss, Coef, reconstruct_loss, self_express_loss,
        regularizer_loss), where `loss` is the sum of the three partial
        losses and `net` is the decoder output.
    """
    feature_cols = int(FLAGS.inception_out_size / FLAGS.width)
    inception_input = tf.placeholder(tf.float32, [None, FLAGS.width, feature_cols, 1], name='inception_holder')
    with tf.variable_scope('DSC'):
        with slim.arg_scope([slim.conv2d], weights_regularizer=slim.l2_regularizer(0.0005)):
            with tf.variable_scope('encoder'):
                net = slim.conv2d(inception_input, kernel_num_list[0], kernel_size_list[0], stride=kernel_stride_list[0], scope='conv_0')
                net = slim.conv2d(net, kernel_num_list[1], kernel_size_list[1], stride=kernel_stride_list[1], scope='conv_1')
                net = slim.conv2d(net, kernel_num_list[2], kernel_size_list[2], stride=kernel_stride_list[2], scope='conv_2')
                # NOTE(review): the original indentation was lost; the
                # self-expression layer is assumed to sit inside the
                # 'encoder' scope (this only affects the name of Coef).
                self_express_x = net
                net = tf.reshape(net, [batch_size, -1], name='reshape_to_flat')
                Coef = slim.model_variable('Coef',
                                           shape=[batch_size, batch_size],
                                           initializer=tf.truncated_normal_initializer(stddev=0.1),
                                           regularizer=slim.l2_regularizer(0.0005), trainable=True)
                net = tf.matmul(Coef, net, name='mutmul')

            with tf.variable_scope('decoder'):
                # Three stride-2 convolutions shrink each spatial dim by 8.
                net = tf.reshape(net, [batch_size, int(FLAGS.width / 8), int(FLAGS.inception_out_size / FLAGS.width / 8), kernel_num_list[2]], name='reshape_to_normal')
                self_express_x_c = net
                net = slim.conv2d_transpose(net, kernel_num_list[1], kernel_size_list[2], stride=kernel_stride_list[2], scope='deconv_2')
                net = slim.conv2d_transpose(net, kernel_num_list[0], kernel_size_list[1], stride=kernel_stride_list[1], scope='deconv_1')
                net = slim.conv2d_transpose(net, 1, kernel_size_list[0], stride=kernel_stride_list[0], scope='deconv_0')

    reconstruct_loss = tf.losses.mean_squared_error(net, inception_input)
    self_express_loss = FLAGS.self_express_loss_weight * tf.losses.mean_squared_error(self_express_x, self_express_x_c)
    regularizer_loss = FLAGS.regularizer_loss_weight * tf.reduce_sum(tf.pow(Coef, 2.0))

    loss = reconstruct_loss + self_express_loss + regularizer_loss
    return net, loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss


def get_inception_batch_avg():
    """Load the class-average feature of the first `batch_size` classes.

    Each '<class>.txt' under FLAGS.data_path is reshaped to
    (FLAGS.width, -1) and multiplied by FLAGS.large_multi.

    Returns:
        Array of the stacked class features.
    """
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)[0:batch_size]
    res = []
    for class_name in class_list:
        data = np.loadtxt(os.path.join(FLAGS.data_path, class_name + '.txt'))
        # Use the flags rather than the hard-coded 32x64 / 100 so this stays
        # in sync with the placeholder built in create_graph_pre.
        res.append(data.reshape(FLAGS.width, -1) * FLAGS.large_multi)
    return np.array(res)


def interface():
    """Train the self-expression model and dump Coef every 1000 steps."""
    net, total_loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss = create_graph_pre()

    global_step = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(1e-4, global_step, decay_steps=100, decay_rate=0.98, staircase=True)
    # global_step must be handed to minimize(); otherwise it is never
    # incremented and the learning rate never decays.
    train_op = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(total_loss, global_step=global_step)
    saver = tf.train.Saver(max_to_keep=3)
    with tf.Session() as sess:
        ckpt = tf.train.latest_checkpoint(FLAGS.train_path) if FLAGS.restore else None
        if ckpt is not None:
            print('continue training from previous checkpoint')
            # Checkpoints are saved as '<train_path>-<step>'.
            pre_step = int(ckpt.rsplit('-', 1)[-1])
            saver.restore(sess, ckpt)
        else:
            if FLAGS.restore:
                print('no checkpoint found in %s, starting from the pretrain model' % FLAGS.train_path)
            else:
                # remove previous model and Coef
                if os.path.exists(FLAGS.train_path):
                    shutil.rmtree(FLAGS.train_path)
                if os.path.exists(FLAGS.Coef_path):
                    shutil.rmtree(FLAGS.Coef_path)
            if not os.path.exists(FLAGS.train_path):
                os.makedirs(FLAGS.train_path)
            # restore from pretrain
            sess.run(tf.global_variables_initializer())
            pre_step = 0
            ckpt = tf.train.latest_checkpoint(FLAGS.pretrain_path)
            if ckpt is None:
                raise IOError('no pretrain checkpoint found in %s' % FLAGS.pretrain_path)
            # Skip the step counter: a counter carried over from pre-training
            # would start this run with an already decayed learning rate.
            restore_vars = [v for v in slim.get_variables_to_restore() if v is not global_step]
            variable_restore_op = slim.assign_from_checkpoint_fn(ckpt, restore_vars, ignore_missing_vars=True)
            variable_restore_op(sess)
        if not os.path.exists(FLAGS.Coef_path):
            os.makedirs(FLAGS.Coef_path)

        # The input is the fixed set of class-average vectors, so it is loaded
        # once. (The former get_inception_graph() call was removed: the
        # function is not defined in this script and its result was unused.)
        inception_output = get_inception_batch_avg()
        inception_output = inception_output.reshape(-1, inception_output.shape[1], inception_output.shape[2], 1)
        for step in range(FLAGS.max_train_steps):
            _, loss_value, Coef_val, rec_val, see_val, reg_val = sess.run(
                [train_op, total_loss, Coef, reconstruct_loss, self_express_loss, regularizer_loss],
                feed_dict={'inception_holder:0': inception_output})
            if step % 100 == 0:
                print("step %d :total_loss= %f,rec_loss= %f,see_val=%f,reg_val=%f"
                      % (step, loss_value, rec_val, see_val, reg_val))

            if step % 1000 == 0 and step > 0:
                # Write the meta graph with the first save only. The former
                # test `step > 500` was always true here, so it was never
                # written at all.
                write_meta_graph = step <= 1000
                all_step = pre_step + step
                saver.save(sess, FLAGS.train_path, global_step=all_step, write_meta_graph=write_meta_graph)
                np.savetxt(os.path.join(FLAGS.Coef_path, str(all_step) + '.txt'), Coef_val, fmt='%.6f')


def thrC(C):
    """Replace every entry of C by its absolute value, in place, and return C."""
    np.abs(C, out=C)
    return C


def post_proC(C, N):
    """Cluster the rows of the coefficient matrix into N groups.

    Args:
        C: coefficient matrix, used as the similarity matrix.
        N: number of clusters.

    Returns:
        The label array produced by spectral clustering.
    """
    # Symmetrise so the matrix is a valid affinity matrix.
    C = 0.5 * (C + C.T)
    if not os.path.exists(FLAGS.cluster_path):
        os.makedirs(FLAGS.cluster_path)
    # os.path.join: plain string concatenation dropped the path separator and
    # wrote 'clusterC_abs.txt' next to the directory.
    np.savetxt(os.path.join(FLAGS.cluster_path, 'C_abs.txt'), C, fmt='%.6f')
    graph = coo_matrix(C)
    labels = spectral_clustering(graph, n_clusters=N)
    return labels


def vis(N, labels, copy_examples=True):
    """Write the class names of each cluster and optionally copy examples.

    For every cluster i a file '<cluster_path>/<i>.txt' lists its classes.
    When `copy_examples` is truthy, one random image of each class is copied
    into '<cluster_path>/<i>/'.

    Args:
        N: number of clusters.
        labels: cluster label of every class, in class-list order.
        copy_examples: whether to copy example images. The original tested
            `if vis:`, i.e. the function object itself, which is always
            true; the default keeps that behavior.
    """
    labels = np.asarray(labels)
    # Load the class list once instead of once per cluster.
    class_list = np.loadtxt(FLAGS.class_list, dtype=str)
    if not os.path.exists(FLAGS.cluster_path):
        os.makedirs(FLAGS.cluster_path)
    for i in range(N):
        print(i)
        sub_class_list = class_list[np.flatnonzero(labels == i)]
        np.savetxt(os.path.join(FLAGS.cluster_path, str(i) + '.txt'), sub_class_list, fmt='%s')
        if not copy_examples:
            continue
        dir_path = os.path.join(FLAGS.cluster_path, str(i))
        if os.path.exists(dir_path):
            shutil.rmtree(dir_path)
        os.makedirs(dir_path)
        # copy an example to dir_path
        for sub_class_item in sub_class_list:
            img_path = os.path.join(FLAGS.img_path, sub_class_item)
            random_img = random.choice(os.listdir(img_path))
            shutil.copyfile(os.path.join(img_path, random_img), os.path.join(dir_path, random_img))


if __name__ == '__main__':
    interface()

    # Coefficient matrix of the last save, used as the similarity matrix.
    C = np.loadtxt(os.path.join(FLAGS.Coef_path, '199000.txt'))
    C = thrC(C)
    N = 32
    grp = post_proC(C, N)
    vis(N, grp)

tensorflow-用DASC结合Inception-v3对imagenet2012聚类实现的更多相关文章

  1. Inception V3 的 tensorflow 实现

    tensorflow 官方给出的实现:models/inception_v3.py at master · tensorflow/models · GitHub 1. 模型结构 首先来看 Incept ...

  2. 源码分析——迁移学习Inception V3网络重训练实现图片分类

    1. 前言 近些年来,随着以卷积神经网络(CNN)为代表的深度学习在图像识别领域的突破,越来越多的图像识别算法不断涌现.在去年,我们初步成功尝试了图像识别在测试领域的应用:将网站样式错乱问题.无线领域 ...

  3. 微调Inception V3网络-对Satellite分类

    目录 1. 流程概述 2. 准备数据集 2.1 Satellite数据集介绍 3. Inception V3网络 4. 训练 4.1 基于Keras微调Inception V3网络 4.2 Keras ...

  4. 1、VGG16 2、VGG19 3、ResNet50 4、Inception V3 5、Xception介绍——迁移学习

    ResNet, AlexNet, VGG, Inception: 理解各种各样的CNN架构 本文翻译自ResNet, AlexNet, VGG, Inception: Understanding va ...

  5. 脸型分类-Face shape classification using Inception v3

    本文链接:https://blog.csdn.net/u011961856/article/details/77984667函数解析github 代码:https://github.com/adoni ...

  6. 网络结构解读之inception系列四:Inception V3

    网络结构解读之inception系列四:Inception V3   Inception V3根据前面两篇结构的经验和新设计的结构的实验,总结了一套可借鉴的网络结构设计的原则.理解这些原则的背后隐藏的 ...

  7. 从GoogLeNet至Inception v3

    从GoogLeNet至Inception v3 一.CNN发展纵览 我们先来看一张图片: 1985年,Rumelhart和Hinton等人提出了后向传播(Back Propagation,BP)算法( ...

  8. 经典分类CNN模型系列其五:Inception v2与Inception v3

    经典分类CNN模型系列其五:Inception v2与Inception v3 介绍 Inception v2与Inception v3被作者放在了一篇paper里面,因此我们也作为一篇blog来对其 ...

  9. [译]与TensorFlow的第一次接触(三)之聚类

    转自 [译]与TensorFlow的第一次接触(三)之聚类 2016.08.09 16:58* 字数 4316 阅读 7916评论 5喜欢 18 前一章节中介绍的线性回归是一种监督学习算法,我们使用数 ...

  10. 深度学习面试题29:GoogLeNet(Inception V3)

    目录 使用非对称卷积分解大filters 重新设计pooling层 辅助构造器 使用标签平滑 参考资料 在<深度学习面试题20:GoogLeNet(Inception V1)>和<深 ...

随机推荐

  1. An overview of network penetration testing

    1. an SQLi vulnerability will allow you  to do the  following query the database using select statem ...

  2. python 基于机器学习识别验证码

    1.背景    验证码自动识别在模拟登陆上使用的较为广泛,一直有耳闻好多人在使用机器学习来识别验证码,最近因为刚好接触这方面的知识,所以特定研究了一番.发现网上已有很多基于machine learni ...

  3. LLDB 中从地址设置为变量

    // set language and import framework settings set target.language swift expr -l Swift -- import UIKi ...

  4. logrotate命令

    logrotate是个十分有用的工具,它可以自动对日志进行截断(或轮循).压缩以及删除旧的日志文件,例如,你可以设置logrotate,让/var/log/foo日志文件每30天轮循,并删除超过6个月 ...

  5. 多媒体开发(6):滤镜实现各种图片效果 | Video-Filters | avfilter | 变色

    之前讲过使用FFmpeg的drawtext滤镜(把图片或文字加到视频上),而实际上,FFmpeg的滤镜很强大,远不止加字幕或加图片的功能.滤镜是很有趣的,可以把图片变模糊.变色.缩放旋转,等等. 本文 ...

  6. Python 30分钟快速入门指南

    学习地址 中文版:Python 30分钟入门指南 英文版:Learn X in Y minutes 学习时间 2019/03/10 19:00 - 19:32,多用了2分钟.

  7. 取消layUI中日期选择控件默认填充日期

    input标签中使用日期选择控件填写,加载时默认填充当前日期, 标签设置了placeholder="请选择" autocomplete="off",但是并没有效 ...

  8. angularJs实现数据双向绑定的原理

    angular1.x在指定的事件触发时(比如dom事件,xhr响应事件,浏览器定位变更事件,定时器事件),通过脏值检测的方式比对数据是否有变更,来决定是否更新视图. angular2使用了zone.j ...

  9. Auth模块、Forms组件

    Auth模块 auth模块是Django自带的用户认证模块: 我们在开发一个网站的时候,无可避免的需要设计实现网站的用户系统.此时我们需要实现包括用户注册.用户登录.用户认证.注销.修改密码等功能,这 ...

  10. Scrapy 框架

    1. 基本使用 1.1 定义及安装 为了爬取网站数据,提取结构性数据而编写的应用框架 scrapy组件工作流程 引擎首先会将爬虫文件中的起始url获取,并且提交到调度器中.如果需要从url中下载数据, ...