Deep Dream 模型
本节的代码参考了TensorFlow 源码中的示例程序https://github.com/tensorflow/tensorflow/tree/master/tensorflow/examples/tutorials/deepdream,并做了适当修改。
4.2.1 导入Inception 模型
在chapter_4_data/中或者网址https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip 下载解压得到模型文件tensorflow_inception_graph.pb,将该文件拷贝到当前文件夹中(即chapter_4/中)。
使用下面的命令加载模型并打印一些基础信息:
python load_inception.py
# coding:utf-8
# 导入要用到的基本模块。
from __future__ import print_function
import numpy as np
import tensorflow as tf
# 创建图和Session
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)
# tensorflow_inception_graph.pb文件中,既存储了inception的网络结构也存储了对应的数据
# 使用下面的语句将之导入
model_fn = 'tensorflow_inception_graph.pb'
with tf.gfile.FastGFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
# 定义t_input为我们输入的图像
t_input = tf.placeholder(np.float32, name='input')
imagenet_mean = 117.0
# 输入图像需要经过处理才能送入网络中
# expand_dims是加一维,从[height, width, channel]变成[1, height, width, channel]
# t_input - imagenet_mean是减去一个均值
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})
# 找到所有卷积层
layers = [op.name for op in graph.get_operations() if op.type == 'Conv2D' and 'import/' in op.name]
# 输出卷积层层数
print('Number of layers', len(layers))
# 特别地,输出mixed4d_3x3_bottleneck_pre_relu的形状
name = 'mixed4d_3x3_bottleneck_pre_relu'
print('shape of %s: %s' % (name, str(graph.get_tensor_by_name('import/' + name + ':0').get_shape())))
4.2.2 生成原始的Deep Dream 图像
python gen_naive.py
# coding: utf-8
from __future__ import print_function
import os
from io import BytesIO
import numpy as np
from functools import partial
import PIL.Image
import scipy.misc
import tensorflow as tf
graph = tf.Graph()
model_fn = 'tensorflow_inception_graph.pb'
sess = tf.InteractiveSession(graph=graph)
with tf.gfile.FastGFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
t_input = tf.placeholder(np.float32, name='input')
imagenet_mean = 117.0
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})
def savearray(img_array, img_name):
    scipy.misc.toimage(img_array).save(img_name)
    print('img saved: %s' % img_name)
def render_naive(t_obj, img0, iter_n=20, step=1.0):
    # t_score是优化目标。它是t_obj的平均值
    # 结合调用处看,实际上就是layer_output[:, :, :, channel]的平均值
    t_score = tf.reduce_mean(t_obj)
    # 计算t_score对t_input的梯度
    t_grad = tf.gradients(t_score, t_input)[0]
    # 创建新图
    img = img0.copy()
    for i in range(iter_n):
        # 在sess中计算梯度,以及当前的score
        g, score = sess.run([t_grad, t_score], {t_input: img})
        # 对img应用梯度。step可以看做“学习率”
        g /= g.std() + 1e-8
        img += g * step
        print('score(mean)=%f' % (score))
    # 保存图片
    savearray(img, 'naive.jpg')
# 定义卷积层、通道数,并取出对应的tensor
name = 'mixed4d_3x3_bottleneck_pre_relu'
channel = 139
layer_output = graph.get_tensor_by_name("import/%s:0" % name)
# 定义原始的图像噪声
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
# 调用render_naive函数渲染
render_naive(layer_output[:, :, :, channel], img_noise, iter_n=20)

4.2.3 生成更大尺寸的Deep Dream 图像
python gen_multiscale.py
# coding:utf-8
from __future__ import print_function
import os
from io import BytesIO
import numpy as np
from functools import partial
import PIL.Image
import scipy.misc
import tensorflow as tf
graph = tf.Graph()
model_fn = 'tensorflow_inception_graph.pb'
sess = tf.InteractiveSession(graph=graph)
with tf.gfile.FastGFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
t_input = tf.placeholder(np.float32, name='input')
imagenet_mean = 117.0
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})
def savearray(img_array, img_name):
    scipy.misc.toimage(img_array).save(img_name)
    print('img saved: %s' % img_name)
def resize_ratio(img, ratio):
    min = img.min()
    max = img.max()
    img = (img - min) / (max - min) * 255
    img = np.float32(scipy.misc.imresize(img, ratio))
    img = img / 255 * (max - min) + min
    return img
def calc_grad_tiled(img, t_grad, tile_size=512):
    # 每次只对tile_size×tile_size大小的图像计算梯度,避免内存问题
    sz = tile_size
    h, w = img.shape[:2]
    # img_shift:先在行上做整体移动,再在列上做整体移动
    # 防止在tile的边缘产生边缘效应
    sx, sy = np.random.randint(sz, size=2)
    img_shift = np.roll(np.roll(img, sx, 1), sy, 0)
    grad = np.zeros_like(img)
    # y, x是开始位置的像素
    for y in range(0, max(h - sz // 2, sz), sz):
        for x in range(0, max(w - sz // 2, sz), sz):
            # 每次对sub计算梯度。sub的大小是tile_size×tile_size
            sub = img_shift[y:y + sz, x:x + sz]
            g = sess.run(t_grad, {t_input: sub})
            grad[y:y + sz, x:x + sz] = g
    # 使用np.roll移动回去
    return np.roll(np.roll(grad, -sx, 1), -sy, 0)
def render_multiscale(t_obj, img0, iter_n=10, step=1.0, octave_n=3, octave_scale=1.4):
    # 同样定义目标和梯度
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]
    img = img0.copy()
    for octave in range(octave_n):
        if octave > 0:
            # 每次将将图片放大octave_scale倍
            # 共放大octave_n - 1 次
            img = resize_ratio(img, octave_scale)
        for i in range(iter_n):
            # 调用calc_grad_tiled计算任意大小图像的梯度
            g = calc_grad_tiled(img, t_grad)
            g /= g.std() + 1e-8
            img += g * step
            print('.', end=' ')
    savearray(img, 'multiscale.jpg')
if __name__ == '__main__':
    name = 'mixed4d_3x3_bottleneck_pre_relu'
    channel = 139
    img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
    layer_output = graph.get_tensor_by_name("import/%s:0" % name)
    render_multiscale(layer_output[:, :, :, channel], img_noise, iter_n=20)

4.2.4 生成更高质量的Deep Dream 图像
python gen_lapnorm.py
# coding:utf-8
from __future__ import print_function
import os
from io import BytesIO
import numpy as np
from functools import partial
import PIL.Image
import scipy.misc
import tensorflow as tf
graph = tf.Graph()
model_fn = 'tensorflow_inception_graph.pb'
sess = tf.InteractiveSession(graph=graph)
with tf.gfile.FastGFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
t_input = tf.placeholder(np.float32, name='input')
imagenet_mean = 117.0
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})
def savearray(img_array, img_name):
    scipy.misc.toimage(img_array).save(img_name)
    print('img saved: %s' % img_name)
def resize_ratio(img, ratio):
    min = img.min()
    max = img.max()
    img = (img - min) / (max - min) * 255
    img = np.float32(scipy.misc.imresize(img, ratio))
    img = img / 255 * (max - min) + min
    return img
def calc_grad_tiled(img, t_grad, tile_size=512):
    sz = tile_size
    h, w = img.shape[:2]
    sx, sy = np.random.randint(sz, size=2)
    img_shift = np.roll(np.roll(img, sx, 1), sy, 0)  # 先在行上做整体移动,再在列上做整体移动
    grad = np.zeros_like(img)
    for y in range(0, max(h - sz // 2, sz), sz):
        for x in range(0, max(w - sz // 2, sz), sz):
            sub = img_shift[y:y + sz, x:x + sz]
            g = sess.run(t_grad, {t_input: sub})
            grad[y:y + sz, x:x + sz] = g
    return np.roll(np.roll(grad, -sx, 1), -sy, 0)
k = np.float32([1, 4, 6, 4, 1])
k = np.outer(k, k)
k5x5 = k[:, :, None, None] / k.sum() * np.eye(3, dtype=np.float32)
# 这个函数将图像分为低频和高频成分
def lap_split(img):
    with tf.name_scope('split'):
        # 做过一次卷积相当于一次“平滑”,因此lo为低频成分
        lo = tf.nn.conv2d(img, k5x5, [1, 2, 2, 1], 'SAME')
        # 低频成分放缩到原始图像一样大小得到lo2,再用原始图像img减去lo2,就得到高频成分hi
        lo2 = tf.nn.conv2d_transpose(lo, k5x5 * 4, tf.shape(img), [1, 2, 2, 1])
        hi = img - lo2
    return lo, hi
# 这个函数将图像img分成n层拉普拉斯金字塔
def lap_split_n(img, n):
    levels = []
    for i in range(n):
        # 调用lap_split将图像分为低频和高频部分
        # 高频部分保存到levels中
        # 低频部分再继续分解
        img, hi = lap_split(img)
        levels.append(hi)
    levels.append(img)
    return levels[::-1]
# 将拉普拉斯金字塔还原到原始图像
def lap_merge(levels):
    img = levels[0]
    for hi in levels[1:]:
        with tf.name_scope('merge'):
            img = tf.nn.conv2d_transpose(img, k5x5 * 4, tf.shape(hi), [1, 2, 2, 1]) + hi
    return img
# 对img做标准化。
def normalize_std(img, eps=1e-10):
    with tf.name_scope('normalize'):
        std = tf.sqrt(tf.reduce_mean(tf.square(img)))
        return img / tf.maximum(std, eps)
# 拉普拉斯金字塔标准化
def lap_normalize(img, scale_n=4):
    img = tf.expand_dims(img, 0)
    tlevels = lap_split_n(img, scale_n)
    # 每一层都做一次normalize_std
    tlevels = list(map(normalize_std, tlevels))
    out = lap_merge(tlevels)
    return out[0, :, :, :]
def tffunc(*argtypes):
    placeholders = list(map(tf.placeholder, argtypes))
    def wrap(f):
        out = f(*placeholders)
        def wrapper(*args, **kw):
            return out.eval(dict(zip(placeholders, args)), session=kw.get('session'))
        return wrapper
    return wrap
def render_lapnorm(t_obj, img0,
                   iter_n=10, step=1.0, octave_n=3, octave_scale=1.4, lap_n=4):
    # 同样定义目标和梯度
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]
    # 将lap_normalize转换为正常函数
    lap_norm_func = tffunc(np.float32)(partial(lap_normalize, scale_n=lap_n))
    img = img0.copy()
    for octave in range(octave_n):
        if octave > 0:
            img = resize_ratio(img, octave_scale)
        for i in range(iter_n):
            g = calc_grad_tiled(img, t_grad)
            # 唯一的区别在于我们使用lap_norm_func来标准化g!
            g = lap_norm_func(g)
            img += g * step
            print('.', end=' ')
    savearray(img, 'lapnorm.jpg')
if __name__ == '__main__':
    name = 'mixed4d_3x3_bottleneck_pre_relu'
    channel = 139
    img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
    layer_output = graph.get_tensor_by_name("import/%s:0" % name)
    render_lapnorm(layer_output[:, :, :, channel], img_noise, iter_n=20)

4.2.5 最终的Deep Dream 模型
python gen_deepdream.py
# coding:utf-8
from __future__ import print_function
import os
from io import BytesIO
import numpy as np
from functools import partial
import PIL.Image
import scipy.misc
import tensorflow as tf
graph = tf.Graph()
model_fn = 'tensorflow_inception_graph.pb'
sess = tf.InteractiveSession(graph=graph)
with tf.gfile.FastGFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
t_input = tf.placeholder(np.float32, name='input')  # define the input tensor
imagenet_mean = 117.0
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})
def savearray(img_array, img_name):
    scipy.misc.toimage(img_array).save(img_name)
    print('img saved: %s' % img_name)
def visstd(a, s=0.1):
    return (a - a.mean()) / max(a.std(), 1e-4) * s + 0.5
def resize_ratio(img, ratio):
    min = img.min()
    max = img.max()
    img = (img - min) / (max - min) * 255
    img = np.float32(scipy.misc.imresize(img, ratio))
    img = img / 255 * (max - min) + min
    return img
def resize(img, hw):
    min = img.min()
    max = img.max()
    img = (img - min) / (max - min) * 255
    img = np.float32(scipy.misc.imresize(img, hw))
    img = img / 255 * (max - min) + min
    return img
def calc_grad_tiled(img, t_grad, tile_size=512):
    sz = tile_size
    h, w = img.shape[:2]
    sx, sy = np.random.randint(sz, size=2)
    img_shift = np.roll(np.roll(img, sx, 1), sy, 0)  # 先在行上做整体移动,再在列上做整体移动
    grad = np.zeros_like(img)
    for y in range(0, max(h - sz // 2, sz), sz):
        for x in range(0, max(w - sz // 2, sz), sz):
            sub = img_shift[y:y + sz, x:x + sz]
            g = sess.run(t_grad, {t_input: sub})
            grad[y:y + sz, x:x + sz] = g
    return np.roll(np.roll(grad, -sx, 1), -sy, 0)
def tffunc(*argtypes):
    placeholders = list(map(tf.placeholder, argtypes))
    def wrap(f):
        out = f(*placeholders)
        def wrapper(*args, **kw):
            return out.eval(dict(zip(placeholders, args)), session=kw.get('session'))
        return wrapper
    return wrap
def render_deepdream(t_obj, img0,
                     iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]
    img = img0
    # 同样将图像进行金字塔分解
    # 此时提取高频、低频的方法比较简单。直接缩放就可以
    octaves = []
    for i in range(octave_n - 1):
        hw = img.shape[:2]
        lo = resize(img, np.int32(np.float32(hw) / octave_scale))
        hi = img - resize(lo, hw)
        img = lo
        octaves.append(hi)
    # 先生成低频的图像,再依次放大并加上高频
    for octave in range(octave_n):
        if octave > 0:
            hi = octaves[-octave]
            img = resize(img, hi.shape[:2]) + hi
        for i in range(iter_n):
            g = calc_grad_tiled(img, t_grad)
            img += g * (step / (np.abs(g).mean() + 1e-7))
            print('.', end=' ')
    img = img.clip(0, 255)
    savearray(img, 'deepdream.jpg')
if __name__ == '__main__':
    img0 = PIL.Image.open('test.jpg')
    img0 = np.float32(img0)
    name = 'mixed4d_3x3_bottleneck_pre_relu'
    channel = 139
    layer_output = graph.get_tensor_by_name("import/%s:0" % name)
    render_deepdream(layer_output[:, :, :, channel], img0)
    # name = 'mixed4c'
    # layer_output = graph.get_tensor_by_name("import/%s:0" % name)
    # render_deepdream(tf.square(layer_output), img0)

Deep Dream 模型的更多相关文章
- Deep Dream模型与实现
		
Deep Dream是谷歌公司在2015年公布的一项有趣的技术.在训练好的卷积神经网络中,只需要设定几个参数,就可以通过这项技术生成一张图像. 本文章的代码和图片都放在我的github上,想实现本文代 ...
 - Deep Learning模型之:CNN卷积神经网络(一)深度解析CNN
		
http://m.blog.csdn.net/blog/wu010555688/24487301 本文整理了网上几位大牛的博客,详细地讲解了CNN的基础结构与核心思想,欢迎交流. [1]Deep le ...
 - 深度学习在美团点评推荐平台排序中的应用&&  wide&&deep推荐系统模型--学习笔记
		
写在前面:据说下周就要xxxxxxxx, 吓得本宝宝赶紧找些广告的东西看看 gbdt+lr的模型之前是知道怎么搞的,dnn+lr的模型也是知道的,但是都没有试验过 深度学习在美团点评推荐平台排序中的运 ...
 - Top Deep Learning Projects in github
		
Top Deep Learning Projects A list of popular github projects related to deep learning (ranked by sta ...
 - Unity3d地图制作之模型高光
		
由于颇受暗黑破坏神美工的影响,最近都在研究怎么制作场景地图之类的. 那么今日讲的模型高光虽然和地图无关,但是也涉及到一些美工的知识,尤其是shader. 按照国际惯例,先贴一张图饱饱眼福. 大家可以看 ...
 - #Deep Learning回顾#之LeNet、AlexNet、GoogLeNet、VGG、ResNet
		
CNN的发展史 上一篇回顾讲的是2006年Hinton他们的Science Paper,当时提到,2006年虽然Deep Learning的概念被提出来了,但是学术界的大家还是表示不服.当时有流传的段 ...
 - Deep Learning(深度学习)学习笔记整理(二)
		
本文整理了网上几位大牛的博客,详细地讲解了CNN的基础结构与核心思想,欢迎交流. [1]Deep learning简介 [2]Deep Learning训练过程 [3]Deep Learning模型之 ...
 - Applied Deep Learning Resources
		
Applied Deep Learning Resources A collection of research articles, blog posts, slides and code snipp ...
 - Deep Learning for Information Retrieval
		
最近关注了一些Deep Learning在Information Retrieval领域的应用,得益于Deep Model在对文本的表达上展现的优势(比如RNN和CNN),我相信在IR的领域引入Dee ...
 
随机推荐
- 查看 java 中的编译的字节码文件
			
javap -c Atomicity ---------- javap -c 产生的字节码文件---------- Compiled from "Atomicity.java" p ...
 - Python学习之旅(二十)
			
Python基础知识(19):面向对象高级编程(Ⅱ) 定制类 形如“__xx__”的变量或函数在Python中是有特殊用途的 1.__str__ 让打印出来的结果更好看 __str__:面向用户:__ ...
 - ML.NET 0.10特性简介
			
IDataView被单独作为一个类库包 IDataView组件为表格式数据提供了非常高效的处理方式,尤其是用于机器学习和高级分析应用.它被设计为可以高效地处理高维数据和大型数据集.并且也适合处理属于更 ...
 - ubuntu下设置mysql自启
 - React Router API文档
			
React Router API文档 一.<BrowserRouter> 使用HTML5历史记录API(pushState,replaceState和popstate事件)的<Rou ...
 - react中进入某个详情页URL路劲参数Id获取问题
			
<Route path={`${match.url}/detail/:id`} component={AppManageAddDetail} /> const { match:{param ...
 - 图->最短路径->单源最短路径(迪杰斯特拉算法Dijkstra)
			
文字描述 引言:如下图一个交通系统,从A城到B城,有些旅客可能关心途中中转次数最少的路线,有些旅客更关心的是节省交通费用,而对于司机,里程和速度则是更感兴趣的信息.上面这些问题,都可以转化为求图中,两 ...
 - mysql in 排序 也可以按in里面的顺序来排序
			
SQL: select * from table where id IN (3,9,6);这样的情况取出来后,其实,id还是按3,6,9,排序的,但如果我们真要按IN里面的顺序排序怎么办?SQL能不能 ...
 - python练习题-day13
			
1.获取移动平均值 def wrapper(fun): def inner(*args,**kwargs): ret=fun(*args,**kwargs) ret.__next__() return ...
 - JavaScript 模拟 Dictionary
			
function Dictionary() { var items = {}; //判断是否包含Key值 this.has = function(key) { return key in items; ...