TF Boys (TensorFlow Boys ) 养成记（五）： CIFAR10 Model 和 TensorFlow 的四种交叉熵介绍

有了数据，有了网络结构，下面我们就来写 cifar10 的代码。

首先处理输入，在 /home/your_name/TensorFlow/cifar10/ 下建立 cifar10_input.py（注意：后文的 cifar10.py 使用 import my_cifar10_input 导入输入模块，文件名须与该导入语句保持一致），输入如下代码：

from __future__ import absolute_import        # 绝对导入

from __future__ import division                # 精确除法，/是精确除，//是取整除

from __future__ import print_function        # 打印函数

import os

import tensorflow as tf

# 建立一个 cifar10_data 的类， 输入文件名队列，输出 labels 和images

class cifar10_data(object):
    """One decoded CIFAR-10 record pulled from a filename queue.

    Constructing an instance builds the graph ops that read a single
    fixed-length record and exposes the results as ``self.label`` (int32,
    ``label_bytes`` elements) and ``self.image`` (float32,
    height x width x depth).
    """

    def __init__(self, filename_queue):
        # Record layout: 1 label byte followed by 32 * 32 * 3 = 3072 image bytes.
        self.height, self.width, self.depth = 32, 32, 3
        self.label_bytes = 1
        self.image_bytes = self.height * self.width * self.depth
        # Every record is therefore 1 + 3072 = 3073 bytes long.
        self.record_bytes = self.label_bytes + self.image_bytes
        self.label, self.image = self.read_cifar10(filename_queue)

    def read_cifar10(self, filename_queue):
        """Return ``(label, image)`` tensors for the next record in the queue."""
        record_reader = tf.FixedLengthRecordReader(record_bytes = self.record_bytes)
        _, raw_record = record_reader.read(filename_queue)
        all_bytes = tf.decode_raw(raw_record, tf.uint8)

        # tf.slice(input, begin, size): the label occupies the leading byte(s).
        label_slice = tf.slice(all_bytes, [0], [self.label_bytes])
        label = tf.cast(label_slice, tf.int32)

        # The image bytes follow the label and are reshaped as
        # depth x height x width (3 x 32 x 32).
        flat_pixels = tf.slice(all_bytes, [self.label_bytes], [self.image_bytes])
        planar = tf.reshape(flat_pixels, [self.depth, self.height, self.width])

        # Reorder to height x width x depth (32 x 32 x 3), then go to float32.
        interleaved = tf.transpose(planar, (1, 2, 0))
        return label, tf.cast(interleaved, tf.float32)

def inputs(data_dir, batch_size, train = True, name = 'input'):
    """Build the queue-based CIFAR-10 input pipeline.

    Args:
        data_dir: directory holding the CIFAR-10 binary files.
        batch_size: number of examples in each returned batch.
        train: when true, read data_batch_1.bin .. data_batch_5.bin;
            otherwise read test_batch.bin.
        name: name scope wrapping the ops, which keeps the graph drawn by
            TensorBoard tidy.

    Returns:
        (images, labels): a shuffled batch of whitened float32 images and
        the matching labels reshaped to [batch_size].

    Raises:
        ValueError: if any of the expected data files does not exist.
    """
    # Minimum number of elements left in the shuffle queue after a dequeue;
    # a larger value gives better mixing.
    min_after_dequeue = 20000
    num_preprocess_threads = 16

    with tf.name_scope(name):
        # The two modes differ only in which files they read.
        if train:
            filenames = [os.path.join(data_dir, 'data_batch_%d.bin' % ii)
                         for ii in range(1, 6)]
        else:
            filenames = [os.path.join(data_dir, 'test_batch.bin')]

        # Fail early when a data file is missing.
        for f in filenames:
            if not tf.gfile.Exists(f):
                raise ValueError('Failed to find file: ' + f)

        # Turn the file names into a filename queue and decode one record.
        filename_queue = tf.train.string_input_producer(filenames)
        read_input = cifar10_data(filename_queue)

        # Per-image whitening; the network is simple, and accuracy is very
        # low without this step.
        images = tf.image.per_image_whitening(read_input.image)
        labels = read_input.label

        # Size the queue from batch_size. The previous hard-coded 20192 is
        # 20000 + 3 * 64, which only leaves room for a full batch above
        # min_after_dequeue while batch_size <= 192.
        image, label = tf.train.shuffle_batch(
            [images, labels], batch_size = batch_size,
            num_threads = num_preprocess_threads,
            min_after_dequeue = min_after_dequeue,
            capacity = min_after_dequeue + 3 * batch_size)

        return image, tf.reshape(label, [batch_size])

在 /home/your_name/TensorFlow/cifar10/ 下建立 cifar10.py，输入如下代码

from __future__ import absolute_import
from __future__ import division

from __future__ import print_function

import os

import os.path

import time

from datetime import datetime

import numpy as np

from six.moves import xrange

import tensorflow as tf

import my_cifar10_input

BATCH_SIZE = 64

LEARNING_RATE = 0.1

MAX_STEP = 50000
TRAIN = True

# 用 get_variable 在 CPU 上定义变量，默认用常量 0.1 初始化（下文用于各层的偏置）

def variable_on_cpu(name, shape, initializer = tf.constant_initializer(0.1)):
    """Get a float32 variable through tf.get_variable, pinned to '/cpu:0'.

    When no initializer is supplied, the default is a constant initializer
    with value 0.1.
    """
    with tf.device('/cpu:0'):
        return tf.get_variable(name, shape, initializer = initializer,
                               dtype = tf.float32)

 # 用 get_variable 在 CPU 上定义变量

def variables(name, shape, stddev):
    """Return a float32 CPU variable initialised from a truncated normal.

    Delegates to variable_on_cpu, passing a truncated_normal_initializer
    whose standard deviation is ``stddev``.
    """
    normal_init = tf.truncated_normal_initializer(stddev = stddev,
                                                  dtype = tf.float32)
    return variable_on_cpu(name, shape, normal_init)

# 定义网络结构

def inference(images):
    """Build the CIFAR-10 classifier graph and return the raw logits.

    Layer order: conv1 -> pool1 + LRN -> conv2 -> LRN + pool2 ->
    local3 (384 units) -> local4 (192 units) -> softmax_linear (10 units).

    Args:
        images: batch of 3-channel images. The flattening step reshapes with
            the module-level BATCH_SIZE, so the batch dimension must equal
            BATCH_SIZE.

    Returns:
        The output of the final linear layer, shaped [BATCH_SIZE, 10].
        No softmax is applied here; it is folded into the loss.
    """
    with tf.variable_scope('conv1') as scope:
        # 5x5 kernels over 3 input channels, producing 64 feature maps.
        weights = variables('weights', [5, 5, 3, 64], 5e-2)
        # Convolution with stride 1 in every dimension.
        conv = tf.nn.conv2d(images, weights, [1, 1, 1, 1], padding = 'SAME')
        biases = variable_on_cpu('biases', [64])
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name = scope.name)
        # Histogram summary of the conv1 activations.
        tf.histogram_summary(scope.name + '/activations', conv1)

    with tf.variable_scope('pooling1_lrn') as scope:
        # Max pooling with a 3x3 window and a stride of 2.
        pool1 = tf.nn.max_pool(conv1, ksize = [1, 3, 3, 1],
                               strides = [1, 2, 2, 1],
                               padding = 'SAME', name = 'pool1')
        # Local response normalisation.
        norm1 = tf.nn.lrn(pool1, 4, bias = 1.0, alpha = 0.001 / 9.0,
                          beta = 0.75, name = 'norm1')

    with tf.variable_scope('conv2') as scope:
        weights = variables('weights', [5, 5, 64, 64], 5e-2)
        conv = tf.nn.conv2d(norm1, weights, [1, 1, 1, 1], padding = 'SAME')
        biases = variable_on_cpu('biases', [64])
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name = scope.name)
        tf.histogram_summary(scope.name + '/activations', conv2)

    with tf.variable_scope('pooling2_lrn') as scope:
        # Second stage normalises first, then pools. The op names are
        # 'norm2' / 'pool2' so the graph labels match the layer they belong
        # to (they were previously copy-pasted as 'norm1' / 'pool1').
        norm2 = tf.nn.lrn(conv2, 4, bias = 1.0, alpha = 0.001 / 9.0,
                          beta = 0.75, name = 'norm2')
        pool2 = tf.nn.max_pool(norm2, ksize = [1, 3, 3, 1],
                               strides = [1, 2, 2, 1],
                               padding = 'SAME', name = 'pool2')

    with tf.variable_scope('local3') as scope:
        # First fully connected layer: flatten each example to one row.
        reshape = tf.reshape(pool2, [BATCH_SIZE, -1])
        dim = reshape.get_shape()[1].value
        weights = variables('weights', shape = [dim, 384], stddev = 0.004)
        biases = variable_on_cpu('biases', [384])
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases,
                            name = scope.name)
        tf.histogram_summary(scope.name + '/activations', local3)

    with tf.variable_scope('local4') as scope:
        # Second fully connected layer.
        weights = variables('weights', shape = [384, 192], stddev = 0.004)
        biases = variable_on_cpu('biases', [192])
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases,
                            name = scope.name)
        tf.histogram_summary(scope.name + '/activations', local4)

    with tf.variable_scope('softmax_linear') as scope:
        # Linear layer only: the actual softmax is computed in the loss.
        weights = variables('weights', [192, 10], stddev = 1 / 192.0)
        biases = variable_on_cpu('biases', [10])
        softmax_linear = tf.add(tf.matmul(local4, weights), biases,
                                name = scope.name)
        tf.histogram_summary(scope.name + '/activations', softmax_linear)

    return softmax_linear

# 交叉熵损失层

def losses(logits, labels):
    """Return the mean sparse softmax cross-entropy of logits vs. labels.

    Labels are cast to int64 class indices before use. The resulting loss
    is also recorded as a scalar summary named '<scope name>/x_entropy'.
    """
    with tf.variable_scope('loss') as scope:
        class_ids = tf.cast(labels, tf.int64)
        # This op applies the softmax itself, so it takes the raw logits.
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits, class_ids, name='cross_entropy_per_example')
        loss = tf.reduce_mean(per_example, name = 'loss')
        tf.scalar_summary(scope.name + '/x_entropy', loss)
    return loss

现在来看下为什么要用 tf.nn.sparse_softmax_cross_entropy_with_logits 这么长的一个函数。在官方文档中，一共有 4 种交叉熵损失函数：

1. tf.nn.sigmoid_cross_entropy_with_logits(logits, targets,name=None)

2. tf.nn.softmax_cross_entropy_with_logits(logits, labels,dim=-1, name=None)

3. tf.nn.sparse_softmax_cross_entropy_with_logits(logits,labels, name=None)

4. tf.nn.weighted_cross_entropy_with_logits(logits, targets,pos_weight, name=None)

分别来看一下：

1）第一个函数就是传统的 sigmoid 交叉熵，假设 x = logits, z = targets，那么第一个函数的交叉熵损失可以写作：

z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))

注意，sigmoid 交叉熵对每个输出单独做二分类，因此既可用于二分类，也可用于各类别相互独立、互不排斥的多标签分类；logits 和 targets 的维度要相同。

2）第二个函数是 softmax 交叉熵，用于多分类，并且各类别之间互斥：一个元素只能属于一个类，不能既属于这个类又属于那个类。并且，也要求 logits 和 labels 的维度相同。

例如，上面的 losses 代码中目标分为10类，logits 是 64*10 维度的，而 targets(也就是labels) 是 [64] 维度的，就不能用这个函数，要想使用这个函数，得把 labels 变成 64*10 的 onehot encoding (独热编码)，假设 labels 的 64 个值分别是：[1,5,2,3,0,4,9,8,7,5,6,4,5,8...]，那么 labels 变成独热编码以后，第一行变成：[0,1,0,0,0,0,0,0,0,0]，第二行变为：[0,0,0,0,0,1,0,0,0,0]，第三行：[0,0,1,0,0,0,0,0,0,0]，也就是：每行的第 label 个值变为1，其他是0，用代码可以如下写：

# Build a 64 x 10 one-hot matrix: row i gets a 1.0 in column labels[i].
# np.float64 is used because the np.float alias has been removed from NumPy
# and raises AttributeError there.
targets = np.zeros([64, 10], dtype = np.float64)
for index, value in enumerate(labels):
    targets[index, value] = 1.0

3）也就是我们所使用的函数，与第二个函数不同的一点是，不要求维度相同，只要求第 0 维相同，若 logits 是 64*10 维度的， targets(也就是labels) 是 [64] 维度的，那么第 0 个维度相同，就可以使用这个函数了，不需要进行 onehot encoding ，从上一篇文章我们所画出来的流程图可以明显看出来，loss 层的输入，一个是 64*10 维，一个是 64 维。并且这个函数，自带了 softmax 的计算，所以，在 inference 的最后一层，我们实际上计算的不是真正的 softmax。

4）和第一个函数差不多相同，只是可以加一个权重 pos_weight，假设 x = logits, z = targets, q = pos_weight，那么第四个函数的交叉熵损失为：

  q * z * -log(sigmoid(x)) + (1 - z) * -log(1 - sigmoid(x))

= q * z * -log(1 / (1 + exp(-x))) + (1 - z) * -log(exp(-x) / (1 + exp(-x)))

= q * z * log(1 + exp(-x)) + (1 - z) * (-log(exp(-x)) + log(1 + exp(-x)))

= q * z * log(1 + exp(-x)) + (1 - z) * (x + log(1 + exp(-x)))

= (1 - z) * x + (qz +  1 - z) * log(1 + exp(-x))

= (1 - z) * x + (1 + (q - 1) * z) * log(1 + exp(-x))

参考文献：

1. https://www.tensorflow.org/api_docs/python/nn/classification

2. https://github.com/tensorflow/models/tree/master/tutorials/image/cifar10

TF Boys (TensorFlow Boys ) 养成记（五）： CIFAR10 Model 和 TensorFlow 的四种交叉熵介绍的更多相关文章

『TensorFlow』分类问题与两种交叉熵
关于categorical cross entropy 和 binary cross entropy的比较,差异一般体现在不同的分类(二分类.多分类等)任务目标,可以参考文章keras中两种交叉熵损失 ...
TF Boys (TensorFlow Boys ) 养成记（一）：TensorFlow 基本操作
本资料是在Ubuntu14.0.4版本下进行,用来进行图像处理,所以只介绍关于图像处理部分的内容,并且默认TensorFlow已经配置好,如果没有配置好,请参考官方文档配置安装,推荐用pip安装.关于 ...
TF Boys (TensorFlow Boys ) 养成记（二）： TensorFlow 数据读取
TensorFlow 的 How-Tos,讲解了这么几点: 1. 变量:创建,初始化,保存,加载,共享: 2. TensorFlow 的可视化学习,(r0.12版本后,加入了Embedding Vis ...
TF Boys (TensorFlow Boys ) 养成记（三）： TensorFlow 变量共享
上次说到了 TensorFlow 从文件读取数据,这次我们来谈一谈变量共享的问题. 为什么要共享变量?我举个简单的例子:例如,当我们研究生成对抗网络GAN的时候,判别器的任务是,如果接收到的是生成器生 ...
【转】TensorFlow四种Cross Entropy算法实现和应用
http://www.jianshu.com/p/75f7e60dae95 作者:陈迪豪来源:CSDNhttp://dataunion.org/26447.html 交叉熵介绍交叉熵(Cross ...
TF Boys (TensorFlow Boys ) 养成记（五）
有了数据,有了网络结构,下面我们就来写 cifar10 的代码. 首先处理输入,在 /home/your_name/TensorFlow/cifar10/ 下建立 cifar10_input.py,输 ...
TF Boys (TensorFlow Boys ) 养成记（六）： CIFAR10 Train 和 TensorBoard 简介
圣诞节玩的有点嗨,差点忘记更新.祝大家昨天圣诞节快乐,再过几天元旦节快乐. 来继续学习,在/home/your_name/TensorFlow/cifar10/ 下新建文件夹cifar10_train ...
TF Boys (TensorFlow Boys ) 养成记（四）：TensorFlow 简易 CIFAR10 分类网络
前面基本上把 TensorFlow 的在图像处理上的基础知识介绍完了,下面我们就用 TensorFlow 来搭建一个分类 cifar10 的神经网络. 首先准备数据: cifar10 的数据集共有 6 ...
TF Boys (TensorFlow Boys ) 养成记（一）
本资料是在Ubuntu14.0.4版本下进行,用来进行图像处理,所以只介绍关于图像处理部分的内容,并且默认TensorFlow已经配置好,如果没有配置好,请参考官方文档配置安装,推荐用pip安装.关于 ...

随机推荐

C++ cosnt的一点总结
1,C++在定义函数重载的时候形参不管是不是const的他们都是等价的,除非形参是const引用.举个例子: void fun(int a){...}与void fun(const int a){.. ...
MySql——事务控制语言（DTL）
什么是事务(控制台只能是内存的操作) 通常,在此之前,我们说,一条语句使用一个分号(;)来结束,并得到执行. 那么我们说,这个“一次性执行”的过程,可以称为“一个事务”. 简单来说,“一条sql语句, ...
jdk ssl证书
工具类MyBatisUtils创建SqlSessionFactory
package com.js.ai.modules.pointwall.interfac; import java.io.IOException; import java.io.InputStream ...
ISIS与OSPF的区别与联系
共同之处: 1 都是链路状态路由协议,都要求区域内的路由器交换链路状态信息,链路状态信息被收集到链路状态数据库中 2 都是用了一种实现路由选择信息交换相似机制 3 都在广播网络中选择指定路由器来控制扩 ...
yum ftp本地源
一. 准备工作1. 安装系统centos7.32. 环境 10.10.10.14 controller-1 10.10.10.15 computer-1 3. 在14主机上安装FTP服务yum ins ...
跟我一起学kafka(一)
从昨天下午接到新任务,要采集一个法院网站得所有公告,大概是需要采集这个网站得所有公告列表里得所有txt内容,txt文件里边是一件件赤裸裸得案件,记录这案由,原告被告等相关属性(不知道该叫什么就称之为属 ...
恒大威武！关于SQL的一些基础知识整理回顾
首先的首先,恒大威武! 开始正题. 关系代数: 目前主流的关系型数据库,是建立在关系代数的基础上的,即他的数学支撑是关系代数. 关系代数主要包括如下几个二目运算:并运算union.交运算interse ...
git submodule一些操作
checkout指定tag cd /path/to/yoursubmodule git checkout yourTag cd .. git add yoursubmodule git commit ...
呕心沥血Android studio使用JNI实例
发现网上很多JNI的使用教程,也很详细,不过有的地方有些缺漏,导致很多小问题难以解决的,今天就来总结一下. 准备工作:下载NDK. 简单的说,要用到C/C++,就要用NDK.直接百度搜索然后去官网下载 ...

TF Boys (TensorFlow Boys ) 养成记（五）： CIFAR10 Model 和 TensorFlow 的四种交叉熵介绍

TF Boys (TensorFlow Boys ) 养成记（五）： CIFAR10 Model 和 TensorFlow 的四种交叉熵介绍的更多相关文章

随机推荐

热门专题