实现方式

以 ℓ2 Regularization 为例，主要有以下两种实现方式：

1. 手动累加

with tf.name_scope('loss'):
    # Data term: softmax cross-entropy; `y` is expected to hold one-hot labels.
    loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)

    # Trainable variables whose name contains any of these fragments
    # (normalization-layer parameters) are left out of the penalty.
    norm_tags = ('bn', 'batchnorm', 'batch_norm', 'batch_normalization', 'gn')
    penalized = [v for v in tf.trainable_variables()
                 if not any(tag in v.name for tag in norm_tags)]

    # Sum one tf.nn.l2_loss term per remaining variable, then scale the
    # total by the regularization strength before adding it to the loss.
    l2_reg_loss = tf.constant(0.0, tf.float32)
    for v in penalized:
        l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(v))
    l2_reg_loss *= 0.001

    loss = loss + l2_reg_loss

2. 借助于 kernel_regularizer

with tf.name_scope('dnn'):
    # The penalty is attached to the layer itself; the resulting loss terms
    # are later fetched from the REGULARIZATION_LOSSES collection.
    l2_reg = tf.contrib.layers.l2_regularizer(0.001)
    hidden1 = tf.layers.dense(
        X, 300,
        name='hidden1',
        kernel_initializer=he_init,
        kernel_regularizer=l2_reg,
    )

    ......

with tf.name_scope('loss'):
    # Data term: softmax cross-entropy; `y` is expected to hold one-hot labels.
    data_loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)

    # Every term registered under REGULARIZATION_LOSSES is added on top of
    # the data term in a single add_n.
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    loss = tf.add_n([data_loss] + reg_losses)

实例验证

import tensorflow as tf

# 1. create data

from tensorflow.examples.tutorials.mnist import input_data

# MNIST is loaded with one-hot labels, so each label row has 10 entries.
mnist = input_data.read_data_sets('../MNIST_data', one_hot=True)

# Flattened 28x28 images.
X = tf.placeholder(tf.float32, shape=(None, 784), name='X')

# `shape=(None)` is just `None` (no tuple), which left the label shape
# completely unspecified. The labels are one-hot over the 10 classes that the
# output layer produces, so declare that shape and let TensorFlow validate feeds.
y = tf.placeholder(tf.int32, shape=(None, 10), name='y')

# Switches batch normalization between training and inference behaviour.
is_training = tf.placeholder(tf.bool, None, name='is_training')

# 2. define network
he_init = tf.contrib.layers.variance_scaling_initializer()

# One L2 regularizer shared by all dense layers (same 0.001 strength as before).
l2_reg = tf.contrib.layers.l2_regularizer(0.001)

with tf.name_scope('dnn'):
    hidden1 = tf.layers.dense(X, 300, kernel_initializer=he_init, name='hidden1',
                              kernel_regularizer=l2_reg)
    # Fix: the first batch-norm layer was created without `training=...`, so it
    # did not follow the `is_training` switch the way the second layer does.
    hidden1 = tf.layers.batch_normalization(hidden1, training=is_training, momentum=0.9)
    hidden1 = tf.nn.relu(hidden1)

    hidden2 = tf.layers.dense(hidden1, 100, kernel_initializer=he_init, name='hidden2',
                              kernel_regularizer=l2_reg)
    hidden2 = tf.layers.batch_normalization(hidden2, training=is_training, momentum=0.9)
    hidden2 = tf.nn.relu(hidden2)

    # Output layer: 10 raw class scores, no activation (softmax is applied in the loss).
    logits = tf.layers.dense(hidden2, 10, kernel_initializer=he_init, name='output',
                             kernel_regularizer=l2_reg)

# 3. define loss
with tf.name_scope('loss'):
    # Data term only; `y` holds one-hot labels. Both regularization variants
    # below are computed for comparison, but neither is added to `loss`
    # (the two `loss = ...` lines are intentionally left disabled).
    loss = tf.losses.softmax_cross_entropy(onehot_labels=y, logits=logits)

    # ---- Variant A: terms collected from the layers' kernel_regularizer ----
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_loss = tf.reduce_sum(reg_losses)
    # loss = tf.add_n([loss] + reg_losses)

    # ---- Variant B: manual accumulation over trainable variables ----
    # Variables whose name contains any of these fragments
    # (normalization-layer parameters) are skipped.
    # NOTE(review): this loop also covers the dense-layer bias variables, which
    # presumably explains the small gap between the two printed values - confirm.
    norm_tags = ('bn', 'batchnorm', 'batch_norm', 'batch_normalization', 'gn')
    l2_reg_loss = tf.constant(0.0, tf.float32)
    for var in tf.trainable_variables():
        if not any(tag in var.name for tag in norm_tags):
            l2_reg_loss = tf.add(l2_reg_loss, tf.nn.l2_loss(var))
    l2_reg_loss *= 0.001
    # loss = loss + l2_reg_loss

# 4. define optimizer
learning_rate_init = 0.01
global_step = tf.Variable(0, trainable=False)

with tf.name_scope('train'):
    # Polynomial learning-rate decay driven by `global_step`.
    learning_rate = tf.train.polynomial_decay(
        learning_rate=learning_rate_init,           # initial learning rate
        global_step=global_step,                    # current iteration count
        decay_steps=22000,                          # steps over which the rate decays to end_learning_rate
        end_learning_rate=learning_rate_init / 10,  # smallest learning rate
        power=0.9,
        cycle=False,
    )

    # Ops registered under UPDATE_OPS (needed for batch normalization) are
    # made dependencies of the training op.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate, momentum=0.9)
        optimizer_op = optimizer.minimize(
            loss=loss,
            var_list=tf.trainable_variables(),
            # Without global_step the counter never advances, so the
            # learning rate would not be updated.
            global_step=global_step,
        )

with tf.name_scope('eval'):
    # `y` is one-hot, while in_top_k takes integer class ids as targets.
    true_class = tf.argmax(y, axis=1)
    # With k = 1: is the true class the top prediction?
    correct = tf.nn.in_top_k(logits, true_class, 1)
    accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))

# 5. initialize
init_op = tf.group(tf.global_variables_initializer(), tf.local_variables_initializer())
saver = tf.train.Saver()

# 6. evaluate the restored model on train & test data
# (optimizer_op is not run here: the weights come from the checkpoint and the
# script only compares the two regularization-loss computations.)
n_epochs = 1
batch_size = 55000

with tf.Session() as sess:
    # Variables are restored from the checkpoint, so init_op is not run.
    saver.restore(sess, './my_model_final.ckpt')

    test_feed = {X: mnist.test.images, y: mnist.test.labels, is_training: False}

    for epoch in range(n_epochs):
        for iteration in range(mnist.train.num_examples // batch_size):
            X_batch, y_batch = mnist.train.next_batch(batch_size)
            loss_, l2_reg_loss_, reg_loss_ = sess.run(
                [loss, l2_reg_loss, reg_loss],
                feed_dict={X: X_batch, y: y_batch, is_training: True})

        # Accuracy on the last training batch.
        acc_train = sess.run(accuracy, feed_dict={X: X_batch, y: y_batch, is_training: False})

        # Fetch all test metrics in ONE forward pass instead of re-running the
        # network over the whole test set once per metric.
        acc_test, loss_test, l2_reg_loss_test, reg_loss_test = sess.run(
            [accuracy, loss, l2_reg_loss, reg_loss], feed_dict=test_feed)

        print("Train loss:", loss_, "Train l2_reg_loss:", l2_reg_loss_, "Train reg_loss:", reg_loss_, "Train accuracy:", acc_train)
        print("Test loss:", loss_test, "Test l2_reg_loss:", l2_reg_loss_test, "Test reg_loss:", reg_loss_test, "Test accuracy:", acc_test)

"""

# =================

Train loss: 0.000636433 Train l2_reg_loss: 0.48696715 Train reg_loss: 0.48683384 Train accuracy: 1.0

Test loss: 0.059231624 Test l2_reg_loss: 0.48696715 Test reg_loss: 0.48683384 Test accuracy: 0.983

"""

TensorFlow使用记录 (十二）： ℓ1 and ℓ2 Regularization的更多相关文章

Spring学习记录(十二)---AOP理解和基于注解配置
Spring核心之二:AOP(Aspect Oriented Programming) --- 面向切面编程,通过预编译方式和运行期动态代理实现程序功能的统一维护的一种技术.AOP是OOP的延续,是软 ...
TensorFlow使用记录 (十四）： Multi-task to MNIST + Fashion MNIST
前言后面工作中有个较重要的 task 是将 YOLOV3 目标检测和 LanNet 车道线检测和到一个网络中训练,特别的是,这两部分数据来自于不同的数据源.这和我之前在 caffe 环境下训练检测整 ...
TensorFlow使用记录 (十）： Pretraining
上一篇的模型保存和恢复熟练后,我们就可以大量使用 pretrain model 来训练任务了 Tweaking, Dropping, or Replacing the Upper Layers The ...
TensorFlow 学习（十二）—— 高级函数
tf.map_fn(fn, elems):接受一个函数对象,然后用该函数对象对集合(elems)中的每一个元素分别处理, def preprocessing_image(image, training ...
第十二篇 Integration Services：高级日志记录
本篇文章是Integration Services系列的第十二篇,详细内容请参考原文. 简介在前一篇文章我们配置了SSIS内置日志记录,演示了简单和高级日志配置,保存并查看日志配置,生成自定义日志消息 ...
【译】第十二篇 Integration Services：高级日志记录
本篇文章是Integration Services系列的第十二篇,详细内容请参考原文. 简介在前一篇文章我们配置了SSIS内置日志记录,演示了简单和高级日志配置,保存并查看日志配置,生成自定义日志消息 ...
Tensorflow深度学习之十二：基础图像处理之二
Tensorflow深度学习之十二:基础图像处理之二 from:https://blog.csdn.net/davincil/article/details/76598474 首先放出原始图像: ...
我的MYSQL学习心得（十二）触发器
我的MYSQL学习心得(十二) 触发器我的MYSQL学习心得(一) 简单语法我的MYSQL学习心得(二) 数据类型宽度我的MYSQL学习心得(三) 查看字段长度我的MYSQL学习心得(四) 数 ...
解剖SQLSERVER 第十二篇 OrcaMDF 行压缩支持（译）
解剖SQLSERVER 第十二篇 OrcaMDF 行压缩支持(译) http://improve.dk/orcamdf-row-compression-support/ 在这两个月的断断续续的开发 ...

随机推荐

Java EE javax.servlet中的ServletConfig接口
ServletConfig接口 public interface ServletConfig 实现类:GenericServlet.HttpServlet 一.介绍一个供servlet容器使用配置对 ...
转SSL/TLS协议
TLS名为传输层安全协议(Transport Layer Protocol),这个协议是一套加密的通信协议.它的前身是SSL协议(安全套接层协议,Secure Sockets Layer).这两个协议 ...
postgresql11解压版安装windows
一.准备安装包下载地址:https://www.postgresql.org/download/windows/ 二.创建data目录(用于存储数据) 三.进入bin目录执行命令..初始化数据库并设 ...
SpringBoot整合MyBatis的分页插件PageHelper
1.导入依赖(maven) <dependency> <groupId>com.github.pagehelper</groupId> <artifactId ...
qt tableview使用
Qt::CheckState checkSibling(QStandardItem * item); void treeItem_checkAllChild(QStandardItem * item, ...
The method getContextPath() from the type HttpServletRequest
在做java项目开发的时候,jsp页面很容易报出这个错误. 错误的原因很多,但是都和JRE有关. 一般在导入项目的时候容易报出这个错误,主要因为JRE(jdk版本不一致). 解决方法:就是重新配置路径 ...
Oracle的FIXED
今天发现一个有意思的问题,我们知道,在Oracle数据库中正常执行 select sysdate from dual 都可以返回当前主机的系统时间.正常修改系统时间,对应的查询结果也会变成修改后的系统 ...
java压缩下载图片并以zip流的形式下载到客户端
/** * * @param page * @param rows * @param works * @return * @author ffwwzz 下载zip * @throws IOExcept ...
Vim中复制粘贴缩进错乱问题的解决方案
Vim中复制粘贴缩进错乱问题的解决方案当你把这段缩进优美的代码直接ctrl+c,ctrl+v到Vim的时候,就会出现如下恶心的情况可以看到,这种直接粘贴的方式会导致代码丢失和缩进错乱等情况. 解决 ...
python之时间日期datetime
相比于time模块,datetime模块的接口则更直观.更容易调用datetime模块定义了以下几个类: datetime.date():表示日期的类.常用的属性是year,month,day:dat ...

TensorFlow使用记录 (十二）： ℓ1 and ℓ2 Regularization

实现方式

实例验证

TensorFlow使用记录 (十二）： ℓ1 and ℓ2 Regularization的更多相关文章

随机推荐

热门专题