keras损失函数详解

以下信息均来自官网

------------------------------------------------------------------------------------------------------------

损失函数的使用

损失函数（或称目标函数、优化评分函数）是编译模型时所需的两个参数之一：

model.compile(loss='mean_squared_error', optimizer='sgd')

from keras import losses

model.compile(loss=losses.mean_squared_error, optimizer='sgd')

你可以传递一个现有的损失函数名，或者一个 TensorFlow/Theano 符号函数。该符号函数为每个数据点返回一个标量，有以下两个参数:

y_true: 真实标签。TensorFlow/Theano 张量。
y_pred: 预测值。TensorFlow/Theano 张量，其 shape 与 y_true 相同。

实际的优化目标是所有数据点的输出数组的平均值。

可用损失函数

mean_squared_error

mean_squared_error(y_true, y_pred)

mean_absolute_error

mean_absolute_error(y_true, y_pred)

mean_absolute_percentage_error

mean_absolute_percentage_error(y_true, y_pred)

mean_squared_logarithmic_error

mean_squared_logarithmic_error(y_true, y_pred)

squared_hinge

squared_hinge(y_true, y_pred)

hinge

hinge(y_true, y_pred)

categorical_hinge

categorical_hinge(y_true, y_pred)

logcosh

logcosh(y_true, y_pred)

预测误差的双曲余弦的对数。

对于较小的 x，log(cosh(x)) 近似等于 (x ** 2) / 2；对于较大的 x，近似等于 abs(x) - log(2)。这意味着 'logcosh' 的效果与均方误差大致相同，但不会受到偶尔出现的严重错误预测的强烈影响。

参数

y_true: 目标真实值的张量。
y_pred: 目标预测值的张量。

返回：一个张量，其中每个样本对应一个标量损失值。

categorical_crossentropy

categorical_crossentropy(y_true, y_pred)

sparse_categorical_crossentropy

sparse_categorical_crossentropy(y_true, y_pred)

binary_crossentropy

binary_crossentropy(y_true, y_pred)

kullback_leibler_divergence

kullback_leibler_divergence(y_true, y_pred)

poisson

poisson(y_true, y_pred)

cosine_proximity

cosine_proximity(y_true, y_pred)

注意: 当使用 categorical_crossentropy 损失时，你的目标值应该是分类格式（即 one-hot 编码：如果你有 10 个类，每个样本的目标值应该是一个 10 维的向量，该向量在类别对应的索引处为 1，其余位置均为 0）。为了将整数目标值转换为分类目标值，你可以使用 Keras 实用函数 to_categorical：

from keras.utils.np_utils import to_categorical

categorical_labels = to_categorical(int_labels, num_classes=None)

如果还不明白，请看下面的源码

 """Built-in loss functions.

 """

 from __future__ import absolute_import

 from __future__ import division

 from __future__ import print_function

 import six

 from . import backend as K

 from .utils.generic_utils import deserialize_keras_object

 from .utils.generic_utils import serialize_keras_object

 def mean_squared_error(y_true, y_pred):
     """Mean of the squared difference `y_pred - y_true` over the last axis.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Result of `K.mean(..., axis=-1)` applied to the squared error.
     """
     squared_error = K.square(y_pred - y_true)
     return K.mean(squared_error, axis=-1)

 def mean_absolute_error(y_true, y_pred):
     """Mean of the absolute difference `y_pred - y_true` over the last axis.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Result of `K.mean(..., axis=-1)` applied to the absolute error.
     """
     absolute_error = K.abs(y_pred - y_true)
     return K.mean(absolute_error, axis=-1)

 def mean_absolute_percentage_error(y_true, y_pred):
     """Mean absolute error relative to `|y_true|`, scaled by 100.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         `100 * K.mean(|(y_true - y_pred) / denom|, axis=-1)`, where
         `denom` is `|y_true|` clipped below at `K.epsilon()`.
     """
     error = y_true - y_pred
     # Lower-bound the denominator so a zero target does not divide by zero.
     denominator = K.clip(K.abs(y_true), K.epsilon(), None)
     relative_error = K.abs(error / denominator)
     return 100. * K.mean(relative_error, axis=-1)

 def mean_squared_logarithmic_error(y_true, y_pred):
     """Mean squared difference of `log(x + 1)` of predictions and targets.

     Both inputs are clipped below at `K.epsilon()` before 1 is added and
     the logarithm is taken.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Result of `K.mean(..., axis=-1)` applied to the squared log difference.
     """
     log_pred = K.log(K.clip(y_pred, K.epsilon(), None) + 1.)
     log_true = K.log(K.clip(y_true, K.epsilon(), None) + 1.)
     squared_log_error = K.square(log_pred - log_true)
     return K.mean(squared_log_error, axis=-1)

 def squared_hinge(y_true, y_pred):
     """Mean over the last axis of `max(1 - y_true * y_pred, 0) ** 2`.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Result of `K.mean(..., axis=-1)` applied to the squared margin term.
     """
     margin = K.maximum(1. - y_true * y_pred, 0.)
     return K.mean(K.square(margin), axis=-1)

 def hinge(y_true, y_pred):
     """Mean over the last axis of `max(1 - y_true * y_pred, 0)`.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Result of `K.mean(..., axis=-1)` applied to the margin term.
     """
     margin = K.maximum(1. - y_true * y_pred, 0.)
     return K.mean(margin, axis=-1)

 def categorical_hinge(y_true, y_pred):
     """Hinge loss built from target-weighted and non-target predictions.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         `max(0, neg - pos + 1)`, where `pos` is the sum over the last axis of
         `y_true * y_pred` and `neg` is the max over the last axis of
         `(1 - y_true) * y_pred`.
     """
     target_score = K.sum(y_true * y_pred, axis=-1)
     best_other_score = K.max((1. - y_true) * y_pred, axis=-1)
     return K.maximum(0., best_other_score - target_score + 1.)

 def logcosh(y_true, y_pred):
     """Logarithm of the hyperbolic cosine of the prediction error.

     For small `x`, `log(cosh(x))` is close to `(x ** 2) / 2`; for large `x`
     it is close to `abs(x) - log(2)`. As a result 'logcosh' behaves much
     like the mean squared error while being less sensitive to the
     occasional wildly incorrect prediction.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Tensor with one scalar loss entry per sample.
     """
     error = y_pred - y_true
     # log(cosh(x)) expressed as x + softplus(-2x) - log(2).
     log_cosh_error = error + K.softplus(-2. * error) - K.log(2.)
     return K.mean(log_cosh_error, axis=-1)

 def categorical_crossentropy(y_true, y_pred):
     """Delegate to the backend's `categorical_crossentropy`.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Whatever `K.categorical_crossentropy(y_true, y_pred)` returns.
     """
     loss = K.categorical_crossentropy(y_true, y_pred)
     return loss

 def sparse_categorical_crossentropy(y_true, y_pred):
     """Delegate to the backend's `sparse_categorical_crossentropy`.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Whatever `K.sparse_categorical_crossentropy(y_true, y_pred)` returns.
     """
     loss = K.sparse_categorical_crossentropy(y_true, y_pred)
     return loss

 def binary_crossentropy(y_true, y_pred):
     """Mean over the last axis of the backend's `binary_crossentropy`.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Result of `K.mean(..., axis=-1)` applied to
         `K.binary_crossentropy(y_true, y_pred)`.
     """
     elementwise_loss = K.binary_crossentropy(y_true, y_pred)
     return K.mean(elementwise_loss, axis=-1)

 def kullback_leibler_divergence(y_true, y_pred):
     """Sum over the last axis of `y_true * log(y_true / y_pred)`.

     Both inputs are first clipped to the range `[K.epsilon(), 1]`, which
     keeps the ratio and the logarithm away from zero.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Result of `K.sum(..., axis=-1)` applied to the clipped inputs.
     """
     clipped_true = K.clip(y_true, K.epsilon(), 1)
     clipped_pred = K.clip(y_pred, K.epsilon(), 1)
     log_ratio = K.log(clipped_true / clipped_pred)
     return K.sum(clipped_true * log_ratio, axis=-1)

 def poisson(y_true, y_pred):
     """Mean over the last axis of `y_pred - y_true * log(y_pred + epsilon)`.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         Result of `K.mean(..., axis=-1)` applied to the expression above.
     """
     # K.epsilon() is added before the log so a zero prediction is not log(0).
     log_pred = K.log(y_pred + K.epsilon())
     return K.mean(y_pred - y_true * log_pred, axis=-1)

 def cosine_proximity(y_true, y_pred):
     """Negative dot product of the L2-normalized targets and predictions.

     # Arguments
         y_true: tensor of true targets.
         y_pred: tensor of predicted targets.

     # Returns
         `-K.sum(..., axis=-1)` of the product of both inputs after each is
         passed through `K.l2_normalize(..., axis=-1)`.
     """
     unit_true = K.l2_normalize(y_true, axis=-1)
     unit_pred = K.l2_normalize(y_pred, axis=-1)
     return -K.sum(unit_true * unit_pred, axis=-1)

 # Short-hand names (lower- and upper-case) bound to the same loss functions.
 mse = mean_squared_error
 MSE = mean_squared_error

 mae = mean_absolute_error
 MAE = mean_absolute_error

 mape = mean_absolute_percentage_error
 MAPE = mean_absolute_percentage_error

 msle = mean_squared_logarithmic_error
 MSLE = mean_squared_logarithmic_error

 kld = kullback_leibler_divergence
 KLD = kullback_leibler_divergence

 cosine = cosine_proximity

 def serialize(loss):
     """Serialize `loss` by delegating to `serialize_keras_object`.

     # Arguments
         loss: the loss object to serialize.

     # Returns
         Whatever `serialize_keras_object(loss)` returns.
     """
     serialized = serialize_keras_object(loss)
     return serialized

 def deserialize(name, custom_objects=None):
     """Resolve `name` to a loss via `deserialize_keras_object`.

     # Arguments
         name: identifier forwarded to `deserialize_keras_object`.
         custom_objects: optional value forwarded as `custom_objects`.

     # Returns
         Whatever `deserialize_keras_object` returns for `name`.
     """
     lookup_options = dict(
         # This module's globals are passed as the lookup namespace.
         module_objects=globals(),
         custom_objects=custom_objects,
         printable_module_name='loss function',
     )
     return deserialize_keras_object(name, **lookup_options)

 def get(identifier):
     """Get the `identifier` loss function.

     # Arguments
         identifier: None, a string name, a dict, or a callable.
             Strings and dicts are passed to `deserialize`; a callable
             is returned unchanged.

     # Returns
         The loss function, or None if `identifier` is None.

     # Raises
         ValueError: if `identifier` is not None, a string, a dict or a
             callable. Failures while resolving a string or dict are
             whatever `deserialize` raises.
     """
     if identifier is None:
         return None
     if isinstance(identifier, six.string_types):
         # Coerce to the native `str` type before the lookup.
         return deserialize(str(identifier))
     if isinstance(identifier, dict):
         return deserialize(identifier)
     if callable(identifier):
         return identifier
     # Build one formatted message; passing the identifier as a second
     # positional argument made str(exc) render as a tuple.
     raise ValueError('Could not interpret loss function identifier: '
                      '{!r}'.format(identifier))