GraphSAGE 代码解析(三)

原创文章～转载请注明出处哦。其他部分内容参见以下链接～

1. class MeanAggregator(Layer):

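该类主要用于实现如下的均值聚合更新式：$h_v^{k} = \sigma\left(W_{self}\, h_v^{k-1} + W_{neigh} \cdot \mathrm{MEAN}\left(\{h_u^{k-1}, \forall u \in \mathcal{N}(v)\}\right)\right)$（当 concat=True 时，括号内的两项改为拼接而不是相加）。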

1. __init__()

__init__() 用于获取并初始化成员变量 dropout, bias(False), act(ReLU), concat(False), input_dim, output_dim, name(variable scope)

用glorot()方法初始化节点v的权值矩阵 vars['self_weights'] 和邻居节点均值u的权值矩阵 vars['neigh_weights']

用零向量初始化vars['bias']。(见inits.py: zeros(shape))

若logging为True,则调用 layers.py 中 class Layer()的成员函数_log_vars(), 生成vars中各个变量的直方图。

glorot()

其中，glorot() 在inits.py中定义，用于权值初始化。(from .inits import glorot)

均匀分布初始化方法，又称Xavier均匀初始化，参数从 [-limit, limit] 的均匀分布产生，其中limit为 sqrt(6 / (fan_in + fan_out))。fan_in为权值张量的输入单元数，fan_out是权重张量的输出单元数。该函数返回 [fan_in, fan_out]大小的Variable。

 def glorot(shape, name=None):
     """Create a variable filled with Glorot/Xavier uniform samples.

     Glorot & Bengio (AISTATS 2010) init: values are drawn uniformly from
     ``[-limit, limit]`` where ``limit = sqrt(6 / (shape[0] + shape[1]))``.

     Args:
         shape: Sequence whose first two entries are used as fan-in and
             fan-out; also the shape of the sampled tensor.
         name: Optional name forwarded to ``tf.Variable``.

     Returns:
         A ``tf.Variable`` initialised with float32 samples of ``shape``.
     """
     fan_in, fan_out = shape[0], shape[1]
     limit = np.sqrt(6.0/(fan_in+fan_out))
     samples = tf.random_uniform(
         shape, minval=-limit, maxval=limit, dtype=tf.float32)
     return tf.Variable(samples, name=name)

2. _call(inputs)

class MeanAggregator(Layer) 中的 _call(inputs) 函数是对父类class Layer(object)方法_call(inputs)的重写。

用于实现最上方的迭代更新式子。

在layers.py 中定义的 class Layer(object)中，执行特殊函数def __call__(inputs) 时有： outputs = self._call(inputs)调用_call(inputs) 方法，也即在这里调用子类MeanAggregator(Layer)中的_call(inputs)方法。

tf.nn.dropout(x, keep_prob, noise_shape=None, seed=None, name=None)

With probability keep_prob, outputs the input element scaled up by 1 / keep_prob, otherwise outputs 0. The scaling is so that the expected sum is unchanged.

注意：输出的非0元素是原来的 “1/keep_prob” 倍，以保证总和的期望值不变。

tf.add_n(inputs, name=None)

Adds all input tensors element-wise.

Args:

inputs: A list of Tensor or IndexedSlices objects, each with same shape and type.

name: A name for the operation (optional).

Returns:

A Tensor of same shape and type as the elements of inputs.

Raises:

ValueError: If inputs don't all have same shape and dtype or the shape cannot be inferred.

output = tf.concat([from_self, from_neighs], axis=1)

这里注意在concat后，输出的第二维(axis=1)由 output_dim 变为 2 * output_dim，即维数变为之前的2倍。

3. class MeanAggregator(Layer) 代码

 class MeanAggregator(Layer):
     """Aggregate neighbour features via mean, then matmul and non-linearity.

     The node's own vectors and the mean of its neighbours' vectors are each
     multiplied by their own weight matrix. The two projections are summed
     (``concat=False``) or concatenated along axis 1 (``concat=True``); an
     optional bias is added and the activation is applied.

     Args:
         input_dim: Number of rows of the ``self_weights`` matrix.
         output_dim: Number of columns of both weight matrices.
         neigh_input_dim: Number of rows of the ``neigh_weights`` matrix;
             falls back to ``input_dim`` when ``None``.
         dropout: Drop probability; ``1 - dropout`` is passed to
             ``tf.nn.dropout`` as the keep probability.
         bias: Whether a bias vector is created and added to the output.
         act: Activation applied to the final output.
         name: Optional suffix appended to the variable scope name.
         concat: Concatenate the two projections instead of summing them.
         **kwargs: Forwarded unchanged to ``Layer.__init__``.
     """

     def __init__(self, input_dim, output_dim, neigh_input_dim=None,
             dropout=0., bias=False, act=tf.nn.relu,
             name=None, concat=False, **kwargs):
         super(MeanAggregator, self).__init__(**kwargs)

         self.dropout = dropout
         self.bias = bias
         self.act = act
         self.concat = concat
         # Record the dimensions before any variable is built: previously
         # they were assigned at the very end, so the bias branch read
         # self.output_dim before it existed.
         self.input_dim = input_dim
         self.output_dim = output_dim

         if neigh_input_dim is None:
             neigh_input_dim = input_dim

         if name is not None:
             name = '/' + name
         else:
             name = ''

         # self.name, self.vars and self.logging are not set here; they are
         # presumably provided by Layer.__init__ -- confirm against layers.py.
         with tf.variable_scope(self.name + name + '_vars'):
             self.vars['neigh_weights'] = glorot([neigh_input_dim, output_dim],
                                                 name='neigh_weights')
             self.vars['self_weights'] = glorot([input_dim, output_dim],
                                                name='self_weights')
             if self.bias:
                 # With concat=True the output of _call is twice as wide, so
                 # the bias must match that width to be addable.
                 bias_dim = 2 * output_dim if self.concat else output_dim
                 self.vars['bias'] = zeros([bias_dim], name='bias')

         if self.logging:
             self._log_vars()

     def _call(self, inputs):
         """Compute the aggregated representation.

         Args:
             inputs: A pair ``(self_vecs, neigh_vecs)``; ``neigh_vecs`` is
                 averaged over axis 1 before being projected.

         Returns:
             ``act`` applied to the summed or concatenated projections
             (plus bias when enabled).
         """
         self_vecs, neigh_vecs = inputs

         neigh_vecs = tf.nn.dropout(neigh_vecs, 1-self.dropout)
         self_vecs = tf.nn.dropout(self_vecs, 1-self.dropout)
         neigh_means = tf.reduce_mean(neigh_vecs, axis=1)

         # [nodes] x [out_dim]
         from_neighs = tf.matmul(neigh_means, self.vars['neigh_weights'])
         from_self = tf.matmul(self_vecs, self.vars["self_weights"])

         if not self.concat:
             output = tf.add_n([from_self, from_neighs])
         else:
             # [nodes] x [2 * out_dim]
             output = tf.concat([from_self, from_neighs], axis=1)

         # bias
         if self.bias:
             output += self.vars['bias']

         return self.act(output)

2. class GCNAggregator(Layer)

这里__init__()与MeanAggregator基本相同，在_call()的实现中略有不同。

 def _call(self, inputs):
     """Average each node with its neighbours, then project and activate.

     Args:
         inputs: A pair ``(self_vecs, neigh_vecs)``. ``self_vecs`` gets an
             extra axis at position 1 so it can be concatenated with
             ``neigh_vecs`` along that axis.

     Returns:
         ``act`` applied to the projected means (plus bias when enabled).
     """
     node_feats, neighbor_feats = inputs

     # Dropout is applied to the neighbours first, then to the node itself.
     neighbor_feats = tf.nn.dropout(neighbor_feats, 1-self.dropout)
     node_feats = tf.nn.dropout(node_feats, 1-self.dropout)

     # Append the node's own vector to its neighbours along axis 1 and
     # average over that axis.
     node_as_neighbor = tf.expand_dims(node_feats, axis=1)
     stacked = tf.concat([neighbor_feats, node_as_neighbor], axis=1)
     means = tf.reduce_mean(stacked, axis=1)

     # [nodes] x [out_dim]
     projected = tf.matmul(means, self.vars['weights'])

     # bias
     if self.bias:
         projected += self.vars['bias']

     return self.act(projected)

其中对means求解时，

1. 先对self_vecs扩维(tf.expand_dims(self_vecs, axis=1))，即在axis=1处插入一个长度为1的新维度；对下面例子中的一维向量而言，相当于把它变成列向量,

2. 之后self_vecs的行数与neigh_vecs行数相同时，将二者沿axis=1做concat, 即相当于在原先的neigh_vecs矩阵后面新增一列self_vecs(扩维后的列向量)

3. 最后将得到的矩阵每行求均值，即得means.

之后means与权值矩阵vars['weights']做矩阵乘法(tf.matmul)，若bias为True则再加上vars['bias'], 最终将该值带入激活函数(ReLU)。

下面举个例子简单说明(例子中省略了乘以权值矩阵W的操作；另外例子中的数据为int32，因此求均值时做的是整数除法)：

 import tensorflow as tf

 # Toy walk-through of the mean computation used in GCNAggregator._call
 # (the multiplication by the weight matrix is omitted).
 # All values are Python ints, so the tensors are int32 and the mean below
 # is an integer division.
 neigh_vecs = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]

 self_vecs = [2, 3, 4]

 # Turn self_vecs into a column, append it to neigh_vecs along axis 1,
 # then average every row.
 means = tf.reduce_mean(tf.concat([neigh_vecs,

                                   tf.expand_dims(self_vecs, axis=1)], axis=1), axis=1)

 print(tf.shape(self_vecs))

 # axis=0 inserts the new dimension in front: shape (3,) -> (1, 3)
 print(tf.expand_dims(self_vecs, axis=0))

 # Tensor("ExpandDims_1:0", shape=(1, 3), dtype=int32)

 # axis=1 inserts the new dimension at the end: shape (3,) -> (3, 1)
 print(tf.expand_dims(self_vecs, axis=1))

 # Tensor("ExpandDims_2:0", shape=(3, 1), dtype=int32)

 sess = tf.Session()

 print(sess.run(tf.expand_dims(self_vecs, axis=1)))

 # [[2]

 #  [3]

 #  [4]]

 # self_vecs becomes the extra last column of neigh_vecs.
 print(sess.run(tf.concat([neigh_vecs,

                           tf.expand_dims(self_vecs, axis=1)], axis=1)))

 # [[1 2 3 2]

 #  [4 5 6 3]

 #  [7 8 9 4]]

 # Printing the tensor itself (without sess.run) only shows its metadata.
 print(means)

 # Tensor("Mean:0", shape=(3,), dtype=int32)

 print(sess.run(tf.reduce_mean(tf.concat([neigh_vecs,

                                          tf.expand_dims(self_vecs, axis=1)], axis=1), axis=1)))

 # [2 4 7]

 # Row-wise integer mean (int32 dtype, so the division truncates):

 # [[1 2 3 2]   = 8 // 4  = 2

 #  [4 5 6 3]   = 18 // 4 = 4

 #  [7 8 9 4]]  = 28 // 4 = 7

 # The single-element bias is added to every entry of means.
 bias = [1]

 output = means + bias

 print(sess.run(output))

 # [3 5 8]

 # [2 + 1, 4 + 1, 7 + 1] = [3, 5, 8]