tensorflow faster rcnn 代码分析一 demo.py

# Environment values must be strings; assigning the int 2 raises TypeError.
os.environ["CUDA_VISIBLE_DEVICES"] = "2"  # select the GPU to use

# Allow TensorFlow to pick another device when the requested one is unavailable.
tfconfig = tf.ConfigProto(allow_soft_placement=True)

# Grow GPU memory on demand. The option lives on the config object itself;
# the config must not be rebound to True.
tfconfig.gpu_options.allow_growth = True

sess = tf.Session(config=tfconfig)

定义resnet 类

class resnetv1(Network):
  """ResNet-v1 backbone settings layered on top of ``Network``."""

  def __init__(self, num_layers=50):
    """Store the depth-dependent settings and choose the block layout.

    Args:
      num_layers: network depth; also embedded in the variable scope name
        as ``resnet_v1_<num_layers>``.
    """
    Network.__init__(self)

    stride = 16
    self._feat_stride = [stride, ]
    # Reciprocal of the stride, kept as a one-element list like the stride.
    self._feat_compress = [1. / float(stride), ]

    self._num_layers = num_layers
    self._scope = 'resnet_v1_%d' % num_layers

    # NOTE(review): presumably selects the residual block configuration for
    # this depth; _decide_blocks is defined outside this excerpt.
    self._decide_blocks()

# Instantiate the 101-layer variant; its scope name becomes 'resnet_v1_101'.
net=resnetv1(num_layers=101)

调用Network的create_architecture 函数，得到网络的输出rois 和其prediction

def create_architecture(self, mode, num_classes, tag=None,  anchor_scales=(8, 16, 32), anchor_ratios=(0.5, 1, 2)):
    """Create the input placeholders, build the network and collect its outputs.

    Args:
      mode: 'TRAIN' or 'TEST'. Only 'TEST' takes the inference branch; any
        other value goes through the loss/summary branch.
      num_classes: number of object classes.
      tag: identifier stored on the instance; must not be None.
      anchor_scales: anchor scales; their count times the ratio count gives
        the number of anchors.
      anchor_ratios: anchor aspect ratios.

    Returns:
      A dict holding 'rois', every entry of ``self._predictions`` and, outside
      test mode, every entry of ``self._losses``.

    Raises:
      AssertionError: if ``tag`` is None.
    """
    # Placeholders for the input image, its info vector and the ground-truth boxes.
    self._image = tf.placeholder(tf.float32, shape=[1, None, None, 3])
    self._im_info = tf.placeholder(tf.float32, shape=[3])
    self._gt_boxes = tf.placeholder(tf.float32, shape=[None, 5])

    self._tag = tag

    self._num_classes = num_classes
    self._mode = mode
    self._anchor_scales = anchor_scales
    self._num_scales = len(anchor_scales)

    self._anchor_ratios = anchor_ratios
    self._num_ratios = len(anchor_ratios)

    self._num_anchors = self._num_scales * self._num_ratios

    # In the demo the network is built for testing, so `training` is False.
    training = mode == 'TRAIN'
    testing = mode == 'TEST'

    # Identity comparison is the correct way to test for None.
    assert tag is not None

    # Handle most of the regularizers here (L2 weight decay).
    weights_regularizer = tf.contrib.layers.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)
    if cfg.TRAIN.BIAS_DECAY:
      biases_regularizer = weights_regularizer
    else:
      biases_regularizer = tf.no_regularizer

    # List as many types of layers as possible, even if they are not used now.
    # arg_scope stores these keyword arguments as defaults for the listed ops.
    with arg_scope([slim.conv2d, slim.conv2d_in_plane, \
                    slim.conv2d_transpose, slim.separable_conv2d, slim.fully_connected],
                    weights_regularizer=weights_regularizer,
                    biases_regularizer=biases_regularizer,
                    biases_initializer=tf.constant_initializer(0.0)):
      # _build_network assembles the graph and returns rois, cls_prob, bbox_pred.
      rois, cls_prob, bbox_pred = self._build_network(training)

    layers_to_output = {'rois': rois}

    for var in tf.trainable_variables():
      self._train_summaries.append(var)

    if testing:
      # Scale and shift bbox_pred with the configured stds/means, repeated
      # once per class.
      stds = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (self._num_classes))
      means = np.tile(np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (self._num_classes))
      self._predictions["bbox_pred"] *= stds
      self._predictions["bbox_pred"] += means
    else:
      self._add_losses()
      layers_to_output.update(self._losses)

      val_summaries = []
      with tf.device("/cpu:0"):
        val_summaries.append(self._add_gt_image_summary())
        for key, var in self._event_summaries.items():
          val_summaries.append(tf.summary.scalar(key, var))
        for key, var in self._score_summaries.items():
          self._add_score_summary(key, var)
        for var in self._act_summaries:
          self._add_act_summary(var)
        for var in self._train_summaries:
          self._add_train_summary(var)

      self._summary_op = tf.summary.merge_all()
      self._summary_op_val = tf.summary.merge(val_summaries)

    layers_to_output.update(self._predictions)

    return layers_to_output

　　给自定义的函数指定参数：

from tensorflow.contrib import framework

# The decorator is named add_arg_scope (singular "arg").
from tensorflow.contrib.framework.python.ops.arg_scope import add_arg_scope


# arg_scope only accepts functions registered with @add_arg_scope; an
# undecorated function is rejected when the scope is entered.
@add_arg_scope
def wo(name, age):
    """Print ``name`` and ``age``."""
    print(name, age)


# Inside the scope, age defaults to 20 for every call to wo() that does not
# pass it explicitly, so this prints: cui 20
with framework.arg_scope([wo], age=20):
    wo('cui')

　_build_network 构建整个网络结构，并返回网络的输出 rois、cls_prob 和 bbox_pred

  def _build_network(self, is_training=True):
    """Assemble the whole network and return its outputs.

    Args:
      is_training: forwarded to every sub-network builder.

    Returns:
      The tuple ``(rois, cls_prob, bbox_pred)``.

    Raises:
      NotImplementedError: if ``cfg.POOLING_MODE`` is not 'crop'.
    """
    # The config flag only picks the initializer family; the standard
    # deviations are the same either way.
    if cfg.TRAIN.TRUNCATED:
      make_initializer = tf.truncated_normal_initializer
    else:
      make_initializer = tf.random_normal_initializer
    initializer = make_initializer(mean=0.0, stddev=0.01)
    initializer_bbox = make_initializer(mean=0.0, stddev=0.001)

    # Feature-extraction front end; implemented by the backbone subclass.
    net_conv = self._image_to_head(is_training)

    with tf.variable_scope(self._scope, self._scope):
      # Anchors for this image, then the region proposal network.
      self._anchor_component()
      rois = self._region_proposal(net_conv, is_training, initializer)

      # Only crop-based region-of-interest pooling is available.
      if cfg.POOLING_MODE != 'crop':
        raise NotImplementedError
      pool5 = self._crop_pool_layer(net_conv, rois, "pool5")

    fc7 = self._head_to_tail(pool5, is_training)

    with tf.variable_scope(self._scope, self._scope):
      # Per-region classification and box regression.
      cls_prob, bbox_pred = self._region_classification(
          fc7, is_training, initializer, initializer_bbox)

    self._score_summaries.update(self._predictions)
    return rois, cls_prob, bbox_pred

resnet_v1中_image_to_head 的实现，得到网络中在rpn 提取之前的部分。

  def _image_to_head(self, is_training, reuse=None):
    """Build the backbone layers that precede the region proposal network.

    Args:
      is_training: training flag applied to the blocks that are not fixed.
      reuse: forwarded to ``resnet_v1.resnet_v1`` as its ``reuse`` argument.

    Returns:
      The output tensor of the last block built here; it is also stored in
      ``self._layers['head']``.

    Raises:
      AssertionError: if ``cfg.RESNET.FIXED_BLOCKS`` is outside 0..3.
    """
    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 3)

    # Now the base is always fixed during training.
    with slim.arg_scope(resnet_arg_scope(is_training=False)):
      net_conv = self._build_base()

    if cfg.RESNET.FIXED_BLOCKS > 0:
      # The first FIXED_BLOCKS blocks are built with is_training=False.
      with slim.arg_scope(resnet_arg_scope(is_training=False)):
        net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                           self._blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                           global_pool=False,
                                           include_root_block=False,
                                           reuse=reuse,
                                           scope=self._scope)

    if cfg.RESNET.FIXED_BLOCKS < 3:
      # The remaining blocks, except the last one, follow the caller's flag.
      with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
        net_conv, _ = resnet_v1.resnet_v1(net_conv,
                                           self._blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                           global_pool=False,
                                           include_root_block=False,
                                           reuse=reuse,
                                           scope=self._scope)

    self._act_summaries.append(net_conv)
    self._layers['head'] = net_conv

    # _build_network consumes this value, so it must be returned.
    return net_conv

得到anchors

def _anchor_component(self):
    """Generate the anchors for the current image and store them on the instance.

    Sets ``self._anchors`` (static shape ``[None, 4]``) and
    ``self._anchor_length`` (scalar).
    """
    with tf.variable_scope('ANCHOR_' + self._tag):
      # Just to get the shape right: divide the first two im_info entries by
      # the feature stride and round up, giving the feature-map height/width.
      height = tf.to_int32(tf.ceil(self._im_info[0] / np.float32(self._feat_stride[0])))
      width = tf.to_int32(tf.ceil(self._im_info[1] / np.float32(self._feat_stride[0])))

      anchors, anchor_length = tf.py_func(generate_anchors_pre,
                                          [height, width,
                                           self._feat_stride, self._anchor_scales, self._anchor_ratios],
                                          [tf.float32, tf.int32], name="generate_anchors")

      # py_func outputs carry no static shape, so it is restored here.
      anchors.set_shape([None, 4])
      anchor_length.set_shape([])

      self._anchors = anchors
      self._anchor_length = anchor_length

　　网络对每个anchor 进行分类，得到是否为object region

  def _region_proposal(self, net_conv, is_training, initializer):
    """Build the region proposal network on top of ``net_conv``.

    Args:
      net_conv: feature tensor produced by the backbone.
      is_training: selects the training or test proposal path and is passed
        as ``trainable`` to the convolutions.
      initializer: weight initializer for the three convolutions.

    Returns:
      The proposed regions of interest, also stored in
      ``self._predictions["rois"]``.

    Raises:
      NotImplementedError: at test time, if ``cfg.TEST.MODE`` is neither
        'nms' nor 'top'.
    """
    # Shared 3x3 convolution feeding both heads.
    rpn_feat = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3],
                           trainable=is_training,
                           weights_initializer=initializer,
                           scope="rpn_conv/3x3")
    self._act_summaries.append(rpn_feat)

    # Both 1x1 heads share every option except channel count and scope.
    head_kwargs = dict(trainable=is_training,
                       weights_initializer=initializer,
                       padding='VALID',
                       activation_fn=None)

    # Classification head: two scores per anchor.
    cls_score = slim.conv2d(rpn_feat, self._num_anchors * 2, [1, 1],
                            scope='rpn_cls_score', **head_kwargs)

    # Change it so that the score has 2 as its channel size.
    cls_score_reshape = self._reshape_layer(cls_score, 2, 'rpn_cls_score_reshape')
    cls_prob_reshape = self._softmax_layer(cls_score_reshape, "rpn_cls_prob_reshape")
    cls_pred = tf.argmax(tf.reshape(cls_score_reshape, [-1, 2]),
                         axis=1, name="rpn_cls_pred")
    cls_prob = self._reshape_layer(cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")

    # Localisation head: four box values per anchor.
    bbox_pred = slim.conv2d(rpn_feat, self._num_anchors * 4, [1, 1],
                            scope='rpn_bbox_pred', **head_kwargs)

    if is_training:
      rois, roi_scores = self._proposal_layer(cls_prob, bbox_pred, "rois")
      rpn_labels = self._anchor_target_layer(cls_score, "anchor")
      # Try to have a deterministic order for the computing graph, for reproducibility.
      with tf.control_dependencies([rpn_labels]):
        rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
    elif cfg.TEST.MODE == 'nms':
      rois, _ = self._proposal_layer(cls_prob, bbox_pred, "rois")
    elif cfg.TEST.MODE == 'top':
      rois, _ = self._proposal_top_layer(cls_prob, bbox_pred, "rois")
    else:
      raise NotImplementedError

    # Publish every intermediate tensor under its usual key.
    for key, tensor in (("rpn_cls_score", cls_score),
                        ("rpn_cls_score_reshape", cls_score_reshape),
                        ("rpn_cls_prob", cls_prob),
                        ("rpn_cls_pred", cls_pred),
                        ("rpn_bbox_pred", bbox_pred),
                        ("rois", rois)):
      self._predictions[key] = tensor

    return rois

tensorflow faster rcnn 代码分析一 demo.py的更多相关文章

Faster RCNN原理分析（二）：Region Proposal Networks详解
Faster RCNN原理分析(二):Region Proposal Networks详解 http://lib.csdn.net/article/deeplearning/61641 0814: A ...
Faster RCNN代码理解（Python）
转自http://www.infocool.net/kb/Python/201611/209696.html#原文地址第一步,准备从train_faster_rcnn_alt_opt.py入: 初 ...
python3 + Tensorflow + Faster R-CNN训练自己的数据
之前实现过faster rcnn, 但是因为各种原因,有需要实现一次,而且发现许多博客都不全面.现在发现了一个比较全面的博客.自己根据这篇博客实现的也比较顺利.在此记录一下(照搬). 原博客:http ...
Faster rcnn代码理解（1）
这段时间看了不少论文,回头看看,感觉还是有必要将Faster rcnn的源码理解一下,毕竟后来很多方法都和它有相近之处,同时理解该框架也有助于以后自己修改和编写自己的框架.好的开始吧- 这里我们跟着F ...
Tensorflow样例代码分析cifar10
github地址:https://github.com/tensorflow/models.git 本文分析tutorial/image/cifar10教程项目的cifar10_input.py代码. ...
Tensorflow faster rcnn系列一
注意:本文主要是学习用,发现了一个在faster rcnn训练流程写的比较详细的博客. 大部分内容来自以下博客连接:https://blog.csdn.net/weixin_37203756/arti ...
Faster R-CNN代码例子
主要参考文章:1,从编程实现角度学习Faster R-CNN(附极简实现) 经常是做到一半发现收敛情况不理想,然后又回去看看这篇文章的细节. 另外两篇: 2,Faster R-CNN学习总结 ...
Faster rcnn代码理解（4）
上一篇我们说完了AnchorTargetLayer层,然后我将Faster rcnn中的其他层看了,这里把ROIPoolingLayer层说一下: 我先说一下它的实现原理:RPN生成的roi区域大小是 ...
Faster rcnn代码理解（2）
接着上篇的博客,咱们继续看一下Faster RCNN的代码- 上次大致讲完了Faster rcnn在训练时是如何获取imdb和roidb文件的,主要都在train_rpn()的get_roidb()函 ...

随机推荐

NOIP引水入城（dfs）
为了使居民们都尽可能饮用到清澈的湖水,现在要在某些城市建造水利设施.水利设施有两种,分别为蓄水厂和输水站.蓄水厂的功能是利用水泵将湖泊中的水抽取到所在城市的蓄水池中. 因此,只有与湖泊毗邻的第1 行的 ...
centos7修改默认网卡名称
问题场景: 使用centos7有好一阵子了,安装过centos7的朋友都会发现网卡命名跟6.x系统的不一样,类似ifcfg-eno16780032, ens192,或者enp2s0等其他不习惯的.不容 ...
Linux下启动,停止,重启Nginx、Mysql、PHP
LINUX启动Nginx的命令: 一.查询是否启动 [root@jiang php-fpm.d]# ps -ef | grep nginx root 25225 1 0 19:26 ? 00:00:0 ...
如何优雅的解决mac安装zsh不执行.bash_profile
最近刚刚重装了系统,并安装了优雅的shell命令工具zsh,突然发现我放在我的工作目录下的.bash_profile居然在启动的时候执行,导致我的java的一些配置没有注册到bash中.然后查资料得知 ...
Mybatis中org.apache.ibatis.binding.BindingException错误问题总结
1. Mybatis出现多个参数,但是多个参数中没有使用@Param注解进行修饰 2. Xml文件中字段名和PO绑定时候,字段写错了 3.XML中<foreach/>标签中的colleac ...
收集JavaScript中常用的方法函数
本文中,收集了一些比较常用的Javascript函数,希望对学习JS的朋友们有所帮助. 1. 字符串长度截取 function cutstr(str, len) { var temp, icount ...
Django 子程序
在Web应用中,通常有一些业务功能模块是在不同的项目中都可以复用的,故在开发中通常将工程项目拆分为不同的子功能模块,各功能模块间可以保持相对的独立,在其他工程项目中需要用到某个特定功能模块时,可以将该 ...
浅谈CSRF（Cross-site request forgery）跨站请求伪造（写的非常好）
一 CSRF是什么 CSRF(Cross-site request forgery)跨站请求伪造,也被称为“One Click Attack”或者Session Riding,通常缩写为CSRF或者X ...
斯坦福大学公开课机器学习： advice for applying machine learning - evaluatin a phpothesis（怎么评估学习算法得到的假设以及如何防止过拟合或欠拟合）
怎样评价我们的学习算法得到的假设以及如何防止过拟合和欠拟合的问题. 当我们确定学习算法的参数时,我们考虑的是选择参数来使训练误差最小化.有人认为,得到一个很小的训练误差一定是一件好事.但其实,仅仅是因 ...
ElasticSearch6.5.0 【字段类型】
字符串类型 text 适合全文索引,有分析的过程 keyword 适合结构化的数据,比如地址.电话号码... 数字 long [带符号64位整数]范围:-263 ~ 263-1 integer ...

tensorflow faster rcnn 代码分析一 demo.py

tensorflow faster rcnn 代码分析一 demo.py的更多相关文章

随机推荐

热门专题