2019-02-18,15点00
'''
下面是别人写的原始的笔记,我在上面自己补充了一些. '''
#https://www.cnblogs.com/the-home-of-123/p/9747963.html
# 以voc数据集为例,按照imdb的命名,利用pascal_voc()函数生成不同的imdb
'''
for year in ['2007', '2012']:
    for split in ['train', 'val', 'trainval', 'test']:
        name = 'voc_{}_{}'.format(year, split)  # year='2007', split='trainval'
        __sets[name] = (lambda split=split, year=year: pascal_voc(split, year))


def get_imdb(name):
    """Get an imdb (image database) by name."""
    if name not in __sets:
        raise KeyError('Unknown dataset: {}'.format(name))
    return __sets[name]()
'''
# self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) #数据库路径
# self._classes = ('__background__', # always index 0, 训练类别标签,包含背景类
# 'person')
# # Default to roidb handler
# self._roidb_handler = self.gt_roidb #感兴趣区域(ROI)数据库
# self._salt = str(uuid.uuid4()) #??
# self._comp_id = 'comp4' # ?? def _build_network(self, is_training=True):
# select initializers进行初始化
if cfg.TRAIN.TRUNCATED:
initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
else:
initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001) net_conv = self._image_to_head(is_training)##经过特征提取网络,初步提取特征
with tf.variable_scope(self._scope, self._scope):
# build the anchors for the image
self._anchor_component()###产生anchor
# region proposal network ###产生proposal的坐标
rois = self._region_proposal(net_conv, is_training, initializer)
#这里面rois表示的是那些非背景的区域对应到feature_map上的坐标组成的数组.
'''
上面一行的代码是和兴!!
'''
# region of interest pooling
if cfg.POOLING_MODE == 'crop':
pool5 = self._crop_pool_layer(net_conv, rois, "pool5") ###对产生的porposal进行ROI池化,统一格式
else:
raise NotImplementedError
'''
这里面得到的pool5就是把rois
''' fc7 = self._head_to_tail(pool5, is_training)
with tf.variable_scope(self._scope, self._scope):
# region classification 输入到Fast-RCNN网络中,对样本进行分类和预测框回归
cls_prob, bbox_pred = self._region_classification(fc7, is_training,
initializer, initializer_bbox) '''
利用self._region_classification 里面的fc 层和softmax层输出 cls_prob, bbox_pred.得到最总的预测结果.
'''
self._score_summaries.update(self._predictions) return rois, cls_prob, bbox_pred '''
下面是上面说的核心代码的分析
''' def _region_proposal(self, net_conv, is_training, initializer):
rpn = slim.conv2d(net_conv, cfg.RPN_CHANNELS, [3, 3], trainable=is_training, weights_initializer=initializer,
scope="rpn_conv/3x3") ##经过一个3X3卷积,之后分两条线
self._act_summaries.append(rpn) '''
下面一行的代码就是唐老师说的最精髓的地方.
anchors这些概念都是虚拟的.其实都没有.都是通过学习得到的. 输出的维度是self._num_anchors * 2, 每2个数表示一种anchor对应的得分.至于到底哪个数对应哪个anchor,
不用指明,这些完全是通过学习获得的.这样避免人工干预,效果更好.更加end_to_end. 从这里面kernal=[1*1]就表示每一个像素点对应9个anchor! ''' rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1], trainable=is_training,
weights_initializer=initializer,
padding='VALID', activation_fn=None, scope='rpn_cls_score') ###第一条线产生预测类别确定是背景还是类别 '''
我纳闷的地方是这里面得到的rpn_cls_score:(1,height,width,18) 表示的是9个框的分数.而表示不了各个分类的分数
那么后面的nms怎么做? 其实这个地方只是对是否是背景做nms
''' # change it so that the score has 2 as its channel size
rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
rpn_cls_pred = tf.argmax(tf.reshape(rpn_cls_score_reshape, [-1, 2]), axis=1, name="rpn_cls_pred") rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob") rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1], trainable=is_training, ###第二条线产生预测框坐标,对预测框坐标进行预测
weights_initializer=initializer,
padding='VALID', activation_fn=None, scope='rpn_bbox_pred')
if is_training:
rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois") ###根据预测的类别和预测框坐标对porposa进行筛选,对前N个进行NMS,这里面nms只是找这些框,那些不是背景的得分高.把那些是背景概率高的框去掉.
rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
# Try to have a deterministic order for the computing graph, for reproducibility
with tf.control_dependencies([rpn_labels]):
rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
else:
if cfg.TEST.MODE == 'nms':
rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
elif cfg.TEST.MODE == 'top':
rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
else:
raise NotImplementedError self._predictions["rpn_cls_score"] = rpn_cls_score
self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
self._predictions["rpn_cls_prob"] = rpn_cls_prob
self._predictions["rpn_cls_pred"] = rpn_cls_pred
self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
self._predictions["rois"] = rois return rois def _crop_pool_layer(self, bottom, rois, name): ####bottom为convert层卷积输出---也就是特征图, feat_stride为补偿乘积,用来求得原图的w,h.rois为选出的256个anchor的坐标,这些坐标是特征图上的坐标.
'''
结果就是在特征图上,把rois这些子图都扣出来.然后按照比例反映射到原始input_image里面的部分.
类似感受眼这个东西. '''
with tf.variable_scope(name) as scope:
batch_ids = tf.squeeze(tf.slice(rois, [0, 0], [-1, 1], name="batch_id"), [1])
# Get the normalized coordinates of bounding boxes
bottom_shape = tf.shape(bottom)
height = (tf.to_float(bottom_shape[1]) - 1.) * np.float32(self._feat_stride[0])
width = (tf.to_float(bottom_shape[2]) - 1.) * np.float32(self._feat_stride[0]) '''
yinwei rois是针对原始图片的坐标.所以相对坐标是需要/htight or width
'''
x1 = tf.slice(rois, [0, 1], [-1, 1], name="x1") / width
y1 = tf.slice(rois, [0, 2], [-1, 1], name="y1") / height
x2 = tf.slice(rois, [0, 3], [-1, 1], name="x2") / width
y2 = tf.slice(rois, [0, 4], [-1, 1], name="y2") / height###得到相对位置
# Won't be back-propagated to rois anyway, but to save time '''
因为bboxes.需要的是图片中子图的相对坐标位置,也就是4个百分比位置
'''
bboxes = tf.stop_gradient(tf.concat([y1, x1, y2, x2], axis=1))
'''
表示bboxes这个变量,不计算梯度.
''' pre_pool_size = cfg.POOLING_SIZE * 2
crops = tf.image.crop_and_resize(bottom, bboxes, tf.to_int32(batch_ids), [pre_pool_size, pre_pool_size], name="crops")##利用tensorflow的自带函数作用类似于ROI池化
'''
因为pre_pool_size = cfg.POOLING_SIZE * 2,所以下面再maxpool一下把图片缩小回去.
'''
return slim.max_pool2d(crops, [2, 2], padding='SAME') import tensorflow as tf
# help(tf.image.crop_and_resize)


def _region_classification(self, fc7, is_training, initializer, initializer_bbox):
    """Add the final classification and box-regression layers on ``fc7``.

    Args:
        fc7: Input features for both fully connected layers.
        is_training: Sets ``trainable`` on both layers.
        initializer: Weights initializer for the class-score layer.
        initializer_bbox: Weights initializer for the box-regression layer.

    Returns:
        A ``(cls_prob, bbox_pred)`` tuple: the softmax of the class scores
        and the raw box-regression output (``self._num_classes * 4`` units).

    Side effects:
        Stores ``cls_score``, ``cls_pred``, ``cls_prob`` and ``bbox_pred``
        in ``self._predictions``.
    """
    # Class scores: one unit per class, no activation.
    cls_score = slim.fully_connected(fc7, self._num_classes,
                                     weights_initializer=initializer,
                                     trainable=is_training,
                                     activation_fn=None, scope='cls_score')
    cls_prob = self._softmax_layer(cls_score, "cls_prob")
    cls_pred = tf.argmax(cls_score, axis=1, name="cls_pred")

    # Box regression: four values per class, no activation.
    bbox_pred = slim.fully_connected(fc7, self._num_classes * 4,
                                     weights_initializer=initializer_bbox,
                                     trainable=is_training,
                                     activation_fn=None, scope='bbox_pred')

    self._predictions["cls_score"] = cls_score
    self._predictions["cls_pred"] = cls_pred
    self._predictions["cls_prob"] = cls_prob
    self._predictions["bbox_pred"] = bbox_pred

    return cls_prob, bbox_pred

faster-rcnn 笔记的更多相关文章

  1. Faster RCNN 学习笔记

    下面的介绍都是基于VGG16 的Faster RCNN网络,各网络的差异在于Conv layers层提取特征时有细微差异,至于后续的RPN层.Pooling层及全连接的分类和目标定位基本相同. 一). ...

  2. faster rcnn源码阅读笔记1

    自己保存的源码阅读笔记哈 faster rcnn 的主要识别过程(粗略) (开始填坑了): 一张3通道,1600*1600图像输入中,经过特征提取网络,得到100*100*512的feature ma ...

  3. 论文笔记:目标检测算法(R-CNN,Fast R-CNN,Faster R-CNN,FPN,YOLOv1-v3)

    R-CNN(Region-based CNN) motivation:之前的视觉任务大多数考虑使用SIFT和HOG特征,而近年来CNN和ImageNet的出现使得图像分类问题取得重大突破,那么这方面的 ...

  4. 论文阅读笔记二十七:Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks(CVPR 2016)

    论文源址:https://arxiv.org/abs/1506.01497 tensorflow代码:https://github.com/endernewton/tf-faster-rcnn 室友对 ...

  5. 深度学习笔记之目标检测算法系列(包括RCNN、Fast RCNN、Faster RCNN和SSD)

    不多说,直接上干货! 本文一系列目标检测算法:RCNN, Fast RCNN, Faster RCNN代表当下目标检测的前沿水平,在github都给出了基于Caffe的源码. •   RCNN RCN ...

  6. Faster RCNN学习笔记

    感谢知乎大神的分享 https://zhuanlan.zhihu.com/p/31426458 Ross B. Girshick在2016年提出了新的Faster RCNN,在结构上,Faster R ...

  7. 目标检测(四)Faster R-CNN: Towards Real-Time Object Detection with Region Proposal Networks

    作者:Shaoqing Ren, Kaiming He, Ross Girshick, and Jian Sun SPPnet.Fast R-CNN等目标检测算法已经大幅降低了目标检测网络的运行时间. ...

  8. Faster RCNN代码理解(Python)

    转自http://www.infocool.net/kb/Python/201611/209696.html#原文地址 第一步,准备 从train_faster_rcnn_alt_opt.py入: 初 ...

  9. Faster RCNN 学习与实现

    论文 论文翻译 Faster R-CNN 主要分为两个部分: RPN(Region Proposal Network)生成高质量的 region proposal: Fast R-CNN 利用 reg ...

  10. faster rcnn算法及源码及论文解析相关博客

    1. 通过代码理解faster-RCNN中的RPN http://blog.csdn.net/happyflyy/article/details/54917514 2. faster rcnn详解 R ...

随机推荐

  1. 使用ThreadPoolExecutor进行多线程编程

    ThreadPoolExecutor有四个构造函数,分别是: 1,ThreadPoolExecutor(int corePoolSize, int maximumPoolSize, long keep ...

  2. 关于c# Debug和Release的区别 (转)

    关于Debug和Release的区别之讨论本文主要包含如下内容: 1. Debug 和 Release 编译方式的本质区别2. 哪些情况下 Release 版会出错2. 怎样“调试” Release ...

  3. @Data的注解使用以及在IDEA上安装

    平时在开发过程中创建实体类的时候就经常的操作是:先写成员变量,然后再提供getXxx().setXxx()方法,然后看需要再提供toString等方法.这样一来不仅会发现每写一个实体类的话就会有很多相 ...

  4. join,left join,inner join,full join的区别?

    left join(左联接) 返回包括左表中的所有记录和右表中联结字段相等的记录 right join(右联接) 返回包括右表中的所有记录和左表中联结字段相等的记录inner join(等值连接) 只 ...

  5. 5.LNMP(Linux + Nginx + MySQL + PHP)环境安装

    1.安装Nginx: yum install yum-priorities -y wget http://nginx.org/packages/centos/7/noarch/RPMS/nginx-r ...

  6. 1.Sed | Awk | Grep | Find

    1.Sed | Awk | Grep | Find 可以参考的文档链接 CentOS7 查看 当前机器 已经启动的端口的Shell命令: netstat -lntup | awk -F' ' {'pr ...

  7. JSON Web Token(缩写 JWT) 目前最流行的跨域认证解决方案

    一.跨域认证的问题 互联网服务离不开用户认证.一般流程是下面这样. 1.用户向服务器发送用户名和密码. 2.服务器验证通过后,在当前对话(session)里面保存相关数据,比如用户角色.登录时间等等. ...

  8. 我的vimrc设置

    vim一个文件 :e version :editor version 查看.vimrc所在的系统和用户文件 vim ~/.vimrc " 行号 set number " 语法高亮( ...

  9. sha256_transform

    DECLSPEC void sha256_transform (const u32 *w0, const u32 *w1, const u32 *w2, const u32 *w3, u32 *dig ...

  10. python day04笔记总结

    2019.4.1 S21 day04笔记总结 昨日内容补充 1.解释器/编译器 1.解释型语言.编译型语言 2.解释型:写完代码后提交给解释器,解释器将代码一行行执行.(边接收边解释/实时解释) 常用 ...