r-cnn学习(八):minibatch
这段代码包括由输入图片随机生成相应的RoIs,并生成相应的blobs,由roidb得到相应的
minibatch。其代码如下。
# --------------------------------------------------------
# Fast R-CNN
# Copyright (c) 2015 Microsoft
# Licensed under The MIT License [see LICENSE for details]
# Written by Ross Girshick
# -------------------------------------------------------- """Compute minibatch blobs for training a Fast R-CNN network.""" import numpy as np
import numpy.random as npr
import cv2
from fast_rcnn.config import cfg
from utils.blob import prep_im_for_blob, im_list_to_blob def get_minibatch(roidb, num_classes):
"""Given a roidb, construct a minibatch sampled from it."""
num_images = len(roidb)
# Sample random scales to use for each image in this batch
random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES),
size=num_images)#随机索引组成的numpy,大小是roidb的长度
assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \
'num_images ({}) must divide BATCH_SIZE ({})'. \
format(num_images, cfg.TRAIN.BATCH_SIZE)
rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images #每张图的rois
fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) #目标rois # Get the input image blob, formatted for caffe
im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) blobs = {'data': im_blob} if cfg.TRAIN.HAS_RPN: #每个blobs包含图片中相应的box、gt_box信息
assert len(im_scales) == 1, "Single batch only"
assert len(roidb) == 1, "Single batch only"
# gt boxes: (x1, y1, x2, y2, cls)
gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0]
gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32)
gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0]
gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds]
blobs['gt_boxes'] = gt_boxes
blobs['im_info'] = np.array(
[[im_blob.shape[2], im_blob.shape[3], im_scales[0]]],
dtype=np.float32)
else: # not using RPN
# Now, build the region of interest and label blobs
rois_blob = np.zeros((0, 5), dtype=np.float32)
labels_blob = np.zeros((0), dtype=np.float32)
bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32)
bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32)
# all_overlaps = []
for im_i in xrange(num_images):
labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \
= _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image,
num_classes) # Add to RoIs blob
rois = _project_im_rois(im_rois, im_scales[im_i])
batch_ind = im_i * np.ones((rois.shape[0], 1))
rois_blob_this_image = np.hstack((batch_ind, rois))
rois_blob = np.vstack((rois_blob, rois_blob_this_image)) # Add to labels, bbox targets, and bbox loss blobs
labels_blob = np.hstack((labels_blob, labels))
bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets))
bbox_inside_blob = np.vstack((bbox_inside_blob, bbox_inside_weights))
# all_overlaps = np.hstack((all_overlaps, overlaps)) # For debug visualizations
# _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) blobs['rois'] = rois_blob
blobs['labels'] = labels_blob if cfg.TRAIN.BBOX_REG:
blobs['bbox_targets'] = bbox_targets_blob
blobs['bbox_inside_weights'] = bbox_inside_blob
blobs['bbox_outside_weights'] = \
np.array(bbox_inside_blob > 0).astype(np.float32) return blobs
#随机生成前景和背景的RoIs
def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes):
"""Generate a random sample of RoIs comprising foreground and background
examples.
"""
# label = class RoI has max overlap with
labels = roidb['max_classes']
overlaps = roidb['max_overlaps']
rois = roidb['boxes'] # Select foreground RoIs as those with >= FG_THRESH overlap
fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0]
# Guard against the case when an image has fewer than fg_rois_per_image
# foreground RoIs
fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size)
# Sample foreground regions without replacement
if fg_inds.size > 0:
fg_inds = npr.choice(
fg_inds, size=fg_rois_per_this_image, replace=False) # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI)
bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) &
(overlaps >= cfg.TRAIN.BG_THRESH_LO))[0]
# Compute number of background RoIs to take from this image (guarding
# against there being fewer than desired)
bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image
bg_rois_per_this_image = np.minimum(bg_rois_per_this_image,
bg_inds.size)
# Sample foreground regions without replacement
if bg_inds.size > 0:
bg_inds = npr.choice(
bg_inds, size=bg_rois_per_this_image, replace=False) # The indices that we're selecting (both fg and bg)
keep_inds = np.append(fg_inds, bg_inds)
# Select sampled values from various arrays:
labels = labels[keep_inds]
# Clamp labels for the background RoIs to 0
labels[fg_rois_per_this_image:] = 0
overlaps = overlaps[keep_inds]
rois = rois[keep_inds] bbox_targets, bbox_inside_weights = _get_bbox_regression_labels(
roidb['bbox_targets'][keep_inds, :], num_classes) return labels, overlaps, rois, bbox_targets, bbox_inside_weights
#由相应尺度的roidb生成相应的blob
def _get_image_blob(roidb, scale_inds):
"""Builds an input blob from the images in the roidb at the specified
scales.
"""
num_images = len(roidb)
processed_ims = []
im_scales = []
for i in xrange(num_images):
im = cv2.imread(roidb[i]['image'])
if roidb[i]['flipped']:
im = im[:, ::-1, :]
target_size = cfg.TRAIN.SCALES[scale_inds[i]]
im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size,
cfg.TRAIN.MAX_SIZE)prep_im_for_blob: util的blob.py中;用于将图片平均后缩放。#im_scales: 每张图片的缩放率
# cfg.PIXEL_MEANS: 原始图片会集体减去该值达到mean
im_scales.append(im_scale)
processed_ims.append(im) # Create a blob to hold the input images
blob = im_list_to_blob(processed_ims)#将以list形式存放的图片数据处理成(batch elem, channel, height, width)的im_blob形式,height,width用的是此次计算所有图片的最大值 return blob, im_scales#blob是一个字典,与name_to_top对应,方便把blob数据放进top def _project_im_rois(im_rois, im_scale_factor): #图片缩放时,相应的rois也进行缩放
"""Project image RoIs into the rescaled training image."""
rois = im_rois * im_scale_factor
return rois
#由roidb返回相应的box及inside_weights
def _get_bbox_regression_labels(bbox_target_data, num_classes):
"""Bounding-box regression targets are stored in a compact form in the
roidb. This function expands those targets into the 4-of-4*K representation used
by the network (i.e. only one class has non-zero targets). The loss weights
are similarly expanded. Returns:
bbox_target_data (ndarray): N x 4K blob of regression targets
bbox_inside_weights (ndarray): N x 4K blob of loss weights
"""
clss = bbox_target_data[:, 0]
bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
inds = np.where(clss > 0)[0]
for ind in inds:
cls = clss[ind]
start = 4 * cls
end = start + 4
bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
return bbox_targets, bbox_inside_weights def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
"""Visualize a mini-batch for debugging."""
import matplotlib.pyplot as plt
for i in xrange(rois_blob.shape[0]):
rois = rois_blob[i, :]
im_ind = rois[0]
roi = rois[1:]
im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
im += cfg.PIXEL_MEANS
im = im[:, :, (2, 1, 0)]
im = im.astype(np.uint8)
cls = labels_blob[i]
plt.imshow(im)
print 'class: ', cls, ' overlap: ', overlaps[i]
plt.gca().add_patch(
plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
roi[3] - roi[1], fill=False,
edgecolor='r', linewidth=3)
)
plt.show()
r-cnn学习(八):minibatch的更多相关文章
- Python Tutorial 学习(八)--Errors and Exceptions
Python Tutorial 学习(八)--Errors and Exceptions恢复 Errors and Exceptions 错误与异常 此前,我们还没有开始着眼于错误信息.不过如果你是一 ...
- CNN学习笔记:批标准化
CNN学习笔记:批标准化 Batch Normalization Batch Normalization, 批标准化, 是将分散的数据统一的一种做法, 也是优化神经网络的一种方法. 在神经网络的训练过 ...
- R基础学习
R基础学习 The Art of R Programming 1.seq 产生等差数列:seq(from,to,by) seq(from,to,length) for(i in 1:length(x) ...
- 卷积神经网络(CNN)学习笔记1:基础入门
卷积神经网络(CNN)学习笔记1:基础入门 Posted on 2016-03-01 | In Machine Learning | 9 Comments | 14935 Vie ...
- SVG 学习<八> SVG的路径——path(2)贝塞尔曲线命令、光滑贝塞尔曲线命令
目录 SVG 学习<一>基础图形及线段 SVG 学习<二>进阶 SVG世界,视野,视窗 stroke属性 svg分组 SVG 学习<三>渐变 SVG 学习<四 ...
- R语言学习 第四篇:函数和流程控制
变量用于临时存储数据,而函数用于操作数据,实现代码的重复使用.在R中,函数只是另一种数据类型的变量,可以被分配,操作,甚至把函数作为参数传递给其他函数.分支控制和循环控制,和通用编程语言的风格很相似, ...
- CNN学习笔记:目标函数
CNN学习笔记:目标函数 分类任务中的目标函数 目标函数,亦称损失函数或代价函数,是整个网络模型的指挥棒,通过样本的预测结果与真实标记产生的误差来反向传播指导网络参数学习和表示学习. 假设某分类任务共 ...
- CNN学习笔记:卷积神经网络
CNN学习笔记:卷积神经网络 卷积神经网络 基本结构 卷积神经网络是一种层次模型,其输入是原始数据,如RGB图像.音频等.卷积神经网络通过卷积(convolution)操作.汇合(pooling)操作 ...
- CNN学习笔记:全连接层
CNN学习笔记:全连接层 全连接层 全连接层在整个网络卷积神经网络中起到“分类器”的作用.如果说卷积层.池化层和激活函数等操作是将原始数据映射到隐层特征空间的话,全连接层则起到将学到的特征表示映射到样 ...
- CNN学习笔记:池化层
CNN学习笔记:池化层 池化 池化(Pooling)是卷积神经网络中另一个重要的概念,它实际上是一种形式的降采样.有多种不同形式的非线性池化函数,而其中“最大池化(Max pooling)”是最为常见 ...
随机推荐
- RFID考试背诵
简答题: 简述RFID标准多元化的原因: 由不同的技术因素.利益因素导致: 工作频率分布在低频至微波的多个频段中,频率不同,技术差异大. 作用距离的差异导致标准不同:因为应答器分为有源.无源两种工作方 ...
- jquery的curCSS方法
核心思想是用getComputedStyle获取样式,如果没有获取到就判断是不是动态创建的元素,如果是则用style获取行内样式.看重点(注释部分)代码吧! curCSS = function( el ...
- JS实现Observable观察者模式
欢迎讨论与交流 : ) 注 代码参考自——汇智网 RxJS教程 前言 Observable观察者模式令小白笔者眼前一亮.数据生产者(observable)负责生产新鲜的数据,同时在生产完毕后'通知“消 ...
- MVC之前的那点事儿系列(10):MVC为什么不再需要注册通配符(*.*)了?
文章内容 很多教程里都提到了,在部署MVC程序的时候要配置通配符映射(或者是*.mvc)到aspnet_ISPAI.dll上,在.NET4.0之前确实应该这么多,但是.NET4.0之后已经不要再费事了 ...
- Convertion of grey code and binary 格雷码和二进制数之间的转换
以下转换代码摘自维基百科 Wikipedia: /* The purpose of this function is to convert an unsigned binary number to r ...
- Android开发之XUtils框架使用和报错处理
一.XUtils lib的的添加: 1.点击+,选择第一个Library dependency 2.输入XUtils 按enter键,搜索: 3.然后就是选择XUtils,选择哪个版本就看个人了,接 ...
- Android LitePal 神一般的数据库框架 超级好用
参考: Android数据库高手秘籍(一)--SQLite命令 Android数据库高手秘籍(二)--创建表和LitePal的基本用法 Android数据库高手秘籍(三)--使用LitePal升级表 ...
- Mac上更新Ruby
因为准备在项目中使用bootstrap,在安装bootstrap过程中提示需要Ruby的版本在1.9.2以上,而目前使用的Ruby版本是Mac系统自带的1.8.7.所以需要对Ruby进行升级.这里使用 ...
- LinuxMint 18 编译cm13.0 笔记
1.安装依赖文件 sudo apt--dev libesd0-dev git-core gnupg flex bison gperf build-essential zip curl zlib1g-d ...
- cocos2d-x打飞机实例总结(一):程序入口分析和AppDelegate,Application,ApplicationProtocol三个类的分析
首先,是个敲代码的,基本上都知道程序的入口是main函数,显然,就算在cocos2d-x框架中也一样 我们看看main函数做了什么 #include "main.h" #inclu ...