github  https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection

train.py

from __future__ import print_function
import sys import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
from torch.autograd import Variable
from tqdm import tqdm import dataset
import random
import math
import os
from utils import *
from cfg import parse_cfg, cfg
from darknet import Darknet
import pdb # Training settings
# datacfg = sys.argv[1]
# darknetcfg = parse_cfg(sys.argv[2])
# learnetcfg = parse_cfg(sys.argv[3]) datacfg = "cfg/fewyolov3_voc.data"
darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")
learnetcfg = parse_cfg("cfg/reweighting_net.cfg")
weightfile = "tmp/000050.weights"
if len(sys.argv) == 5:
weightfile = sys.argv[4] data_options = read_data_cfg(datacfg)
net_options = darknetcfg[0]
meta_options = learnetcfg[0] # Configure options
cfg.config_data(data_options)
cfg.config_meta(meta_options)
cfg.config_net(net_options) # Parameters
metadict = data_options['meta']
trainlist = data_options['train'] testlist = data_options['valid']
backupdir = data_options['backup']
gpus = data_options['gpus'] # e.g. 0,1,2,3
ngpus = len(gpus.split(','))
num_workers = int(data_options['num_workers']) batch_size = int(net_options['batch'])
print("batch_size:",batch_size)
max_batches = int(net_options['max_batches'])
learning_rate = float(data_options['learning_rate'])
momentum = float(net_options['momentum'])
decay = float(net_options['decay'])
steps = [float(step) for step in data_options['steps'].split(',')]
scales = [float(scale) for scale in data_options['scales'].split(',')] # Train parameters
use_cuda = True
seed = int(time.time()) ## --------------------------------------------------------------------------
## MAIN
backupdir = cfg.backup
print('logging to ' + backupdir)
if not os.path.exists(backupdir):
os.makedirs(backupdir) torch.manual_seed(seed)
if use_cuda:
os.environ['CUDA_VISIBLE_DEVICES'] = gpus
torch.cuda.manual_seed(seed) model = Darknet(darknetcfg, learnetcfg)
region_loss = model.loss model.print_network()
# if len(sys.argv) == 5:
model.load_weights(weightfile) ###################################################
### Meta-model parameters
region_loss.seen = model.seen
processed_batches = 0 if cfg.tuning else model.seen / batch_size
trainlist = dataset.build_dataset(data_options)
nsamples = len(trainlist)
init_width = model.width
init_height = model.height
init_epoch = 0 if cfg.tuning else model.seen / nsamples
max_epochs = max_batches * batch_size / nsamples + 1
max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs
print(cfg.repeat, nsamples, max_batches, batch_size)
print(num_workers) kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {} if use_cuda:
if ngpus > 1:
model = torch.nn.DataParallel(model).cuda()
else:
model = model.cuda() optimizer = optim.Adam(model.parameters(), lr=learning_rate) def adjust_learning_rate(optimizer, processed_batches):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
lr = learning_rate
for i in range(len(steps)):
scale = scales[i] if i < len(scales) else 1
if processed_batches >= steps[i]:
lr = lr * scale
if processed_batches == steps[i]:
break
else:
break
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr def train(epoch):
global processed_batches
t0 = time.time()
if ngpus > 1:
cur_model = model.module
else:
cur_model = model train_loader = torch.utils.data.DataLoader(
dataset.listDataset(trainlist, shape=(init_width, init_height),
shuffle=False,
transform=transforms.Compose([
transforms.ToTensor(),
]),
train=True,
seen=cur_model.seen,
batch_size=batch_size,
num_workers=num_workers),
batch_size=batch_size, shuffle=False, **kwargs) metaset = dataset.MetaDataset(metafiles=metadict, train=True)
metaloader = torch.utils.data.DataLoader(
metaset,
batch_size=metaset.batch_size,
shuffle=False,
num_workers=num_workers,
pin_memory=True
)
metaloader = iter(metaloader) lr = adjust_learning_rate(optimizer, processed_batches)
logging('epoch %d/%d, processed %d samples, lr %e' % (epoch, max_epochs, epoch * len(train_loader.dataset), lr)) model.train()
t1 = time.time()
avg_time = torch.zeros(9)
with tqdm(total=train_loader.__len__()) as t: for batch_idx, (data, target) in enumerate(train_loader):
metax, mask = metaloader.next()
t2 = time.time()
adjust_learning_rate(optimizer, processed_batches)
processed_batches = processed_batches + 1
if use_cuda:
data = data.cuda()
metax = metax.cuda()
mask = mask.cuda()
# target= target.cuda()
t3 = time.time()
data, target = Variable(data), Variable(target)
metax, mask = Variable(metax), Variable(mask)
t4 = time.time()
optimizer.zero_grad()
t5 = time.time()
output = model(data, metax, mask)
t6 = time.time()
region_loss.seen = region_loss.seen + data.data.size(0)
cur_model.seen = region_loss.seen
region_loss.input_size = (data.data.size(2), data.data.size(3))
loss,loss_box,loss_conf,loss_cls,cls_acc,recall50,recall75,nProposals = region_loss(output, target)
t.set_description('Epoch %d' % epoch)
t.set_postfix(loss=loss.item(), loss_bbox=loss_box,loss_conf=loss_conf,loss_cls=loss_cls,
cls_acc=cls_acc, recall50=recall50, recall75=recall75,Proposals=nProposals)
t.update() t7 = time.time()
loss.backward()
t8 = time.time()
optimizer.step()
t9 = time.time()
if False and batch_idx > 1:
avg_time[0] = avg_time[0] + (t2 - t1)
avg_time[1] = avg_time[1] + (t3 - t2)
avg_time[2] = avg_time[2] + (t4 - t3)
avg_time[3] = avg_time[3] + (t5 - t4)
avg_time[4] = avg_time[4] + (t6 - t5)
avg_time[5] = avg_time[5] + (t7 - t6)
avg_time[6] = avg_time[6] + (t8 - t7)
avg_time[7] = avg_time[7] + (t9 - t8)
avg_time[8] = avg_time[8] + (t9 - t1)
print('-------------------------------')
print(' load data : %f' % (avg_time[0] / (batch_idx)))
print(' cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
print(' zero_grad : %f' % (avg_time[3] / (batch_idx)))
print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
print(' forward loss : %f' % (avg_time[5] / (batch_idx)))
print(' backward : %f' % (avg_time[6] / (batch_idx)))
print(' step : %f' % (avg_time[7] / (batch_idx)))
print(' total : %f' % (avg_time[8] / (batch_idx)))
t1 = time.time()
print('')
t1 = time.time()
logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0))) if (epoch + 1) % cfg.save_interval == 0:
logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1)) init_epoch = int(init_epoch)
max_epochs = int(max_epochs)
print("init_epoch:",init_epoch)
print("max_epochs:",max_epochs)
for epoch in range(init_epoch, max_epochs):
train(epoch)

region_loss.py

import time
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.autograd import Variable
from utils import *
from cfg import cfg
from numbers import Number
from random import random, randint
import pdb


def _sample_kept_rows(target, ensure_nonempty):
    """Pick the rows that survive negative subsampling.

    A row is "negative" when its target sums to zero. Depending on
    ``cfg.neg_ratio``:

    * ``'full'``: nothing is dropped.
    * a number: each negative row is kept with probability
      ``neg_ratio * num_pos / num_neg``; positive rows are always kept.

    Returns ``None`` when every row is kept, otherwise a 1-D integer numpy
    array of the kept row indices. With ``ensure_nonempty`` one random row is
    kept if the sampling would otherwise drop everything.

    Raises:
        NotImplementedError: if ``cfg.neg_ratio`` is neither ``'full'`` nor a number.
    """
    if cfg.neg_ratio == 'full':
        return None
    if not isinstance(cfg.neg_ratio, Number):
        raise NotImplementedError('neg_ratio not recognized')

    flags = (torch.sum(target, 1) != 0).cpu().data.tolist()
    num_pos = sum(flags)
    num_neg = len(flags) - num_pos
    # No negatives means nothing to subsample (and avoids dividing by zero).
    if num_neg == 0:
        return None
    ratio = cfg.neg_ratio * num_pos * 1. / num_neg
    if ratio >= 1:
        return None

    flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
    if ensure_nonempty and sum(flags) == 0:
        flags[randint(0, len(flags) - 1)] = 1
    # np.nonzero(...)[0] stays 1-D even when a single row survives, so
    # indexing with it never drops the batch dimension.
    return np.nonzero(flags)[0]


def neg_filter(pred_boxes, target, withids=False):
    """Subsample negative rows of ``pred_boxes`` / ``target`` (may return empty tensors).

    Args:
        pred_boxes: tensor whose first dimension matches ``target``.
        target: 2-D tensor; rows summing to zero are treated as negatives.
        withids: also return the kept row indices.

    Returns:
        ``(pred_boxes, target)`` or ``(pred_boxes, target, inds)``.
    """
    assert pred_boxes.size(0) == target.size(0)
    kept = _sample_kept_rows(target, ensure_nonempty=False)
    if kept is None:
        inds = list(range(pred_boxes.size(0)))
    else:
        inds = kept
        pred_boxes, target = pred_boxes[inds], target[inds]
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def neg_filter_v2(pred_boxes, target, withids=False):
    """Like :func:`neg_filter`, but always keeps at least one row."""
    assert pred_boxes.size(0) == target.size(0)
    kept = _sample_kept_rows(target, ensure_nonempty=True)
    if kept is None:
        inds = list(range(pred_boxes.size(0)))
    else:
        inds = kept
        pred_boxes, target = pred_boxes[inds], target[inds]
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
    """Build the regression / objectness targets for one feature map.

    ``target`` is read as rows of 5 values ``(cls, x, y, w, h)``; rows whose
    width is not positive are treated as padding and skipped. x/y are scaled
    by the feature-map size and w/h by ``input_size``, so the box values are
    presumably normalised to [0, 1] (not verifiable from this file). Each
    remaining box is assigned to the anchor with the best shape IoU at the
    grid cell containing its centre.

    Args:
        pred_boxes: predicted boxes, viewable as ``(nB, nA, H, W, 4)``.
        target: ``(nB, max_boxes * 5)`` ground-truth tensor; all buffers are
            created on its device.
        conf: predicted objectness, viewable as ``(nB, nA, H, W)``.
        anchors: ``(nA, 2)`` tensor of anchor widths / heights.
        num_anchors: number of anchors ``nA``.
        feature_size: ``(H, W)`` of the feature map.
        input_size: ``(height, width)`` of the network input.
        ignore_thresh: anchors whose shape IoU with a box exceeds this are
            removed from the no-object mask at that box's cell.

    Returns:
        ``(nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf,
        tcls, detected50, detected75)``; the masks are uint8 tensors and
        ``detected50`` / ``detected75`` have one entry per ground-truth box.
    """
    nB = target.size(0)
    nA = num_anchors
    fh, fw = feature_size[0], feature_size[1]
    device = target.device
    anchors = anchors.to(device)

    obj_mask = torch.zeros(nB, nA, fh, fw, dtype=torch.uint8, device=device)
    noobj_mask = torch.ones(nB, nA, fh, fw, dtype=torch.uint8, device=device)
    tx = torch.zeros(nB, nA, fh, fw, device=device)
    ty = torch.zeros(nB, nA, fh, fw, device=device)
    tw = torch.zeros(nB, nA, fh, fw, device=device)
    th = torch.zeros(nB, nA, fh, fw, device=device)
    tcls = torch.zeros(nB, nA, fh, fw, device=device)
    iou_scores = torch.zeros(nB, nA, fh, fw, device=device)

    tboxes = target.view(-1, 5)
    # Boxes per row, derived from the data (50 for the usual 250-wide target).
    max_boxes = tboxes.size(0) // max(nB, 1)
    nonzero_ind = tboxes[:, 3] > 0
    tboxes = tboxes[nonzero_ind]
    # Batch index of every remaining box.
    ind_B = torch.arange(nB, device=device).unsqueeze(1).repeat(1, max_boxes).view(-1)
    ind_B = ind_B[nonzero_ind]

    gx = (tboxes[:, 1] * fw).float()
    gy = (tboxes[:, 2] * fh).float()
    gw = (tboxes[:, 3] * input_size[1]).float()
    gh = (tboxes[:, 4] * input_size[0]).float()
    aw = anchors[:, 0]
    ah = anchors[:, 1]
    nbox = tboxes.size(0)

    # Shape-only IoU: both box sets are centred at the origin.
    gt_box = torch.cat([torch.zeros(1, nbox, device=device), torch.zeros(1, nbox, device=device),
                        gw.unsqueeze(0), gh.unsqueeze(0)], 0)
    anchor_box = torch.cat([torch.zeros(1, nA, device=device), torch.zeros(1, nA, device=device),
                            aw.unsqueeze(0), ah.unsqueeze(0)], 0)
    ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA), anchor_box.unsqueeze(1).repeat(1, nbox, 1),
                     x1y1x2y2=False)
    best_ious, best_a = ious.max(1)
    gj = gy.long()
    gi = gx.long()
    obj_mask[ind_B, best_a, gj, gi] = 1
    noobj_mask[ind_B, best_a, gj, gi] = 0

    for i, iou in enumerate(ious):
        over = iou > ignore_thresh
        if over.sum():
            noobj_mask[ind_B[i:i + 1], over.nonzero().squeeze(1), gj[i:i + 1], gi[i:i + 1]] = 0

    tx[ind_B, best_a, gj, gi] = gx - gx.floor()
    ty[ind_B, best_a, gj, gi] = gy - gy.floor()
    tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
    th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
    tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
    tconf = obj_mask.float()

    pred_boxes = pred_boxes.contiguous().view(nB, nA, fh, fw, 4).to(device)
    conf = conf.contiguous().view(nB, nA, fh, fw).data
    target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
                              (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
                              gw.unsqueeze(0),
                              gh.unsqueeze(0)], 0)

    iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(), target_boxes,
                                                  x1y1x2y2=False)
    conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
    detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
    detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50

    return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75


class RegionLoss(nn.Module):
    """Single-scale YOLO region loss (legacy; see the note in ``forward``)."""

    def __init__(self, num_classes=0, anchors=None, num_anchors=1):
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = [] if anchors is None else anchors
        self.num_anchors = num_anchors
        # Integer division: the value is used as a tensor dimension in forward().
        self.anchor_step = len(self.anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0

    def forward(self, output, target):
        """Compute the summed coordinate, confidence and class loss.

        NOTE(review): the ``build_targets`` call below passes 11 arguments and
        unpacks 11 results, which does not match the ``build_targets`` defined
        in this file (8 parameters, 12 results). This method therefore cannot
        run against it as written; it looks like a leftover from an older
        version. Confirm before use.
        """
        # output : BxAs*(4+1+num_classes)*H*W
        if target.dim() == 3:
            target = target.view(-1, target.size(-1))
        output, target = neg_filter(output, target)

        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        output = output.view(nB, nA, (5 + nC), nH, nW)
        x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
        cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)

        pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
        grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        pred_boxes[0] = x.data + grid_x
        pred_boxes[1] = y.data + grid_y
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))

        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(
            pred_boxes, target.data, self.anchors, nA, nC, nH, nW,
            self.noobject_scale, self.object_scale, self.thresh, self.seen)
        cls_mask = (cls_mask == 1)
        if cfg.metayolo:
            tcls.zero_()
        nProposals = int((conf > 0.25).float().sum().item())

        tx = Variable(tx.cuda())
        ty = Variable(ty.cuda())
        tw = Variable(tw.cuda())
        th = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

        coord_mask = Variable(coord_mask.cuda())
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls = cls[cls_mask].view(-1, nC)

        # reduction='sum' is the documented replacement for size_average=False.
        sum_mse = nn.MSELoss(reduction='sum')
        loss_x = self.coord_scale * sum_mse(x * coord_mask, tx * coord_mask) / 2.0
        loss_y = self.coord_scale * sum_mse(y * coord_mask, ty * coord_mask) / 2.0
        loss_w = self.coord_scale * sum_mse(w * coord_mask, tw * coord_mask) / 2.0
        loss_h = self.coord_scale * sum_mse(h * coord_mask, th * coord_mask) / 2.0
        loss_conf = sum_mse(conf * conf_mask, tconf * conf_mask) / 2.0
        loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
            self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
            loss_conf.item(), loss_cls.item(), loss.item()))
        return loss


class RegionLossV2(nn.Module):
    """
    Yolo region loss + Softmax classification across meta-inputs
    """

    def __init__(self, num_classes=0, anchors=None, num_anchors=1, input_size=(832, 832)):
        super(RegionLossV2, self).__init__()
        self.num_classes = num_classes
        self.anchors = [] if anchors is None else anchors
        self.num_anchors = num_anchors
        self.coord_scale = 1
        self.class_scale = 1
        self.obj_scale = 1
        self.noobj_scale = 100
        self.thresh = 0.5
        self.seen = 0
        self.input_size = input_size
        # Down-sampling factor of each detection head, coarsest first.
        self.feature_scale = [32, 16, 8]
        print('class_scale', self.class_scale)

    def forward(self, output, target):
        """Compute the multi-scale loss and its training metrics.

        Args:
            output: ``(bs*cs, nA*(5+nC), N)`` predictions, where ``N`` is the
                total number of grid cells over all scales.
            target: ``(bs, cs, 50*5)`` ground truth, one row per image/class.

        Returns:
            ``(loss, loss_box, loss_conf, loss_cls, cls_acc, recall50,
            recall75, nProposals)``; only ``loss`` is a tensor.
        """
        bs = target.size(0)
        cs = target.size(1)
        nA = self.num_anchors
        nC = self.num_classes
        N = output.data.size(2)
        device = output.device
        num_scales = len(self.feature_scale)

        # Class scores of every cell, rearranged so that the `cs` class
        # branches of one image compete in a softmax: (bs * nA * N, cs).
        cls = output.view(output.size(0), nA, (5 + nC), N)[:, :, 5:5 + nC]
        cls = cls.reshape(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
        cls_max_ids = torch.max(F.softmax(cls, 1), 1)[1].data

        # Rearrange target and perform filtering operation
        target = target.view(-1, target.size(-1))
        output, target, inds = neg_filter_v2(output, target, withids=True)
        # Number of kept (image, class) rows per image.
        counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))

        nB = output.data.size(0)
        output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, (5+nC), N)
        x = torch.sigmoid(output[:, :, 0])  # (nB, nA, N)
        y = torch.sigmoid(output[:, :, 1])
        w = output[:, :, 2]
        h = output[:, :, 3]
        conf = torch.sigmoid(output[:, :, 4])

        # Per-scale cell offsets and strides, concatenated along the cell axis.
        feature_size = []
        grid_x, grid_y, scale = [], [], []
        for fs in self.feature_scale:
            feature_h = self.input_size[0] // fs
            feature_w = self.input_size[1] // fs
            ncell = feature_h * feature_w
            feature_size.append([feature_h, feature_w])
            grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1)
                          .repeat(nB * nA, 1, 1).view(nB, nA, ncell).to(device))
            grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t()
                          .repeat(nB * nA, 1, 1).view(nB, nA, ncell).to(device))
            scale.append((torch.ones(nB, nA, ncell) * fs).to(device))
        grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)
        grid_y = torch.cat(grid_y, 2)
        scale = torch.cat(scale, 2)

        # Each scale owns 6 consecutive anchor values (width/height pairs).
        scale_anchors = [torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1).to(device)
                         for i in range(num_scales)]
        anchor_w, anchor_h = [], []
        for i in range(num_scales):
            ncell = feature_size[i][0] * feature_size[i][1]
            anchor_w.append(scale_anchors[i][:, 0:1].repeat(nB, ncell).view(nB, nA, ncell))
            anchor_h.append(scale_anchors[i][:, 1:2].repeat(nB, ncell).view(nB, nA, ncell))
        anchor_w = torch.cat(anchor_w, 2)
        anchor_h = torch.cat(anchor_h, 2)

        pred_boxes = torch.stack([(x.data + grid_x) * scale,
                                  (y.data + grid_y) * scale,
                                  torch.exp(w.data) * anchor_w,
                                  torch.exp(h.data) * anchor_h], 0)  # (4, nB, nA, N)
        pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous())  # (nB, nA, N, 4)

        nGT = 0
        start_N = 0
        detected50 = None
        detected75 = None
        # One bucket per tensor gathered across scales, in this order:
        # obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls.
        buckets = [[] for _ in range(8)]
        for imap in range(num_scales):
            ncell = feature_size[imap][0] * feature_size[imap][1]
            (nGT, _iou_scores, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp,
             tconf_temp, tcls_temp, detected50_temp, detected75_temp) = build_targets(
                pred_boxes[:, :, start_N:start_N + ncell, :],
                target.data.to(device),
                conf[:, :, start_N:start_N + ncell],
                scale_anchors[imap],
                nA,
                feature_size[imap],
                self.input_size,
                self.thresh)
            if detected50 is None:
                detected50 = torch.zeros(nGT, device=device)
                detected75 = torch.zeros(nGT, device=device)
            # A box counts as detected if any scale detects it.
            detected50 += detected50_temp
            detected75 += detected75_temp
            start_N += ncell
            per_scale = (obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp,
                         tconf_temp, tcls_temp)
            for bucket, part in zip(buckets, per_scale):
                bucket.append(part.view(nB, nA, ncell))

        obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls = [torch.cat(bucket, 2) for bucket in buckets]

        # Take care of class mask: merge the kept class rows of each image so
        # that every image contributes one (nA, N) mask / class-id map.
        idx_start = 0
        cls_mask_list = []
        tcls_list = []
        for count in counts:
            count = int(count)
            if count == 0:
                cur_mask = torch.zeros(nA, N, device=device)
                cur_tcls = torch.zeros(nA, N, device=device)
            else:
                cur_mask = torch.sum(obj_mask[idx_start:idx_start + count].float(), dim=0)
                cur_tcls = torch.sum(tcls[idx_start:idx_start + count], dim=0)
            cls_mask_list.append(cur_mask)
            tcls_list.append(cur_tcls)
            idx_start += count
        # Only cells claimed by exactly one class are used for classification.
        cls_mask = torch.stack(cls_mask_list) == 1  # (bs, nA, N)
        tcls = torch.stack(tcls_list)

        recall50 = (detected50 > 0).float().sum() / (nGT + 1e-16)
        recall75 = (detected75 > 0).float().sum() / (nGT + 1e-16)
        nProposals = int((conf > 0.25).float().sum().item())

        obj_mask = obj_mask.bool()
        noobj_mask = noobj_mask.bool()
        flat_cls_mask = cls_mask.view(-1)
        cls = cls[flat_cls_mask]
        cls_max_ids = cls_max_ids[flat_cls_mask]
        tcls = tcls[cls_mask].long()
        cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

        ClassificationLoss = nn.CrossEntropyLoss()
        MseLoss = nn.MSELoss()
        BceLoss = nn.BCELoss()

        # NOTE(review): with an empty obj_mask these mean-reduced losses are
        # computed over zero elements — confirm that case cannot occur.
        loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
        loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
        loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
        loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
        loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
        loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        if len(cls):
            loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
        else:
            loss_cls = torch.zeros(1, device=device)

        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        loss_box = loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item()
        return (loss, loss_box, loss_conf.item(), loss_cls.item(), cls_acc,
                recall50.item(), recall75.item(), nProposals)


def select_classes(pred, tgt, ids):
    """Restrict predictions and targets to the classes listed in ``ids``.

    Samples whose target is not in ``ids`` are dropped; the remaining targets
    are remapped to their position in ``ids`` (so ``ids[0]`` becomes label 0)
    and the prediction columns are reduced to ``ids`` in the same order.

    Args:
        pred: ``(num_samples, num_classes)`` score tensor.
        tgt: ``(num_samples,)`` tensor of class ids.
        ids: sequence of class ids to keep.

    Returns:
        ``(new_pred, new_tgt)`` where ``new_tgt`` is a numpy int64 array.
    """
    # convert tgt to numpy
    tgt = tgt.cpu().data.numpy()
    # -1 marks "not selected", so label 0 (ids[0]) is not mistaken for it.
    remapped = np.full(tgt.shape, -1, dtype=np.int64)
    for new_label, class_id in enumerate(ids):
        remapped[tgt == class_id] = new_label
    idxes = np.nonzero(remapped >= 0)[0]
    new_pred = pred[idxes]
    new_pred = new_pred[:, ids]
    new_tgt = remapped[idxes]
    return new_pred, new_tgt

  

few-shot-learning for object detection的更多相关文章

  1. deep learning on object detection

    回归工作一周,忙的头晕,看了两三篇文章,主要在写各种文档和走各种办事流程了-- 这次来写写object detection最近看的三篇文章吧.都不是最近的文章,但是是今年的文章,我也想借此让自己赶快熟 ...

  2. 论文阅读笔记五十三:Libra R-CNN: Towards Balanced Learning for Object Detection(CVPR2019)

    论文原址:https://arxiv.org/pdf/1904.02701.pdf github:https://github.com/OceanPang/Libra_R-CNN 摘要 相比模型的结构 ...

  3. Object detection with deep learning and OpenCV

    目录 Single Shot Detectors for Object Detection Deep learning-based object detection with OpenCV   这篇文 ...

  4. (转)Awesome Object Detection

    Awesome Object Detection 2018-08-10 09:30:40 This blog is copied from: https://github.com/amusi/awes ...

  5. [Arxiv1706] Few-Example Object Detection with Model Communication 论文笔记

    p.p1 { margin: 0.0px 0.0px 0.0px 0.0px; font: 13.0px "Helvetica Neue"; color: #042eee } p. ...

  6. object detection 总结

    1.基础 自己对于YOLOV1,2,3都比较熟悉. RCNN也比较熟悉.这个是自己目前掌握的基础2.第一步 看一下2019年的井喷的anchor free的网络3.第二步 看一下以往,引用多的网路4. ...

  7. 关于目标检测(Object Detection)的文献整理

    本文对CV中目标检测子方向的研究,整理了如下的相关笔记(持续更新中): 1. Cascade R-CNN: Delving into High Quality Object Detection 年份: ...

  8. 论文学习-深度学习目标检测2014至201901综述-Deep Learning for Generic Object Detection A Survey

    目录 写在前面 目标检测任务与挑战 目标检测方法汇总 基础子问题 基于DCNN的特征表示 主干网络(network backbone) Methods For Improving Object Rep ...

  9. 课程四(Convolutional Neural Networks),第三 周(Object detection) —— 0.Learning Goals

    Learning Goals: Understand the challenges of Object Localization, Object Detection and Landmark Find ...

  10. 论文阅读之: Hierarchical Object Detection with Deep Reinforcement Learning

    Hierarchical Object Detection with Deep Reinforcement Learning NIPS 2016 WorkShop  Paper : https://a ...

随机推荐

  1. centos python3虚拟环境

    为什么需要虚拟环境? 在使用 Python 语言时,通过 pip(pip3)来安装第三方包,但是由于 pip 的特性,系统中只能安装每个包的一个版本.但是在实际项目开发中,不同项目可能需要第三方包的不 ...

  2. 世界银行使用.NET 7开发的免费电子问卷制作系统Survey Solution

    Survey Solution (下文简称SS) 是世界银行数据部开发的一套免费电子问卷制作系统, 官网地址为: https://mysurvey.solutions/, github地址:https ...

  3. Educational Codeforces Round 141:C. Yet Another Tournament

    一.来源:Problem - C - Codeforces 二.题面 三.思路 读题: 其他人的胜场由位次决定,对于第i位,其胜场为i-1 人数为\(5·10^5\),不是5(看错了) 每个人和自己比 ...

  4. Deeplink实践原理分析

    目录介绍 01.先看一个场景 02.什么是DeepLink 03.什么是Deferred DeepLink 04.什么是AppLink 05.DeepLink和AppLink核心技术 06.DeepL ...

  5. Dialog源码分析

    目录介绍 1.简单用法 2.AlertDialog源码分析 2.1 AlertDialog.Builder的构造方法 2.2 通过AlertDialog.Builder对象设置属性 2.3 build ...

  6. 【Leetcode】53. 最大子数组和

    题目(链接) 给你一个整数数组nums,请你找出一个具有最大和的连续子数组(子数组最少包含一个元素),返回其最大和. 子数组是数组中的一个连续部分. 示例 1: 输入:nums = [-2,1,-3, ...

  7. 您真的了解Java中的锁吗?这7种不同维度下的锁知道吗?

    写在开头 在上几篇博文中,我们聊到过volatile关键字,用它修饰变量可以保证可见性与有序性,但它并不是锁,在使用时并不会阻塞线程,且不保证原子性,属于一种轻量级.高效的同步方式,因此,如果我们的使 ...

  8. 记录--怎么写一个可以鼠标控制旋转的div?

    这里给大家分享我在网上总结出来的一些知识,希望对大家有所帮助 说在前面 鼠标控制元素旋转在现在也是一个很常见的功能,让我们从实现div元素的旋转控制开始来了解元素旋转的具体原理和实现方法吧. 效果展示 ...

  9. hdfs中acl权限管理的简单实用

    1.背景 在我们开发的过程中有这么一种场景, /projectA 目录是 hadoopdeploy用户创建的,他对这个目录有wrx权限,同时这个目录属于supergroup,在这个组中的用户也具有这个 ...

  10. Twitter的分布式自增ID算法snowflake(雪花算法) C#和Java版

    概述 分布式系统中,有一些需要使用全局唯一ID的场景,这种时候为了防止ID冲突可以使用36位的UUID, 但是UUID有一些缺点,首先他相对比较长,另外UUID一般是无序的. 有些时候我们希望能使用一 ...