github  https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection

train.py

from __future__ import print_function
import sys import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
from torch.autograd import Variable
from tqdm import tqdm import dataset
import random
import math
import os
from utils import *
from cfg import parse_cfg, cfg
from darknet import Darknet
import pdb # Training settings
# datacfg = sys.argv[1]
# darknetcfg = parse_cfg(sys.argv[2])
# learnetcfg = parse_cfg(sys.argv[3]) datacfg = "cfg/fewyolov3_voc.data"
darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")
learnetcfg = parse_cfg("cfg/reweighting_net.cfg")
weightfile = "tmp/000050.weights"
if len(sys.argv) == 5:
weightfile = sys.argv[4] data_options = read_data_cfg(datacfg)
net_options = darknetcfg[0]
meta_options = learnetcfg[0] # Configure options
cfg.config_data(data_options)
cfg.config_meta(meta_options)
cfg.config_net(net_options) # Parameters
metadict = data_options['meta']
trainlist = data_options['train'] testlist = data_options['valid']
backupdir = data_options['backup']
gpus = data_options['gpus'] # e.g. 0,1,2,3
ngpus = len(gpus.split(','))
num_workers = int(data_options['num_workers']) batch_size = int(net_options['batch'])
print("batch_size:",batch_size)
max_batches = int(net_options['max_batches'])
learning_rate = float(data_options['learning_rate'])
momentum = float(net_options['momentum'])
decay = float(net_options['decay'])
steps = [float(step) for step in data_options['steps'].split(',')]
scales = [float(scale) for scale in data_options['scales'].split(',')] # Train parameters
use_cuda = True
seed = int(time.time()) ## --------------------------------------------------------------------------
## MAIN
backupdir = cfg.backup
print('logging to ' + backupdir)
if not os.path.exists(backupdir):
os.makedirs(backupdir) torch.manual_seed(seed)
if use_cuda:
os.environ['CUDA_VISIBLE_DEVICES'] = gpus
torch.cuda.manual_seed(seed) model = Darknet(darknetcfg, learnetcfg)
region_loss = model.loss model.print_network()
# if len(sys.argv) == 5:
model.load_weights(weightfile) ###################################################
### Meta-model parameters
region_loss.seen = model.seen
processed_batches = 0 if cfg.tuning else model.seen / batch_size
trainlist = dataset.build_dataset(data_options)
nsamples = len(trainlist)
init_width = model.width
init_height = model.height
init_epoch = 0 if cfg.tuning else model.seen / nsamples
max_epochs = max_batches * batch_size / nsamples + 1
max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs
print(cfg.repeat, nsamples, max_batches, batch_size)
print(num_workers) kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {} if use_cuda:
if ngpus > 1:
model = torch.nn.DataParallel(model).cuda()
else:
model = model.cuda() optimizer = optim.Adam(model.parameters(), lr=learning_rate) def adjust_learning_rate(optimizer, processed_batches):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
lr = learning_rate
for i in range(len(steps)):
scale = scales[i] if i < len(scales) else 1
if processed_batches >= steps[i]:
lr = lr * scale
if processed_batches == steps[i]:
break
else:
break
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr def train(epoch):
global processed_batches
t0 = time.time()
if ngpus > 1:
cur_model = model.module
else:
cur_model = model train_loader = torch.utils.data.DataLoader(
dataset.listDataset(trainlist, shape=(init_width, init_height),
shuffle=False,
transform=transforms.Compose([
transforms.ToTensor(),
]),
train=True,
seen=cur_model.seen,
batch_size=batch_size,
num_workers=num_workers),
batch_size=batch_size, shuffle=False, **kwargs) metaset = dataset.MetaDataset(metafiles=metadict, train=True)
metaloader = torch.utils.data.DataLoader(
metaset,
batch_size=metaset.batch_size,
shuffle=False,
num_workers=num_workers,
pin_memory=True
)
metaloader = iter(metaloader) lr = adjust_learning_rate(optimizer, processed_batches)
logging('epoch %d/%d, processed %d samples, lr %e' % (epoch, max_epochs, epoch * len(train_loader.dataset), lr)) model.train()
t1 = time.time()
avg_time = torch.zeros(9)
with tqdm(total=train_loader.__len__()) as t: for batch_idx, (data, target) in enumerate(train_loader):
metax, mask = metaloader.next()
t2 = time.time()
adjust_learning_rate(optimizer, processed_batches)
processed_batches = processed_batches + 1
if use_cuda:
data = data.cuda()
metax = metax.cuda()
mask = mask.cuda()
# target= target.cuda()
t3 = time.time()
data, target = Variable(data), Variable(target)
metax, mask = Variable(metax), Variable(mask)
t4 = time.time()
optimizer.zero_grad()
t5 = time.time()
output = model(data, metax, mask)
t6 = time.time()
region_loss.seen = region_loss.seen + data.data.size(0)
cur_model.seen = region_loss.seen
region_loss.input_size = (data.data.size(2), data.data.size(3))
loss,loss_box,loss_conf,loss_cls,cls_acc,recall50,recall75,nProposals = region_loss(output, target)
t.set_description('Epoch %d' % epoch)
t.set_postfix(loss=loss.item(), loss_bbox=loss_box,loss_conf=loss_conf,loss_cls=loss_cls,
cls_acc=cls_acc, recall50=recall50, recall75=recall75,Proposals=nProposals)
t.update() t7 = time.time()
loss.backward()
t8 = time.time()
optimizer.step()
t9 = time.time()
if False and batch_idx > 1:
avg_time[0] = avg_time[0] + (t2 - t1)
avg_time[1] = avg_time[1] + (t3 - t2)
avg_time[2] = avg_time[2] + (t4 - t3)
avg_time[3] = avg_time[3] + (t5 - t4)
avg_time[4] = avg_time[4] + (t6 - t5)
avg_time[5] = avg_time[5] + (t7 - t6)
avg_time[6] = avg_time[6] + (t8 - t7)
avg_time[7] = avg_time[7] + (t9 - t8)
avg_time[8] = avg_time[8] + (t9 - t1)
print('-------------------------------')
print(' load data : %f' % (avg_time[0] / (batch_idx)))
print(' cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
print(' zero_grad : %f' % (avg_time[3] / (batch_idx)))
print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
print(' forward loss : %f' % (avg_time[5] / (batch_idx)))
print(' backward : %f' % (avg_time[6] / (batch_idx)))
print(' step : %f' % (avg_time[7] / (batch_idx)))
print(' total : %f' % (avg_time[8] / (batch_idx)))
t1 = time.time()
print('')
t1 = time.time()
logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0))) if (epoch + 1) % cfg.save_interval == 0:
logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1)) init_epoch = int(init_epoch)
max_epochs = int(max_epochs)
print("init_epoch:",init_epoch)
print("max_epochs:",max_epochs)
for epoch in range(init_epoch, max_epochs):
train(epoch)

region_loss.py

import time
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.autograd import Variable
from utils import *
from cfg import cfg
from numbers import Number
from random import random, randint
import pdb def neg_filter(pred_boxes, target, withids=False):
assert pred_boxes.size(0) == target.size(0)
if cfg.neg_ratio == 'full':
inds = list(range(pred_boxes.size(0)))
elif isinstance(cfg.neg_ratio, Number):
flags = torch.sum(target, 1) != 0
flags = flags.cpu().data.tolist()
ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))
if ratio >= 1:
inds = list(range(pred_boxes.size(0)))
else:
flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
inds = np.argwhere(flags).squeeze()
pred_boxes, target = pred_boxes[inds], target[inds]
else:
raise NotImplementedError('neg_ratio not recognized')
if withids:
return pred_boxes, target, inds
else:
return pred_boxes, target def neg_filter_v2(pred_boxes, target, withids=False):
assert pred_boxes.size(0) == target.size(0)
if cfg.neg_ratio == 'full':
inds = list(range(pred_boxes.size(0)))
elif isinstance(cfg.neg_ratio, Number):
flags = torch.sum(target, 1) != 0
flags = flags.cpu().data.tolist()
ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))
if ratio >= 1:
inds = list(range(pred_boxes.size(0)))
else:
flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
if sum(flags) == 0:
flags[randint(0, len(flags) - 1)] = 1
inds = np.nonzero(flags)[0]
pred_boxes, target = pred_boxes[inds], target[inds]
else:
raise NotImplementedError('neg_ratio not recognized')
if withids:
return pred_boxes, target, inds
else:
return pred_boxes, target def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
nB = target.size(0)
nA = num_anchors
# print('anchor_step: ', anchor_step)
obj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(0)
noobj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(1)
tx = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
ty = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
tw = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
th = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
tcls = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()
iou_scores = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda() tboxes = target.view(-1, 5)
nonzero_ind = tboxes[:, 3] > 0
tboxes = tboxes[nonzero_ind.unsqueeze(1).repeat(1, 5)].view(-1, 5)
ind_B = torch.linspace(0, nB - 1, nB).unsqueeze(1).repeat(1, 50).view(-1).long().cuda()
ind_B = ind_B[nonzero_ind]
gx = (tboxes[:, 1] * feature_size[1]).float()
gy = (tboxes[:, 2] * feature_size[0]).float()
gw = (tboxes[:, 3] * input_size[1]).float()
gh = (tboxes[:, 4] * input_size[0]).float()
aw = anchors[:, 0]
ah = anchors[:, 1]
nbox = tboxes.size(0)
gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(), gw.unsqueeze(0), gh.unsqueeze(0)], 0)
anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(), aw.unsqueeze(0), ah.unsqueeze(0)], 0)
ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA), anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)
best_ious, best_a = ious.max(1)
gj = gy.long()
gi = gx.long()
obj_mask[ind_B, best_a, gj, gi] = 1
noobj_mask[ind_B, best_a, gj, gi] = 0 for i, iou in enumerate(ious):
if (iou > ignore_thresh).sum():
noobj_mask[ind_B[i:i + 1], (iou > ignore_thresh).nonzero().squeeze(1), gj[i:i + 1], gi[i:i + 1]] = 0 tx[ind_B, best_a, gj, gi] = gx - gx.floor()
ty[ind_B, best_a, gj, gi] = gy - gy.floor()
tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
tconf = obj_mask.float()
pred_boxes = pred_boxes.contiguous().view(nB, nA, feature_size[0], feature_size[1], 4).cuda()
conf = conf.contiguous().view(nB, nA, feature_size[0], feature_size[1]).data
target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
(tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
gw.unsqueeze(0),
gh.unsqueeze(0)], 0) iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(), target_boxes, x1y1x2y2=False)
conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50 return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75 class RegionLoss(nn.Module):
def __init__(self, num_classes=0, anchors=[], num_anchors=1):
super(RegionLoss, self).__init__()
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
self.anchor_step = len(anchors) / num_anchors
self.coord_scale = 1
self.noobject_scale = 1
self.object_scale = 5
self.class_scale = 1
self.thresh = 0.6
self.seen = 0 def forward(self, output, target):
# import pdb; pdb.set_trace()
# output : BxAs*(4+1+num_classes)*H*W # if target.dim() == 3:
# # target : B * n_cls * l
# l = target.size(-1)
# target = target.permute(1,0,2).contiguous().view(-1, l)
if target.dim() == 3:
target = target.view(-1, target.size(-1))
bef = target.size(0)
output, target = neg_filter(output, target)
# print("{}/{}".format(target.size(0), bef)) t0 = time.time()
nB = output.data.size(0)
nA = self.num_anchors
nC = self.num_classes
nH = output.data.size(2)
nW = output.data.size(3) output = output.view(nB, nA, (5 + nC), nH, nW)
x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
# [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC) t1 = time.time() pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
pred_boxes[0] = x.data + grid_x
pred_boxes[1] = y.data + grid_y
pred_boxes[2] = torch.exp(w.data) * anchor_w
pred_boxes[3] = torch.exp(h.data) * anchor_h
pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
t2 = time.time() nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
target.data,
self.anchors, nA,
nC, \
nH, nW,
self.noobject_scale,
self.object_scale,
self.thresh,
self.seen)
cls_mask = (cls_mask == 1)
if cfg.metayolo:
tcls.zero_()
nProposals = int((conf > 0.25).float().sum().data[0]) tx = Variable(tx.cuda())
ty = Variable(ty.cuda())
tw = Variable(tw.cuda())
th = Variable(th.cuda())
tconf = Variable(tconf.cuda())
tcls = Variable(tcls.view(-1)[cls_mask].long().cuda()) coord_mask = Variable(coord_mask.cuda())
conf_mask = Variable(conf_mask.cuda().sqrt())
cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
cls = cls[cls_mask].view(-1, nC) t3 = time.time() loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x * coord_mask, tx * coord_mask) / 2.0
loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y * coord_mask, ty * coord_mask) / 2.0
loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w * coord_mask, tw * coord_mask) / 2.0
loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h * coord_mask, th * coord_mask) / 2.0
loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2.0
loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)
loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
t4 = time.time()
if False:
print('-----------------------------------')
print(' activation : %f' % (t1 - t0))
print(' create pred_boxes : %f' % (t2 - t1))
print(' build targets : %f' % (t3 - t2))
print(' create loss : %f' % (t4 - t3))
print(' total : %f' % (t4 - t0))
print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0],
loss_conf.data[0], loss_cls.data[0], loss.data[0]))
return loss class RegionLossV2(nn.Module):
"""
Yolo region loss + Softmax classification across meta-inputs
""" def __init__(self, num_classes=0, anchors=[], num_anchors=1, input_size=(832, 832)):
super(RegionLossV2, self).__init__()
self.num_classes = num_classes
self.anchors = anchors
self.num_anchors = num_anchors
self.coord_scale = 1
self.class_scale = 1
self.obj_scale = 1
self.noobj_scale = 100
self.thresh = 0.5
self.seen = 0
self.input_size = input_size
self.feature_scale = [32, 16, 8]
print('class_scale', self.class_scale) def forward(self, output, target):
# output : (bs*cs, nA*(5+1), N)
# target : (bs, cs, 50*5)
# Get all classification prediction
# pdb.set_trace()
bs = target.size(0)
cs = target.size(1)
nA = self.num_anchors
nC = self.num_classes
N = output.data.size(2)
# feature_size = [[26, 26], [52, 52], [104, 104]]
cls = output.view(output.size(0), nA, (5 + nC), N)
cls = cls.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())).squeeze()
cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
cls_conf = F.softmax(cls, 1)
_, cls_max_ids = torch.max(cls_conf, 1)
cls_max_ids = cls_max_ids.data
pre_cls_mask = torch.zeros(bs * nA * N, cs).cuda()
pre_cls_mask[torch.linspace(0, bs * nA * N - 1, bs * nA * N).long().cuda(), cls_max_ids] = 1
pre_cls_mask = pre_cls_mask.view(bs, nA * N, cs).transpose(1, 2).contiguous().view(bs * cs, nA, N) # Rearrange target and perform filtering operation
target = target.view(-1, target.size(-1))
# bef = target.size(0)
output, target, inds = neg_filter_v2(output, target, withids=True)
counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))
# print("{}/{}".format(target.size(0), bef))
pre_cls_mask = pre_cls_mask[inds] t0 = time.time()
nB = output.data.size(0) output = output.view(nB, nA, (5 + nC), N) # (nB, nA, (5+nC), N)
x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).squeeze(2)) # (nB, nA, N)
y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).squeeze(2))
w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).squeeze(2)
h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).squeeze(2)
conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).squeeze(2))
# [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
# cls = output.index_select(2, Variable(torch.linspace(5,5+nC-1,nC).long().cuda()))
# cls = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)
t1 = time.time() pred_boxes = torch.cuda.FloatTensor(4, nB, nA, N)
grid_x = []
grid_y = []
anchor_w = []
anchor_h = []
scale = []
feature_size = []
for fs in self.feature_scale:
feature_h = self.input_size[0] // fs
feature_w = self.input_size[1] // fs
# print("feature_h:",feature_h)
# print("feature_w:",feature_w)
feature_size.append([feature_h, feature_w])
grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1) \
.repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t() \
.repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
scale.append((torch.ones(nB, nA, feature_h * feature_w) * fs).cuda())
grid_x = torch.cat(grid_x, 2) # (nB, nA, N)
grid_y = torch.cat(grid_y, 2)
scale = torch.cat(scale, 2)
for i in range(3):
aw = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \
.index_select(1, torch.LongTensor([0])).cuda()
ah = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \
.index_select(1, torch.LongTensor([1])).cuda()
anchor_w.append(aw.repeat(nB, feature_size[i][0] * feature_size[i][1]) \
.view(nB, nA, feature_size[i][0] * feature_size[i][1]))
anchor_h.append(ah.repeat(nB, feature_size[i][0] * feature_size[i][1]) \
.view(nB, nA, feature_size[i][0] * feature_size[i][1]))
anchor_w = torch.cat(anchor_w, 2)
anchor_h = torch.cat(anchor_h, 2)
pred_boxes[0] = (x.data + grid_x) * scale
pred_boxes[1] = (y.data + grid_y) * scale
pred_boxes[2] = torch.exp(w.data) * anchor_w
pred_boxes[3] = torch.exp(h.data) * anchor_h
pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous()) # (nB, nA, N, 4)
t2 = time.time()
nGT = 0
iou_scores = []
obj_mask = []
noobj_mask = []
tx = []
ty = []
tw = []
th = []
tconf = []
tcls = []
start_N = 0
detected50 = torch.zeros(0)
detected75 = torch.zeros(0)
for imap in range(3):
nGT, iou_scores_temp, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp, tconf_temp, \
tcls_temp, detected50_temp, detected75_temp = build_targets(
pred_boxes[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1], :],
target.data.cuda(),
conf[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1]],
torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),
nA,
feature_size[imap],
self.input_size,
self.thresh)
if not len(detected50):
detected50 = torch.zeros(nGT).cuda()
if not len(detected75):
detected75 = torch.zeros(nGT).cuda()
detected50 += detected50_temp
detected75 += detected75_temp
start_N += feature_size[imap][0] * feature_size[imap][1]
iou_scores.append(iou_scores_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
obj_mask.append(obj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
noobj_mask.append(noobj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
tx.append(tx_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
ty.append(ty_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
tw.append(tw_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
th.append(th_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
tconf.append(tconf_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))
tcls.append(tcls_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1])) iou_scores = torch.cat(iou_scores, 2)
obj_mask = torch.cat(obj_mask, 2)
noobj_mask = torch.cat(noobj_mask, 2)
tx = torch.cat(tx, 2)
ty = torch.cat(ty, 2)
tw = torch.cat(tw, 2)
th = torch.cat(th, 2)
tconf = torch.cat(tconf, 2)
tcls = torch.cat(tcls, 2) # Take care of class mask
idx_start = 0
cls_mask_list = []
tcls_list = []
for i in range(len(counts)):
if counts[i] == 0:
cur_mask = torch.zeros(nA, N).cuda()
cur_tcls = torch.zeros(nA, N).cuda()
else:
cur_mask = torch.sum(obj_mask[idx_start:idx_start + counts[i]].float(), dim=0)
cur_tcls = torch.sum(tcls[idx_start:idx_start + counts[i]], dim=0)
cls_mask_list.append(cur_mask)
tcls_list.append(cur_tcls)
idx_start += counts[i]
cls_mask = torch.stack(cls_mask_list) # (bs, nA, N)
tcls = torch.stack(tcls_list) cls_mask = (cls_mask == 1)
conf50 = (conf > 0.5).float().data
iou50 = (iou_scores > 0.5).float()
detected_mask = conf50 * tconf
precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
detected50 = (detected50 > 0).float()
detected75 = (detected75 > 0).float()
recall50 = detected50.sum() / (nGT + 1e-16)
recall75 = detected75.sum() / (nGT + 1e-16)
nProposals = int((conf > 0.25).float().sum().item())
tx = Variable(tx)
ty = Variable(ty)
tw = Variable(tw)
th = Variable(th)
tconf = Variable(tconf) obj_mask = Variable(obj_mask.bool())
noobj_mask = Variable(noobj_mask.bool())
# cls_mask = Variable(cls_mask.view(-1, 1).repeat(1,cs).cuda())
cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)
cls_max_ids = cls_max_ids[cls_mask.view(-1)]
tcls = Variable(tcls[cls_mask].long())
cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16) ClassificationLoss = nn.CrossEntropyLoss()
MseLoss = nn.MSELoss()
BceLoss = nn.BCELoss() t3 = time.time() loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
if len(cls):
loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
else:
loss_cls = Variable(torch.Tensor([0]).float().cuda()) # # pdb.set_trace()
# ids = [9,11,12,16]
# new_cls, new_tcls = select_classes(cls, tcls, ids)
# new_tcls = Variable(torch.from_numpy(new_tcls).long().cuda())
# loss_cls_new = self.class_scale * nn.CrossEntropyLoss(size_average=False)(new_cls, new_tcls)
# loss_cls_new *= 10
# loss_cls += loss_cls_new loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
t4 = time.time()
if False:
print('-----------------------------------')
print(' activation : %f' % (t1 - t0))
print(' create pred_boxes : %f' % (t2 - t1))
print(' build targets : %f' % (t3 - t2))
print(' create loss : %f' % (t4 - t3))
print(' total : %f' % (t4 - t0))
# print(
# '%d: nGT %d, precision %f, recall50 %f, recall75 %f, cls_acc %f, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % \
# (self.seen, nGT, precision, recall50, recall75, cls_acc, loss_x.item(), loss_y.item(), \
# loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), loss.item()))
# print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, cls_new %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0], loss_conf.data[0], loss_cls.data[0], loss_cls_new.data[0], loss.data[0]))
return loss,loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),loss_conf.item(),loss_cls.item(),cls_acc,recall50.item(),recall75.item(),nProposals def select_classes(pred, tgt, ids):
# convert tgt to numpy
tgt = tgt.cpu().data.numpy()
new_tgt = [(tgt == d) * i for i, d in enumerate(ids)]
new_tgt = np.max(np.stack(new_tgt), axis=0)
idxes = np.argwhere(new_tgt > 0).squeeze()
new_pred = pred[idxes]
new_pred = new_pred[:, ids]
new_tgt = new_tgt[idxes]
return new_pred, new_tgt

  

few-shot-learning for object detection的更多相关文章

  1. deep learning on object detection

    回归工作一周,忙的头晕,看了两三篇文章,主要在写各种文档和走各种办事流程了-- 这次来写写object detection最近看的三篇文章吧.都不是最近的文章,但是是今年的文章,我也想借此让自己赶快熟 ...

  2. 论文阅读笔记五十三:Libra R-CNN: Towards Balanced Learning for Object Detection(CVPR2019)

    论文原址:https://arxiv.org/pdf/1904.02701.pdf github:https://github.com/OceanPang/Libra_R-CNN 摘要 相比模型的结构 ...

  3. Object detection with deep learning and OpenCV

    目录 Single Shot Detectors for Object Detection Deep learning-based object detection with OpenCV   这篇文 ...

  4. (转)Awesome Object Detection

    Awesome Object Detection 2018-08-10 09:30:40 This blog is copied from: https://github.com/amusi/awes ...

  5. [Arxiv1706] Few-Example Object Detection with Model Communication 论文笔记

    p.p1 { margin: 0.0px 0.0px 0.0px 0.0px; font: 13.0px "Helvetica Neue"; color: #042eee } p. ...

  6. object detection 总结

    1.基础 自己对于YOLOV1,2,3都比较熟悉. RCNN也比较熟悉.这个是自己目前掌握的基础2.第一步 看一下2019年的井喷的anchor free的网络3.第二步 看一下以往,引用多的网路4. ...

  7. 关于目标检测(Object Detection)的文献整理

    本文对CV中目标检测子方向的研究,整理了如下的相关笔记(持续更新中): 1. Cascade R-CNN: Delving into High Quality Object Detection 年份: ...

  8. 论文学习-深度学习目标检测2014至201901综述-Deep Learning for Generic Object Detection A Survey

    目录 写在前面 目标检测任务与挑战 目标检测方法汇总 基础子问题 基于DCNN的特征表示 主干网络(network backbone) Methods For Improving Object Rep ...

  9. 课程四(Convolutional Neural Networks),第三 周(Object detection) —— 0.Learning Goals

    Learning Goals: Understand the challenges of Object Localization, Object Detection and Landmark Find ...

  10. 论文阅读之: Hierarchical Object Detection with Deep Reinforcement Learning

    Hierarchical Object Detection with Deep Reinforcement Learning NIPS 2016 WorkShop  Paper : https://a ...

随机推荐

  1. springboot+kaptcha生成数学运算验证码和字符验证码

    使用以下代码只需要复制粘贴,修改一处文本生成器路径即可,文中有交代. 1.添加kaptcha依赖 <dependency> <groupId>com.github.penggl ...

  2. 玩转Vue3之shallowRef和shallowReactive

    前言 Vue3 作为一款现代的 JavaScript 框架,引入了许多新的特性和改进,其中包括 shallowRef 和 shallowReactive.这两个功能在Vue 3中提供了更加灵活和高效的 ...

  3. ubuntu添加桌面快捷打开方式

    不太喜欢ubuntu开机后空荡荡的桌面,希望可以有些像windows一样的快捷打开方式.看了一些博客,也自己探索了一下,发现了在ubuntu中添加软件自带的桌面快捷打开方式的方法. 在终端 cd /u ...

  4. day04-2发送文件

    多用户即时通讯系统04 4.编码实现03 4.6功能实现-发送文件功能实现 4.6.1思路分析 客户端(发送者): 先把文件a.jpg读取到客户端的字节数组 把文件对应的字节数组封装到message对 ...

  5. String类为什么要用final修饰?

    final修饰符的意义? https://www.cnblogs.com/loren-Yang/p/13380318.html String类被实现的目标是什么? 效率和安全 如何实现期望? 参考文献 ...

  6. JavaScript知识总结 闭包篇

    这里给大家分享我在网上总结出来的一些知识,希望对大家有所帮助 1. 对闭包的理解 闭包是指有权访问另一个函数作用域中变量的函数,创建闭包的最常见的方式就是在一个函数内创建另一个函数,创建的函数可以访问 ...

  7. 记录--原生 canvas 如何实现大屏?

    这里给大家分享我在网上总结出来的一些知识,希望对大家有所帮助 前言 可视化大屏该如何做?有可能一天完成吗?废话不多说,直接看效果,线上 Demo 地址 lxfu1.github.io/large-sc ...

  8. 基于pytorch的图像训练识别

    一.准备数据集 分为测试集和训练集,文件如下排放 二.开始识别 数据集准备好后,即可导入到模型开始训练,运行下列代码 import time from torch.utils.tensorboard ...

  9. mybatis案例程序

    前置工作 导包(mysql-connector-java.mybatis) 实体类 Mapper层 1.接口 public interface BookMapper { public Book get ...

  10. Java事件侦听器学习记录

    前言 我们监听事件之前要有事件源source,创建事件源(Event),发布事件(publishEvent),然后才能到监听事件. 事件驱动机制是观察者模式(称发布订阅)具体实现,事件对象(Event ...