github  https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection

train.py

from __future__ import print_function
import sys import time
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch.backends.cudnn as cudnn
from torchvision import datasets, transforms
from torch.autograd import Variable
from tqdm import tqdm import dataset
import random
import math
import os
from utils import *
from cfg import parse_cfg, cfg
from darknet import Darknet
import pdb # Training settings
# datacfg = sys.argv[1]
# darknetcfg = parse_cfg(sys.argv[2])
# learnetcfg = parse_cfg(sys.argv[3]) datacfg = "cfg/fewyolov3_voc.data"
darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")
learnetcfg = parse_cfg("cfg/reweighting_net.cfg")
weightfile = "tmp/000050.weights"
if len(sys.argv) == 5:
weightfile = sys.argv[4] data_options = read_data_cfg(datacfg)
net_options = darknetcfg[0]
meta_options = learnetcfg[0] # Configure options
cfg.config_data(data_options)
cfg.config_meta(meta_options)
cfg.config_net(net_options) # Parameters
metadict = data_options['meta']
trainlist = data_options['train'] testlist = data_options['valid']
backupdir = data_options['backup']
gpus = data_options['gpus'] # e.g. 0,1,2,3
ngpus = len(gpus.split(','))
num_workers = int(data_options['num_workers']) batch_size = int(net_options['batch'])
print("batch_size:",batch_size)
max_batches = int(net_options['max_batches'])
learning_rate = float(data_options['learning_rate'])
momentum = float(net_options['momentum'])
decay = float(net_options['decay'])
steps = [float(step) for step in data_options['steps'].split(',')]
scales = [float(scale) for scale in data_options['scales'].split(',')] # Train parameters
use_cuda = True
seed = int(time.time()) ## --------------------------------------------------------------------------
## MAIN
backupdir = cfg.backup
print('logging to ' + backupdir)
if not os.path.exists(backupdir):
os.makedirs(backupdir) torch.manual_seed(seed)
if use_cuda:
os.environ['CUDA_VISIBLE_DEVICES'] = gpus
torch.cuda.manual_seed(seed) model = Darknet(darknetcfg, learnetcfg)
region_loss = model.loss model.print_network()
# if len(sys.argv) == 5:
model.load_weights(weightfile) ###################################################
### Meta-model parameters
region_loss.seen = model.seen
processed_batches = 0 if cfg.tuning else model.seen / batch_size
trainlist = dataset.build_dataset(data_options)
nsamples = len(trainlist)
init_width = model.width
init_height = model.height
init_epoch = 0 if cfg.tuning else model.seen / nsamples
max_epochs = max_batches * batch_size / nsamples + 1
max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs
print(cfg.repeat, nsamples, max_batches, batch_size)
print(num_workers) kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {} if use_cuda:
if ngpus > 1:
model = torch.nn.DataParallel(model).cuda()
else:
model = model.cuda() optimizer = optim.Adam(model.parameters(), lr=learning_rate) def adjust_learning_rate(optimizer, processed_batches):
"""Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""
lr = learning_rate
for i in range(len(steps)):
scale = scales[i] if i < len(scales) else 1
if processed_batches >= steps[i]:
lr = lr * scale
if processed_batches == steps[i]:
break
else:
break
for param_group in optimizer.param_groups:
param_group['lr'] = lr
return lr def train(epoch):
global processed_batches
t0 = time.time()
if ngpus > 1:
cur_model = model.module
else:
cur_model = model train_loader = torch.utils.data.DataLoader(
dataset.listDataset(trainlist, shape=(init_width, init_height),
shuffle=False,
transform=transforms.Compose([
transforms.ToTensor(),
]),
train=True,
seen=cur_model.seen,
batch_size=batch_size,
num_workers=num_workers),
batch_size=batch_size, shuffle=False, **kwargs) metaset = dataset.MetaDataset(metafiles=metadict, train=True)
metaloader = torch.utils.data.DataLoader(
metaset,
batch_size=metaset.batch_size,
shuffle=False,
num_workers=num_workers,
pin_memory=True
)
metaloader = iter(metaloader) lr = adjust_learning_rate(optimizer, processed_batches)
logging('epoch %d/%d, processed %d samples, lr %e' % (epoch, max_epochs, epoch * len(train_loader.dataset), lr)) model.train()
t1 = time.time()
avg_time = torch.zeros(9)
with tqdm(total=train_loader.__len__()) as t: for batch_idx, (data, target) in enumerate(train_loader):
metax, mask = metaloader.next()
t2 = time.time()
adjust_learning_rate(optimizer, processed_batches)
processed_batches = processed_batches + 1
if use_cuda:
data = data.cuda()
metax = metax.cuda()
mask = mask.cuda()
# target= target.cuda()
t3 = time.time()
data, target = Variable(data), Variable(target)
metax, mask = Variable(metax), Variable(mask)
t4 = time.time()
optimizer.zero_grad()
t5 = time.time()
output = model(data, metax, mask)
t6 = time.time()
region_loss.seen = region_loss.seen + data.data.size(0)
cur_model.seen = region_loss.seen
region_loss.input_size = (data.data.size(2), data.data.size(3))
loss,loss_box,loss_conf,loss_cls,cls_acc,recall50,recall75,nProposals = region_loss(output, target)
t.set_description('Epoch %d' % epoch)
t.set_postfix(loss=loss.item(), loss_bbox=loss_box,loss_conf=loss_conf,loss_cls=loss_cls,
cls_acc=cls_acc, recall50=recall50, recall75=recall75,Proposals=nProposals)
t.update() t7 = time.time()
loss.backward()
t8 = time.time()
optimizer.step()
t9 = time.time()
if False and batch_idx > 1:
avg_time[0] = avg_time[0] + (t2 - t1)
avg_time[1] = avg_time[1] + (t3 - t2)
avg_time[2] = avg_time[2] + (t4 - t3)
avg_time[3] = avg_time[3] + (t5 - t4)
avg_time[4] = avg_time[4] + (t6 - t5)
avg_time[5] = avg_time[5] + (t7 - t6)
avg_time[6] = avg_time[6] + (t8 - t7)
avg_time[7] = avg_time[7] + (t9 - t8)
avg_time[8] = avg_time[8] + (t9 - t1)
print('-------------------------------')
print(' load data : %f' % (avg_time[0] / (batch_idx)))
print(' cpu to cuda : %f' % (avg_time[1] / (batch_idx)))
print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))
print(' zero_grad : %f' % (avg_time[3] / (batch_idx)))
print(' forward feature : %f' % (avg_time[4] / (batch_idx)))
print(' forward loss : %f' % (avg_time[5] / (batch_idx)))
print(' backward : %f' % (avg_time[6] / (batch_idx)))
print(' step : %f' % (avg_time[7] / (batch_idx)))
print(' total : %f' % (avg_time[8] / (batch_idx)))
t1 = time.time()
print('')
t1 = time.time()
logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0))) if (epoch + 1) % cfg.save_interval == 0:
logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1)) init_epoch = int(init_epoch)
max_epochs = int(max_epochs)
print("init_epoch:",init_epoch)
print("max_epochs:",max_epochs)
for epoch in range(init_epoch, max_epochs):
train(epoch)

region_loss.py

import time
import torch
import math
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
from torch.autograd import Variable
from utils import *
from cfg import cfg
from numbers import Number
from random import random, randint
import pdb


def _select_neg_indices(target, keep_at_least_one):
    """Choose which rows survive negative sub-sampling.

    A row counts as negative when its ``target`` entries sum to zero.
    Returns ``(inds, subsampled)``: ``inds`` is the list of all row indices
    when nothing is dropped, otherwise a 1-D numpy array of kept indices;
    ``subsampled`` tells the caller whether it has to index its tensors.

    Raises:
        NotImplementedError: ``cfg.neg_ratio`` is neither ``'full'`` nor a number.
    """
    n_rows = target.size(0)
    if cfg.neg_ratio == 'full':
        return list(range(n_rows)), False
    if not isinstance(cfg.neg_ratio, Number):
        raise NotImplementedError('neg_ratio not recognized')

    flags = (torch.sum(target, 1) != 0).cpu().data.tolist()
    n_pos = sum(flags)
    n_neg = len(flags) - n_pos
    # With no negative rows there is nothing to drop; returning here also
    # avoids the division by zero in the ratio below.
    if n_neg == 0:
        return list(range(n_rows)), False

    ratio = cfg.neg_ratio * n_pos * 1. / n_neg
    if ratio >= 1:
        return list(range(n_rows)), False

    # Positives are always kept; each negative is kept with probability ratio.
    keep = [0 if f == 0 and random() > ratio else 1 for f in flags]
    if keep_at_least_one and sum(keep) == 0:
        keep[randint(0, len(keep) - 1)] = 1
    # np.nonzero(...)[0] stays 1-D even for a single survivor, so indexing
    # with it never drops the leading dimension.
    return np.nonzero(keep)[0], True


def neg_filter(pred_boxes, target, withids=False):
    """Randomly drop negative rows according to ``cfg.neg_ratio``.

    Args:
        pred_boxes: tensor whose first dimension matches ``target``.
        target: 2-D tensor; rows summing to zero are treated as negatives.
        withids: also return the kept row indices.

    Returns:
        ``(pred_boxes, target)`` or ``(pred_boxes, target, inds)``. May be
        empty when every row is negative and all of them are dropped.
    """
    assert pred_boxes.size(0) == target.size(0)
    inds, subsampled = _select_neg_indices(target, keep_at_least_one=False)
    if subsampled:
        pred_boxes, target = pred_boxes[inds], target[inds]
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def neg_filter_v2(pred_boxes, target, withids=False):
    """Like ``neg_filter`` but always keeps at least one row.

    When sub-sampling would drop every row, one row is picked at random so
    the caller never receives an empty batch.
    """
    assert pred_boxes.size(0) == target.size(0)
    inds, subsampled = _select_neg_indices(target, keep_at_least_one=True)
    if subsampled:
        pred_boxes, target = pred_boxes[inds], target[inds]
    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target


def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
    """Build regression/classification targets and masks for one feature map.

    Args:
        pred_boxes: predicted boxes, viewable as (nB, nA, H, W, 4).
        target: (nB, K * 5) CUDA tensor; each 5-tuple is read as
            (class, x, y, w, h) and tuples whose w is not positive are
            ignored. x, y, w, h are scaled by the feature/input size, so
            they are presumably normalised to [0, 1] -- confirm with the
            dataset code.
        conf: predicted confidences, viewable as (nB, nA, H, W).
        anchors: (nA, 2) CUDA tensor of anchor (w, h).
        num_anchors: nA.
        feature_size: (H, W) of this feature map.
        input_size: (height, width) used to scale box sizes.
        ignore_thresh: anchors whose IoU with a ground-truth box exceeds
            this are removed from the no-object mask.

    Returns:
        ``(nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf,
        tcls, detected50, detected75)``.

    Requires CUDA: all buffers are allocated on the GPU.
    """
    nB = target.size(0)
    nA = num_anchors
    nH, nW = feature_size[0], feature_size[1]
    obj_mask = torch.cuda.ByteTensor(nB, nA, nH, nW).fill_(0)
    noobj_mask = torch.cuda.ByteTensor(nB, nA, nH, nW).fill_(1)
    tx = torch.zeros(nB, nA, nH, nW).cuda()
    ty = torch.zeros(nB, nA, nH, nW).cuda()
    tw = torch.zeros(nB, nA, nH, nW).cuda()
    th = torch.zeros(nB, nA, nH, nW).cuda()
    tcls = torch.zeros(nB, nA, nH, nW).cuda()
    iou_scores = torch.zeros(nB, nA, nH, nW).cuda()

    tboxes = target.view(-1, 5)
    # Box slots per target row, derived from the data instead of assuming 50.
    boxes_per_row = tboxes.size(0) // nB if nB else 0
    nonzero_ind = tboxes[:, 3] > 0
    tboxes = tboxes[nonzero_ind]
    # Row index of the batch element each surviving box belongs to.
    ind_B = torch.arange(nB).unsqueeze(1).repeat(1, boxes_per_row).view(-1).long().cuda()
    ind_B = ind_B[nonzero_ind]

    gx = (tboxes[:, 1] * nW).float()
    gy = (tboxes[:, 2] * nH).float()
    gw = (tboxes[:, 3] * input_size[1]).float()
    gh = (tboxes[:, 4] * input_size[0]).float()
    aw = anchors[:, 0]
    ah = anchors[:, 1]
    nbox = tboxes.size(0)

    # Anchor matching compares shapes only: both box sets sit at the origin.
    gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(),
                        gw.unsqueeze(0), gh.unsqueeze(0)], 0)
    anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(),
                            aw.unsqueeze(0), ah.unsqueeze(0)], 0)
    ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA),
                     anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)
    best_ious, best_a = ious.max(1)
    gj = gy.long()
    gi = gx.long()
    obj_mask[ind_B, best_a, gj, gi] = 1
    noobj_mask[ind_B, best_a, gj, gi] = 0

    # Anchors that overlap a box well enough are not penalised as background.
    for i, iou in enumerate(ious):
        above = iou > ignore_thresh
        if above.sum():
            noobj_mask[ind_B[i:i + 1], above.nonzero().squeeze(1), gj[i:i + 1], gi[i:i + 1]] = 0

    tx[ind_B, best_a, gj, gi] = gx - gx.floor()
    ty[ind_B, best_a, gj, gi] = gy - gy.floor()
    tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
    th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
    tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
    tconf = obj_mask.float()

    pred_boxes = pred_boxes.contiguous().view(nB, nA, nH, nW, 4).cuda()
    conf = conf.contiguous().view(nB, nA, nH, nW).data
    target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
                              (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
                              gw.unsqueeze(0),
                              gh.unsqueeze(0)], 0)

    iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(), target_boxes,
                                                  x1y1x2y2=False)
    conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
    detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
    detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50

    return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75


class RegionLoss(nn.Module):
    """Single-scale region loss (sum-reduced MSE plus cross-entropy).

    NOTE(review): ``forward`` calls ``build_targets`` with eleven positional
    arguments and unpacks eleven results, while the ``build_targets`` defined
    in this file takes eight arguments and returns twelve values. As written
    it cannot run against that function -- confirm whether this class is
    still used or needs the older helper.
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1):
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        # Integer division: the value is used as a tensor view dimension.
        self.anchor_step = len(anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0

    def forward(self, output, target):
        """Return the summed coordinate, confidence and class loss.

        Args:
            output: network output, comment in the original says
                B x As*(4+1+num_classes) x H x W.
            target: 2-D targets, or 3-D targets that get flattened to 2-D.
        """
        # output : BxAs*(4+1+num_classes)*H*W
        if target.dim() == 3:
            target = target.view(-1, target.size(-1))
        bef = target.size(0)
        output, target = neg_filter(output, target)
        # print("{}/{}".format(target.size(0), bef))

        t0 = time.time()
        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        output = output.view(nB, nA, (5 + nC), nH, nW)
        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
        cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)

        t1 = time.time()

        pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
        grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        pred_boxes[0] = x.data + grid_x
        pred_boxes[1] = y.data + grid_y
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))
        t2 = time.time()

        # NOTE(review): this argument list and result tuple do not match the
        # build_targets defined in this file (see the class docstring).
        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
                                                                                                    target.data,
                                                                                                    self.anchors, nA,
                                                                                                    nC,
                                                                                                    nH, nW,
                                                                                                    self.noobject_scale,
                                                                                                    self.object_scale,
                                                                                                    self.thresh,
                                                                                                    self.seen)
        cls_mask = (cls_mask == 1)
        if cfg.metayolo:
            tcls.zero_()
        # .item() instead of .data[0]: the sum is a 0-dim tensor.
        nProposals = int((conf > 0.25).float().sum().item())

        tx = Variable(tx.cuda())
        ty = Variable(ty.cuda())
        tw = Variable(tw.cuda())
        th = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

        coord_mask = Variable(coord_mask.cuda())
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls = cls[cls_mask].view(-1, nC)

        t3 = time.time()

        # reduction='sum' is the current spelling of size_average=False.
        sum_mse = nn.MSELoss(reduction='sum')
        loss_x = self.coord_scale * sum_mse(x * coord_mask, tx * coord_mask) / 2.0
        loss_y = self.coord_scale * sum_mse(y * coord_mask, ty * coord_mask) / 2.0
        loss_w = self.coord_scale * sum_mse(w * coord_mask, tw * coord_mask) / 2.0
        loss_h = self.coord_scale * sum_mse(h * coord_mask, th * coord_mask) / 2.0
        loss_conf = sum_mse(conf * conf_mask, tconf * conf_mask) / 2.0
        loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        # Timing report; disabled by the constant False.
        if False:
            print('-----------------------------------')
            print(' activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print(' build targets : %f' % (t3 - t2))
            print(' create loss : %f' % (t4 - t3))
            print(' total : %f' % (t4 - t0))
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
            self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
            loss_conf.item(), loss_cls.item(), loss.item()))
        return loss


class RegionLossV2(nn.Module):
    """
    Yolo region loss + Softmax classification across meta-inputs
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1, input_size=(832, 832)):
        super(RegionLossV2, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.coord_scale = 1
        self.class_scale = 1
        self.obj_scale = 1
        self.noobj_scale = 100
        self.thresh = 0.5
        self.seen = 0
        # (height, width) of the network input; divided by each entry of
        # feature_scale to get the grid size of every output map.
        self.input_size = input_size
        self.feature_scale = [32, 16, 8]
        print('class_scale', self.class_scale)

    def forward(self, output, target):
        """Compute the multi-scale loss and training statistics.

        Args:
            output: (bs*cs, nA*(5+nC), N) predictions, N being the total
                number of grid cells over all feature maps.
            target: (bs, cs, K*5) ground truth per image and class.

        Returns:
            ``(loss, box_loss, conf_loss, cls_loss, cls_acc, recall50,
            recall75, nProposals)``; ``loss`` is a tensor, the rest are
            Python numbers.

        Requires CUDA.
        """
        # output : (bs*cs, nA*(5+1), N)
        # target : (bs, cs, 50*5)
        bs = target.size(0)
        cs = target.size(1)
        nA = self.num_anchors
        nC = self.num_classes
        N = output.data.size(2)

        # Class scores are regrouped so the softmax runs across the cs
        # class-conditioned predictions of each (image, anchor, cell).
        cls = output.view(output.size(0), nA, (5 + nC), N)
        cls = cls.index_select(2, torch.arange(5, 5 + nC).long().cuda()).squeeze()
        cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
        cls_conf = F.softmax(cls, 1)
        _, cls_max_ids = torch.max(cls_conf, 1)
        cls_max_ids = cls_max_ids.data
        pre_cls_mask = torch.zeros(bs * nA * N, cs).cuda()
        pre_cls_mask[torch.arange(bs * nA * N).long().cuda(), cls_max_ids] = 1
        pre_cls_mask = pre_cls_mask.view(bs, nA * N, cs).transpose(1, 2).contiguous().view(bs * cs, nA, N)

        # Rearrange target and perform filtering operation
        target = target.view(-1, target.size(-1))
        output, target, inds = neg_filter_v2(output, target, withids=True)
        # Number of surviving (image, class) rows per image.
        counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))
        pre_cls_mask = pre_cls_mask[inds]

        t0 = time.time()
        nB = output.data.size(0)

        output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, (5+nC), N)
        x = torch.sigmoid(output[:, :, 0])  # (nB, nA, N)
        y = torch.sigmoid(output[:, :, 1])
        w = output[:, :, 2]
        h = output[:, :, 3]
        conf = torch.sigmoid(output[:, :, 4])
        t1 = time.time()

        # Per-scale cell offsets, strides and anchor sizes, concatenated
        # along the cell axis in the order of self.feature_scale.
        grid_x = []
        grid_y = []
        scale = []
        feature_size = []
        for fs in self.feature_scale:
            feature_h = self.input_size[0] // fs
            feature_w = self.input_size[1] // fs
            cells = feature_h * feature_w
            feature_size.append([feature_h, feature_w])
            grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1)
                          .repeat(nB * nA, 1, 1).view(nB, nA, cells).cuda())
            grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t()
                          .repeat(nB * nA, 1, 1).view(nB, nA, cells).cuda())
            scale.append((torch.ones(nB, nA, cells) * fs).cuda())
        grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)
        grid_y = torch.cat(grid_y, 2)
        scale = torch.cat(scale, 2)

        anchor_w = []
        anchor_h = []
        for i, (feature_h, feature_w) in enumerate(feature_size):
            cells = feature_h * feature_w
            # Six consecutive anchor values belong to each scale.
            scale_anchors = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1)
            aw = scale_anchors.index_select(1, torch.LongTensor([0])).cuda()
            ah = scale_anchors.index_select(1, torch.LongTensor([1])).cuda()
            anchor_w.append(aw.repeat(nB, cells).view(nB, nA, cells))
            anchor_h.append(ah.repeat(nB, cells).view(nB, nA, cells))
        anchor_w = torch.cat(anchor_w, 2)
        anchor_h = torch.cat(anchor_h, 2)

        pred_boxes = torch.stack([(x.data + grid_x) * scale,
                                  (y.data + grid_y) * scale,
                                  torch.exp(w.data) * anchor_w,
                                  torch.exp(h.data) * anchor_h], 3)  # (nB, nA, N, 4)
        pred_boxes = convert2cpu(pred_boxes.contiguous())
        t2 = time.time()

        nGT = 0
        iou_scores = []
        obj_mask = []
        noobj_mask = []
        tx = []
        ty = []
        tw = []
        th = []
        tconf = []
        tcls = []
        start_N = 0
        detected50 = torch.zeros(0)
        detected75 = torch.zeros(0)
        for imap, (feature_h, feature_w) in enumerate(feature_size):
            cells = feature_h * feature_w
            nGT, iou_scores_temp, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp, tconf_temp, \
                tcls_temp, detected50_temp, detected75_temp = build_targets(
                    pred_boxes[:, :, start_N:start_N + cells, :],
                    target.data.cuda(),
                    conf[:, :, start_N:start_N + cells],
                    torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),
                    nA,
                    feature_size[imap],
                    self.input_size,
                    self.thresh)
            # A ground-truth box counts as detected if any scale detects it.
            if not len(detected50):
                detected50 = torch.zeros(nGT).cuda()
            if not len(detected75):
                detected75 = torch.zeros(nGT).cuda()
            detected50 += detected50_temp
            detected75 += detected75_temp
            start_N += cells
            iou_scores.append(iou_scores_temp.view(nB, nA, cells))
            obj_mask.append(obj_mask_temp.view(nB, nA, cells))
            noobj_mask.append(noobj_mask_temp.view(nB, nA, cells))
            tx.append(tx_temp.view(nB, nA, cells))
            ty.append(ty_temp.view(nB, nA, cells))
            tw.append(tw_temp.view(nB, nA, cells))
            th.append(th_temp.view(nB, nA, cells))
            tconf.append(tconf_temp.view(nB, nA, cells))
            tcls.append(tcls_temp.view(nB, nA, cells))

        iou_scores = torch.cat(iou_scores, 2)
        obj_mask = torch.cat(obj_mask, 2)
        noobj_mask = torch.cat(noobj_mask, 2)
        tx = torch.cat(tx, 2)
        ty = torch.cat(ty, 2)
        tw = torch.cat(tw, 2)
        th = torch.cat(th, 2)
        tconf = torch.cat(tconf, 2)
        tcls = torch.cat(tcls, 2)

        # Take care of class mask: merge the per-class rows of each image
        # back into one mask / class-id map per image.
        idx_start = 0
        cls_mask_list = []
        tcls_list = []
        for count in counts:
            if count == 0:
                cur_mask = torch.zeros(nA, N).cuda()
                cur_tcls = torch.zeros(nA, N).cuda()
            else:
                cur_mask = torch.sum(obj_mask[idx_start:idx_start + count].float(), dim=0)
                cur_tcls = torch.sum(tcls[idx_start:idx_start + count], dim=0)
            cls_mask_list.append(cur_mask)
            tcls_list.append(cur_tcls)
            idx_start += count
        cls_mask = torch.stack(cls_mask_list)  # (bs, nA, N)
        tcls = torch.stack(tcls_list)

        # Only cells claimed by exactly one class take part in classification.
        cls_mask = (cls_mask == 1)
        conf50 = (conf > 0.5).float().data
        iou50 = (iou_scores > 0.5).float()
        detected_mask = conf50 * tconf
        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)
        detected50 = (detected50 > 0).float()
        detected75 = (detected75 > 0).float()
        recall50 = detected50.sum() / (nGT + 1e-16)
        recall75 = detected75.sum() / (nGT + 1e-16)
        nProposals = int((conf > 0.25).float().sum().item())
        tx = Variable(tx)
        ty = Variable(ty)
        tw = Variable(tw)
        th = Variable(th)
        tconf = Variable(tconf)

        obj_mask = Variable(obj_mask.bool())
        noobj_mask = Variable(noobj_mask.bool())
        cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)
        cls_max_ids = cls_max_ids[cls_mask.view(-1)]
        tcls = Variable(tcls[cls_mask].long())
        cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

        ClassificationLoss = nn.CrossEntropyLoss()
        MseLoss = nn.MSELoss()
        BceLoss = nn.BCELoss()

        t3 = time.time()

        if bool(obj_mask.any()):
            loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
            loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
            loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
            loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
            loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
        else:
            # No ground-truth box in the batch: a mean-reduced loss over an
            # empty selection is NaN and would poison the total loss.
            zero = conf.new_zeros(())
            loss_x = loss_y = loss_w = loss_h = loss_conf_obj = zero
        loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj
        if len(cls):
            loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
        else:
            loss_cls = Variable(torch.Tensor([0]).float().cuda())

        # Disabled experiment: extra classification loss on selected classes.
        # ids = [9,11,12,16]
        # new_cls, new_tcls = select_classes(cls, tcls, ids)
        # new_tcls = Variable(torch.from_numpy(new_tcls).long().cuda())
        # loss_cls_new = self.class_scale * nn.CrossEntropyLoss(size_average=False)(new_cls, new_tcls)
        # loss_cls_new *= 10
        # loss_cls += loss_cls_new

        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls
        t4 = time.time()
        # Timing report; disabled by the constant False.
        if False:
            print('-----------------------------------')
            print(' activation : %f' % (t1 - t0))
            print(' create pred_boxes : %f' % (t2 - t1))
            print(' build targets : %f' % (t3 - t2))
            print(' create loss : %f' % (t4 - t3))
            print(' total : %f' % (t4 - t0))
        return (loss,
                loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),
                loss_conf.item(),
                loss_cls.item(),
                cls_acc,
                recall50.item(),
                recall75.item(),
                nProposals)


def select_classes(pred, tgt, ids):
    """Keep only samples whose target is in ``ids`` and relabel them.

    Args:
        pred: (n, C) score tensor.
        tgt: tensor of n class ids (treated as flat).
        ids: list of class ids to keep; also the score columns to keep.

    Returns:
        ``(new_pred, new_tgt)``: ``new_pred`` holds the kept rows restricted
        to columns ``ids``; ``new_tgt`` is a numpy array giving, for each
        kept row, the position of its class within ``ids``.

    Samples of the first id (position 0) are kept, and a single match still
    yields a 2-D ``new_pred``.
    """
    # convert tgt to numpy
    tgt = tgt.cpu().data.numpy()
    # matches[r, k] is True when sample r has class ids[k].
    matches = tgt.reshape(-1, 1) == np.asarray(ids).reshape(1, -1)
    idxes = np.flatnonzero(matches.any(axis=1))
    new_tgt = matches[idxes].argmax(axis=1)
    new_pred = pred[torch.from_numpy(idxes).long().to(pred.device)]
    new_pred = new_pred[:, ids]
    return new_pred, new_tgt

  

few-shot-learning for object detection的更多相关文章

  1. deep learning on object detection

    回归工作一周,忙的头晕,看了两三篇文章,主要在写各种文档和走各种办事流程了-- 这次来写写object detection最近看的三篇文章吧.都不是最近的文章,但是是今年的文章,我也想借此让自己赶快熟 ...

  2. 论文阅读笔记五十三:Libra R-CNN: Towards Balanced Learning for Object Detection(CVPR2019)

    论文原址:https://arxiv.org/pdf/1904.02701.pdf github:https://github.com/OceanPang/Libra_R-CNN 摘要 相比模型的结构 ...

  3. Object detection with deep learning and OpenCV

    目录 Single Shot Detectors for Object Detection Deep learning-based object detection with OpenCV   这篇文 ...

  4. (转)Awesome Object Detection

    Awesome Object Detection 2018-08-10 09:30:40 This blog is copied from: https://github.com/amusi/awes ...

  5. [Arxiv1706] Few-Example Object Detection with Model Communication 论文笔记

    p.p1 { margin: 0.0px 0.0px 0.0px 0.0px; font: 13.0px "Helvetica Neue"; color: #042eee } p. ...

  6. object detection 总结

    1.基础 自己对于YOLOV1,2,3都比较熟悉. RCNN也比较熟悉.这个是自己目前掌握的基础2.第一步 看一下2019年的井喷的anchor free的网络3.第二步 看一下以往,引用多的网路4. ...

  7. 关于目标检测(Object Detection)的文献整理

    本文对CV中目标检测子方向的研究,整理了如下的相关笔记(持续更新中): 1. Cascade R-CNN: Delving into High Quality Object Detection 年份: ...

  8. 论文学习-深度学习目标检测2014至201901综述-Deep Learning for Generic Object Detection A Survey

    目录 写在前面 目标检测任务与挑战 目标检测方法汇总 基础子问题 基于DCNN的特征表示 主干网络(network backbone) Methods For Improving Object Rep ...

  9. 课程四(Convolutional Neural Networks),第三 周(Object detection) —— 0.Learning Goals

    Learning Goals: Understand the challenges of Object Localization, Object Detection and Landmark Find ...

  10. 论文阅读之: Hierarchical Object Detection with Deep Reinforcement Learning

    Hierarchical Object Detection with Deep Reinforcement Learning NIPS 2016 WorkShop  Paper : https://a ...

随机推荐

  1. mysql中where条件查询

    #进阶2:条件查询 /* 语法: SELECT 查询列表 FROM 表名 WHERE 筛选条件: 分类: 一.按条件表达式筛选 条件运算符:> < = <> >= < ...

  2. CYQ.Data 支持 KingbaseES人大金仓数据库

    KingbaseES人大金仓数据库介绍: KingbaseES是一种关系型数据库管理系统,也被称为人大金仓数据库.KingbaseES 是北京人大金仓信息技术股份有限公司研发的,具有自主知识产权的通用 ...

  3. vuecli-vite-vue3-init 项目架子 快速开发 webpack打包

    要vite的开发的快速 和 webpack打包 开发的时候 用vite,可以打包一个本地可以直接双击,不用起服务的代码 这个架子的缺点就是 vite和vuecli 两套双配置 正式公司项目 还是vue ...

  4. Atom 编辑器实时预览 HTML 页面经典方法

    为什么需要这样一个工具?   每次预览 HTML 页面,都需要打开各种浏览器:哪怕不是调试,只是为了查看下效果:切换来切换去,各种刷新,感觉有些浪费时间:以前用过 DW 的实时预览,感觉这个功能很赞: ...

  5. linux shell 字体颜色设置

    使用 echo -e "\033[0;32;40m" 可以将字体设置成绿色. 这里必须使用echo 的选项 "-e",因为后面需要用到转义序列. 转义序列就是一 ...

  6. dotNet8 全局异常处理

    前言 异常的处理在我们应用程序中是至关重要的,在 dotNet 中有很多异常处理的机制,比如MVC的异常筛选器, 管道中间件定义try catch捕获异常处理亦或者第三方的解决方案Hellang.Mi ...

  7. Welcome to YARP - 2.3 配置功能 - 配置过滤器(Configuration Filters)

    目录 Welcome to YARP - 1.认识YARP并搭建反向代理服务 Welcome to YARP - 2.配置功能 2.1 - 配置文件(Configuration Files) 2.2 ...

  8. [极客大挑战 2019]web部分题解(sql部分已完结,其他部分正在更新,出去吃个饭先)

    [极客大挑战 2019]BabySQL 打开环境后有登录界面◕‿◕ 一眼注入,后先试试万能密码: username:admin' or '1'='1 password:1 GG,出大问题,我就会这一招 ...

  9. Vulnhub靶场--EVILBOX: ONE

    环境配置 靶机连接 攻击者主机IP:192.168.47.130 目标主机IP:192.168.47.131 信息搜集 扫描目标主机,发现目标主机开放了22.80端口 ┌──(kali㉿kali)-[ ...

  10. Hong Kong Azure / .NET club first meetup - WPF business value in the financial industry

    The first meeting of the Hong Kong Azure / .NET Club was held on December 29, 2019 at Starbucks, She ...