few-shot-learning for object detection

github https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection

train.py

from __future__ import print_function

import sys

import time

import torch

import torch.nn as nn

import torch.nn.functional as F

import torch.optim as optim

import torch.backends.cudnn as cudnn

from torchvision import datasets, transforms

from torch.autograd import Variable

from tqdm import tqdm

import dataset

import random

import math

import os

from utils import *

from cfg import parse_cfg, cfg

from darknet import Darknet

import pdb

# Training settings

# datacfg = sys.argv[1]

# darknetcfg = parse_cfg(sys.argv[2])

# learnetcfg = parse_cfg(sys.argv[3])

datacfg = "cfg/fewyolov3_voc.data"

darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")

learnetcfg = parse_cfg("cfg/reweighting_net.cfg")

weightfile = "tmp/000050.weights"

if len(sys.argv) == 5:

    weightfile = sys.argv[4]

data_options = read_data_cfg(datacfg)

net_options = darknetcfg[0]

meta_options = learnetcfg[0]

# Configure options

cfg.config_data(data_options)

cfg.config_meta(meta_options)

cfg.config_net(net_options)

# Parameters

metadict = data_options['meta']

trainlist = data_options['train']

testlist = data_options['valid']

backupdir = data_options['backup']

gpus = data_options['gpus']  # e.g. 0,1,2,3

ngpus = len(gpus.split(','))

num_workers = int(data_options['num_workers'])

batch_size = int(net_options['batch'])

print("batch_size:",batch_size)

max_batches = int(net_options['max_batches'])

learning_rate = float(data_options['learning_rate'])

momentum = float(net_options['momentum'])

decay = float(net_options['decay'])

steps = [float(step) for step in data_options['steps'].split(',')]

scales = [float(scale) for scale in data_options['scales'].split(',')]

# Train parameters

use_cuda = True

seed = int(time.time())

## --------------------------------------------------------------------------

## MAIN

backupdir = cfg.backup

print('logging to ' + backupdir)

if not os.path.exists(backupdir):

    os.makedirs(backupdir)

torch.manual_seed(seed)

if use_cuda:

    os.environ['CUDA_VISIBLE_DEVICES'] = gpus

    torch.cuda.manual_seed(seed)

model = Darknet(darknetcfg, learnetcfg)

region_loss = model.loss

model.print_network()

# if len(sys.argv) == 5:

model.load_weights(weightfile)

###################################################

### Meta-model parameters

region_loss.seen = model.seen

processed_batches = 0 if cfg.tuning else model.seen / batch_size

trainlist = dataset.build_dataset(data_options)

nsamples = len(trainlist)

init_width = model.width

init_height = model.height

init_epoch = 0 if cfg.tuning else model.seen / nsamples

max_epochs = max_batches * batch_size / nsamples + 1

max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs

print(cfg.repeat, nsamples, max_batches, batch_size)

print(num_workers)

kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

if use_cuda:

    if ngpus > 1:

        model = torch.nn.DataParallel(model).cuda()

    else:

        model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

def adjust_learning_rate(optimizer, processed_batches):

    """Sets the learning rate to the initial LR decayed by 10 every 30 epochs"""

    lr = learning_rate

    for i in range(len(steps)):

        scale = scales[i] if i < len(scales) else 1

        if processed_batches >= steps[i]:

            lr = lr * scale

            if processed_batches == steps[i]:

                break

        else:

            break

    for param_group in optimizer.param_groups:

        param_group['lr'] = lr

    return lr

def train(epoch):

    global processed_batches

    t0 = time.time()

    if ngpus > 1:

        cur_model = model.module

    else:

        cur_model = model

    train_loader = torch.utils.data.DataLoader(

        dataset.listDataset(trainlist, shape=(init_width, init_height),

                            shuffle=False,

                            transform=transforms.Compose([

                                transforms.ToTensor(),

                            ]),

                            train=True,

                            seen=cur_model.seen,

                            batch_size=batch_size,

                            num_workers=num_workers),

        batch_size=batch_size, shuffle=False, **kwargs)

    metaset = dataset.MetaDataset(metafiles=metadict, train=True)

    metaloader = torch.utils.data.DataLoader(

        metaset,

        batch_size=metaset.batch_size,

        shuffle=False,

        num_workers=num_workers,

        pin_memory=True

    )

    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)

    logging('epoch %d/%d, processed %d samples, lr %e' % (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

    model.train()

    t1 = time.time()

    avg_time = torch.zeros(9)

    with tqdm(total=train_loader.__len__()) as t:

        for batch_idx, (data, target) in enumerate(train_loader):

            metax, mask = metaloader.next()

            t2 = time.time()

            adjust_learning_rate(optimizer, processed_batches)

            processed_batches = processed_batches + 1

            if use_cuda:

                data = data.cuda()

                metax = metax.cuda()

                mask = mask.cuda()

                # target= target.cuda()

            t3 = time.time()

            data, target = Variable(data), Variable(target)

            metax, mask = Variable(metax), Variable(mask)

            t4 = time.time()

            optimizer.zero_grad()

            t5 = time.time()

            output = model(data, metax, mask)

            t6 = time.time()

            region_loss.seen = region_loss.seen + data.data.size(0)

            cur_model.seen = region_loss.seen

            region_loss.input_size = (data.data.size(2), data.data.size(3))

            loss,loss_box,loss_conf,loss_cls,cls_acc,recall50,recall75,nProposals = region_loss(output, target)

            t.set_description('Epoch %d' % epoch)

            t.set_postfix(loss=loss.item(), loss_bbox=loss_box,loss_conf=loss_conf,loss_cls=loss_cls,

                          cls_acc=cls_acc, recall50=recall50, recall75=recall75,Proposals=nProposals)

            t.update()

            t7 = time.time()

            loss.backward()

            t8 = time.time()

            optimizer.step()

            t9 = time.time()

            if False and batch_idx > 1:

                avg_time[0] = avg_time[0] + (t2 - t1)

                avg_time[1] = avg_time[1] + (t3 - t2)

                avg_time[2] = avg_time[2] + (t4 - t3)

                avg_time[3] = avg_time[3] + (t5 - t4)

                avg_time[4] = avg_time[4] + (t6 - t5)

                avg_time[5] = avg_time[5] + (t7 - t6)

                avg_time[6] = avg_time[6] + (t8 - t7)

                avg_time[7] = avg_time[7] + (t9 - t8)

                avg_time[8] = avg_time[8] + (t9 - t1)

                print('-------------------------------')

                print('       load data : %f' % (avg_time[0] / (batch_idx)))

                print('     cpu to cuda : %f' % (avg_time[1] / (batch_idx)))

                print('cuda to variable : %f' % (avg_time[2] / (batch_idx)))

                print('       zero_grad : %f' % (avg_time[3] / (batch_idx)))

                print(' forward feature : %f' % (avg_time[4] / (batch_idx)))

                print('    forward loss : %f' % (avg_time[5] / (batch_idx)))

                print('        backward : %f' % (avg_time[6] / (batch_idx)))

                print('            step : %f' % (avg_time[7] / (batch_idx)))

                print('           total : %f' % (avg_time[8] / (batch_idx)))

            t1 = time.time()

        print('')

        t1 = time.time()

        logging('training with %f samples/s' % (len(train_loader.dataset) / (t1 - t0)))

        if (epoch + 1) % cfg.save_interval == 0:

            logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))

            cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))

init_epoch = int(init_epoch)

max_epochs = int(max_epochs)

print("init_epoch:",init_epoch)

print("max_epochs:",max_epochs)

for epoch in range(init_epoch, max_epochs):

    train(epoch)

region_loss.py

import time

import torch

import math

import torch.nn as nn

import torch.nn.functional as F

import numpy as np

from torch.autograd import Variable

from utils import *

from cfg import cfg

from numbers import Number

from random import random, randint

import pdb

def neg_filter(pred_boxes, target, withids=False):

    assert pred_boxes.size(0) == target.size(0)

    if cfg.neg_ratio == 'full':

        inds = list(range(pred_boxes.size(0)))

    elif isinstance(cfg.neg_ratio, Number):

        flags = torch.sum(target, 1) != 0

        flags = flags.cpu().data.tolist()

        ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))

        if ratio >= 1:

            inds = list(range(pred_boxes.size(0)))

        else:

            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]

            inds = np.argwhere(flags).squeeze()

            pred_boxes, target = pred_boxes[inds], target[inds]

    else:

        raise NotImplementedError('neg_ratio not recognized')

    if withids:

        return pred_boxes, target, inds

    else:

        return pred_boxes, target

def neg_filter_v2(pred_boxes, target, withids=False):

    assert pred_boxes.size(0) == target.size(0)

    if cfg.neg_ratio == 'full':

        inds = list(range(pred_boxes.size(0)))

    elif isinstance(cfg.neg_ratio, Number):

        flags = torch.sum(target, 1) != 0

        flags = flags.cpu().data.tolist()

        ratio = cfg.neg_ratio * sum(flags) * 1. / (len(flags) - sum(flags))

        if ratio >= 1:

            inds = list(range(pred_boxes.size(0)))

        else:

            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]

            if sum(flags) == 0:

                flags[randint(0, len(flags) - 1)] = 1

            inds = np.nonzero(flags)[0]

            pred_boxes, target = pred_boxes[inds], target[inds]

    else:

        raise NotImplementedError('neg_ratio not recognized')

    if withids:

        return pred_boxes, target, inds

    else:

        return pred_boxes, target

def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):

    nB = target.size(0)

    nA = num_anchors

    # print('anchor_step: ', anchor_step)

    obj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(0)

    noobj_mask = torch.cuda.ByteTensor(nB, nA, feature_size[0], feature_size[1]).fill_(1)

    tx = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    ty = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    tw = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    th = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    tcls = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    iou_scores = torch.zeros(nB, nA, feature_size[0], feature_size[1]).cuda()

    tboxes = target.view(-1, 5)

    nonzero_ind = tboxes[:, 3] > 0

    tboxes = tboxes[nonzero_ind.unsqueeze(1).repeat(1, 5)].view(-1, 5)

    ind_B = torch.linspace(0, nB - 1, nB).unsqueeze(1).repeat(1, 50).view(-1).long().cuda()

    ind_B = ind_B[nonzero_ind]

    gx = (tboxes[:, 1] * feature_size[1]).float()

    gy = (tboxes[:, 2] * feature_size[0]).float()

    gw = (tboxes[:, 3] * input_size[1]).float()

    gh = (tboxes[:, 4] * input_size[0]).float()

    aw = anchors[:, 0]

    ah = anchors[:, 1]

    nbox = tboxes.size(0)

    gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(), gw.unsqueeze(0), gh.unsqueeze(0)], 0)

    anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(), aw.unsqueeze(0), ah.unsqueeze(0)], 0)

    ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA), anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)

    best_ious, best_a = ious.max(1)

    gj = gy.long()

    gi = gx.long()

    obj_mask[ind_B, best_a, gj, gi] = 1

    noobj_mask[ind_B, best_a, gj, gi] = 0

    for i, iou in enumerate(ious):

        if (iou > ignore_thresh).sum():

            noobj_mask[ind_B[i:i + 1], (iou > ignore_thresh).nonzero().squeeze(1), gj[i:i + 1], gi[i:i + 1]] = 0

    tx[ind_B, best_a, gj, gi] = gx - gx.floor()

    ty[ind_B, best_a, gj, gi] = gy - gy.floor()

    tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])

    th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])

    tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()

    tconf = obj_mask.float()

    pred_boxes = pred_boxes.contiguous().view(nB, nA, feature_size[0], feature_size[1], 4).cuda()

    conf = conf.contiguous().view(nB, nA, feature_size[0], feature_size[1]).data

    target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),

                              (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),

                              gw.unsqueeze(0),

                              gh.unsqueeze(0)], 0)

    iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(), target_boxes, x1y1x2y2=False)

    conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()

    detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50

    detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50

    return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75

class RegionLoss(nn.Module):

    def __init__(self, num_classes=0, anchors=[], num_anchors=1):

        super(RegionLoss, self).__init__()

        self.num_classes = num_classes

        self.anchors = anchors

        self.num_anchors = num_anchors

        self.anchor_step = len(anchors) / num_anchors

        self.coord_scale = 1

        self.noobject_scale = 1

        self.object_scale = 5

        self.class_scale = 1

        self.thresh = 0.6

        self.seen = 0

    def forward(self, output, target):

        # import pdb; pdb.set_trace()

        # output : BxAs*(4+1+num_classes)*H*W

        # if target.dim() == 3:

        #     # target : B * n_cls * l

        #     l = target.size(-1)

        #     target = target.permute(1,0,2).contiguous().view(-1, l)

        if target.dim() == 3:

            target = target.view(-1, target.size(-1))

        bef = target.size(0)

        output, target = neg_filter(output, target)

        # print("{}/{}".format(target.size(0), bef))

        t0 = time.time()

        nB = output.data.size(0)

        nA = self.num_anchors

        nC = self.num_classes

        nH = output.data.size(2)

        nW = output.data.size(3)

        output = output.view(nB, nA, (5 + nC), nH, nW)

        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))

        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))

        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)

        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)

        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))

        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)

        cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))

        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)

        t1 = time.time()

        pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)

        grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()

        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()

        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()

        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()

        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)

        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)

        pred_boxes[0] = x.data + grid_x

        pred_boxes[1] = y.data + grid_y

        pred_boxes[2] = torch.exp(w.data) * anchor_w

        pred_boxes[3] = torch.exp(h.data) * anchor_h

        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))

        t2 = time.time()

        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,

                                                                                                    target.data,

                                                                                                    self.anchors, nA,

                                                                                                    nC, \

                                                                                                    nH, nW,

                                                                                                    self.noobject_scale,

                                                                                                    self.object_scale,

                                                                                                    self.thresh,

                                                                                                    self.seen)

        cls_mask = (cls_mask == 1)

        if cfg.metayolo:

            tcls.zero_()

        nProposals = int((conf > 0.25).float().sum().data[0])

        tx = Variable(tx.cuda())

        ty = Variable(ty.cuda())

        tw = Variable(tw.cuda())

        th = Variable(th.cuda())

        tconf = Variable(tconf.cuda())

        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

        coord_mask = Variable(coord_mask.cuda())

        conf_mask = Variable(conf_mask.cuda().sqrt())

        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())

        cls = cls[cls_mask].view(-1, nC)

        t3 = time.time()

        loss_x = self.coord_scale * nn.MSELoss(size_average=False)(x * coord_mask, tx * coord_mask) / 2.0

        loss_y = self.coord_scale * nn.MSELoss(size_average=False)(y * coord_mask, ty * coord_mask) / 2.0

        loss_w = self.coord_scale * nn.MSELoss(size_average=False)(w * coord_mask, tw * coord_mask) / 2.0

        loss_h = self.coord_scale * nn.MSELoss(size_average=False)(h * coord_mask, th * coord_mask) / 2.0

        loss_conf = nn.MSELoss(size_average=False)(conf * conf_mask, tconf * conf_mask) / 2.0

        loss_cls = self.class_scale * nn.CrossEntropyLoss(size_average=False)(cls, tcls)

        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        t4 = time.time()

        if False:

            print('-----------------------------------')

            print('        activation : %f' % (t1 - t0))

            print(' create pred_boxes : %f' % (t2 - t1))

            print('     build targets : %f' % (t3 - t2))

            print('       create loss : %f' % (t4 - t3))

            print('             total : %f' % (t4 - t0))

        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (

            self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0],

            loss_conf.data[0], loss_cls.data[0], loss.data[0]))

        return loss

class RegionLossV2(nn.Module):

    """

    Yolo region loss + Softmax classification across meta-inputs

    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1, input_size=(832, 832)):

        super(RegionLossV2, self).__init__()

        self.num_classes = num_classes

        self.anchors = anchors

        self.num_anchors = num_anchors

        self.coord_scale = 1

        self.class_scale = 1

        self.obj_scale = 1

        self.noobj_scale = 100

        self.thresh = 0.5

        self.seen = 0

        self.input_size = input_size

        self.feature_scale = [32, 16, 8]

        print('class_scale', self.class_scale)

    def forward(self, output, target):

        # output : (bs*cs, nA*(5+1), N)

        # target : (bs, cs, 50*5)

        # Get all classification prediction

        # pdb.set_trace()

        bs = target.size(0)

        cs = target.size(1)

        nA = self.num_anchors

        nC = self.num_classes

        N = output.data.size(2)

        # feature_size = [[26, 26], [52, 52], [104, 104]]

        cls = output.view(output.size(0), nA, (5 + nC), N)

        cls = cls.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())).squeeze()

        cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)

        cls_conf = F.softmax(cls, 1)

        _, cls_max_ids = torch.max(cls_conf, 1)

        cls_max_ids = cls_max_ids.data

        pre_cls_mask = torch.zeros(bs * nA * N, cs).cuda()

        pre_cls_mask[torch.linspace(0, bs * nA * N - 1, bs * nA * N).long().cuda(), cls_max_ids] = 1

        pre_cls_mask = pre_cls_mask.view(bs, nA * N, cs).transpose(1, 2).contiguous().view(bs * cs, nA, N)

        # Rearrange target and perform filtering operation

        target = target.view(-1, target.size(-1))

        # bef = target.size(0)

        output, target, inds = neg_filter_v2(output, target, withids=True)

        counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))

        # print("{}/{}".format(target.size(0), bef))

        pre_cls_mask = pre_cls_mask[inds]

        t0 = time.time()

        nB = output.data.size(0)

        output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, (5+nC), N)

        x = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).squeeze(2))  # (nB, nA, N)

        y = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).squeeze(2))

        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).squeeze(2)

        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).squeeze(2)

        conf = F.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).squeeze(2))

        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)

        # cls  = output.index_select(2, Variable(torch.linspace(5,5+nC-1,nC).long().cuda()))

        # cls  = cls.view(nB*nA, nC, nH*nW).transpose(1,2).contiguous().view(nB*nA*nH*nW, nC)

        t1 = time.time()

        pred_boxes = torch.cuda.FloatTensor(4, nB, nA, N)

        grid_x = []

        grid_y = []

        anchor_w = []

        anchor_h = []

        scale = []

        feature_size = []

        for fs in self.feature_scale:

            feature_h = self.input_size[0] // fs

            feature_w = self.input_size[1] // fs

            # print("feature_h:",feature_h)

            # print("feature_w:",feature_w)

            feature_size.append([feature_h, feature_w])

            grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1) \

                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())

            grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t() \

                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())

            scale.append((torch.ones(nB, nA, feature_h * feature_w) * fs).cuda())

        grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)

        grid_y = torch.cat(grid_y, 2)

        scale = torch.cat(scale, 2)

        for i in range(3):

            aw = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \

                .index_select(1, torch.LongTensor([0])).cuda()

            ah = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1) \

                .index_select(1, torch.LongTensor([1])).cuda()

            anchor_w.append(aw.repeat(nB, feature_size[i][0] * feature_size[i][1]) \

                            .view(nB, nA, feature_size[i][0] * feature_size[i][1]))

            anchor_h.append(ah.repeat(nB, feature_size[i][0] * feature_size[i][1]) \

                            .view(nB, nA, feature_size[i][0] * feature_size[i][1]))

        anchor_w = torch.cat(anchor_w, 2)

        anchor_h = torch.cat(anchor_h, 2)

        pred_boxes[0] = (x.data + grid_x) * scale

        pred_boxes[1] = (y.data + grid_y) * scale

        pred_boxes[2] = torch.exp(w.data) * anchor_w

        pred_boxes[3] = torch.exp(h.data) * anchor_h

        pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous())  # (nB, nA, N, 4)

        t2 = time.time()

        nGT = 0

        iou_scores = []

        obj_mask = []

        noobj_mask = []

        tx = []

        ty = []

        tw = []

        th = []

        tconf = []

        tcls = []

        start_N = 0

        detected50 = torch.zeros(0)

        detected75 = torch.zeros(0)

        for imap in range(3):

            nGT, iou_scores_temp, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp, tconf_temp, \

            tcls_temp, detected50_temp, detected75_temp = build_targets(

                pred_boxes[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1], :],

                target.data.cuda(),

                conf[:, :, start_N:start_N + feature_size[imap][0] * feature_size[imap][1]],

                torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),

                nA,

                feature_size[imap],

                self.input_size,

                self.thresh)

            if not len(detected50):

                detected50 = torch.zeros(nGT).cuda()

            if not len(detected75):

                detected75 = torch.zeros(nGT).cuda()

            detected50 += detected50_temp

            detected75 += detected75_temp

            start_N += feature_size[imap][0] * feature_size[imap][1]

            iou_scores.append(iou_scores_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            obj_mask.append(obj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            noobj_mask.append(noobj_mask_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            tx.append(tx_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            ty.append(ty_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            tw.append(tw_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            th.append(th_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            tconf.append(tconf_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

            tcls.append(tcls_temp.view(nB, nA, feature_size[imap][0] * feature_size[imap][1]))

        iou_scores = torch.cat(iou_scores, 2)

        obj_mask = torch.cat(obj_mask, 2)

        noobj_mask = torch.cat(noobj_mask, 2)

        tx = torch.cat(tx, 2)

        ty = torch.cat(ty, 2)

        tw = torch.cat(tw, 2)

        th = torch.cat(th, 2)

        tconf = torch.cat(tconf, 2)

        tcls = torch.cat(tcls, 2)

        # Take care of class mask

        idx_start = 0

        cls_mask_list = []

        tcls_list = []

        for i in range(len(counts)):

            if counts[i] == 0:

                cur_mask = torch.zeros(nA, N).cuda()

                cur_tcls = torch.zeros(nA, N).cuda()

            else:

                cur_mask = torch.sum(obj_mask[idx_start:idx_start + counts[i]].float(), dim=0)

                cur_tcls = torch.sum(tcls[idx_start:idx_start + counts[i]], dim=0)

            cls_mask_list.append(cur_mask)

            tcls_list.append(cur_tcls)

            idx_start += counts[i]

        cls_mask = torch.stack(cls_mask_list)  # (bs, nA, N)

        tcls = torch.stack(tcls_list)

        cls_mask = (cls_mask == 1)

        conf50 = (conf > 0.5).float().data

        iou50 = (iou_scores > 0.5).float()

        detected_mask = conf50 * tconf

        precision = torch.sum(iou50 * detected_mask) / (conf50.sum() + 1e-16)

        detected50 = (detected50 > 0).float()

        detected75 = (detected75 > 0).float()

        recall50 = detected50.sum() / (nGT + 1e-16)

        recall75 = detected75.sum() / (nGT + 1e-16)

        nProposals = int((conf > 0.25).float().sum().item())

        tx = Variable(tx)

        ty = Variable(ty)

        tw = Variable(tw)

        th = Variable(th)

        tconf = Variable(tconf)

        obj_mask = Variable(obj_mask.bool())

        noobj_mask = Variable(noobj_mask.bool())

        # cls_mask   = Variable(cls_mask.view(-1, 1).repeat(1,cs).cuda())

        cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)

        cls_max_ids = cls_max_ids[cls_mask.view(-1)]

        tcls = Variable(tcls[cls_mask].long())

        cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

        ClassificationLoss = nn.CrossEntropyLoss()

        MseLoss = nn.MSELoss()

        BceLoss = nn.BCELoss()

        t3 = time.time()

        loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])

        loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])

        loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])

        loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])

        loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])

        loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])

        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

        if len(cls):

            loss_cls = self.class_scale * ClassificationLoss(cls, tcls)

        else:

            loss_cls = Variable(torch.Tensor([0]).float().cuda())

        # # pdb.set_trace()

        # ids = [9,11,12,16]

        # new_cls, new_tcls = select_classes(cls, tcls, ids)

        # new_tcls = Variable(torch.from_numpy(new_tcls).long().cuda())

        # loss_cls_new = self.class_scale * nn.CrossEntropyLoss(size_average=False)(new_cls, new_tcls)

        # loss_cls_new *= 10

        # loss_cls += loss_cls_new

        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        t4 = time.time()

        if False:

            print('-----------------------------------')

            print('        activation : %f' % (t1 - t0))

            print(' create pred_boxes : %f' % (t2 - t1))

            print('     build targets : %f' % (t3 - t2))

            print('       create loss : %f' % (t4 - t3))

            print('             total : %f' % (t4 - t0))

        # print(

        #     '%d: nGT %d, precision %f, recall50 %f, recall75 %f, cls_acc %f, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % \

        #     (self.seen, nGT, precision, recall50, recall75, cls_acc, loss_x.item(), loss_y.item(), \

        #      loss_w.item(), loss_h.item(), loss_conf.item(), loss_cls.item(), loss.item()))

        # print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, cls_new %f, total %f' % (self.seen, nGT, nCorrect, nProposals, loss_x.data[0], loss_y.data[0], loss_w.data[0], loss_h.data[0], loss_conf.data[0], loss_cls.data[0], loss_cls_new.data[0], loss.data[0]))

        return loss,loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),loss_conf.item(),loss_cls.item(),cls_acc,recall50.item(),recall75.item(),nProposals

def select_classes(pred, tgt, ids):

    # convert tgt to numpy

    tgt = tgt.cpu().data.numpy()

    new_tgt = [(tgt == d) * i for i, d in enumerate(ids)]

    new_tgt = np.max(np.stack(new_tgt), axis=0)

    idxes = np.argwhere(new_tgt > 0).squeeze()

    new_pred = pred[idxes]

    new_pred = new_pred[:, ids]

    new_tgt = new_tgt[idxes]

    return new_pred, new_tgt

few-shot-learning for object detection的更多相关文章

deep learning on object detection
回归工作一周,忙的头晕,看了两三篇文章,主要在写各种文档和走各种办事流程了-- 这次来写写object detection最近看的三篇文章吧.都不是最近的文章,但是是今年的文章,我也想借此让自己赶快熟 ...
论文阅读笔记五十三：Libra R-CNN: Towards Balanced Learning for Object Detection(CVPR2019)
论文原址:https://arxiv.org/pdf/1904.02701.pdf github:https://github.com/OceanPang/Libra_R-CNN 摘要相比模型的结构 ...
Object detection with deep learning and OpenCV
目录 Single Shot Detectors for Object Detection Deep learning-based object detection with OpenCV 这篇文 ...
（转）Awesome Object Detection
Awesome Object Detection 2018-08-10 09:30:40 This blog is copied from: https://github.com/amusi/awes ...
[Arxiv1706] Few-Example Object Detection with Model Communication 论文笔记
p.p1 { margin: 0.0px 0.0px 0.0px 0.0px; font: 13.0px "Helvetica Neue"; color: #042eee } p. ...
object detection 总结
1.基础自己对于YOLOV1,2,3都比较熟悉. RCNN也比较熟悉.这个是自己目前掌握的基础2.第一步看一下2019年的井喷的anchor free的网络3.第二步看一下以往,引用多的网路4. ...
关于目标检测(Object Detection)的文献整理
本文对CV中目标检测子方向的研究,整理了如下的相关笔记(持续更新中): 1. Cascade R-CNN: Delving into High Quality Object Detection 年份: ...
论文学习-深度学习目标检测2014至201901综述-Deep Learning for Generic Object Detection A Survey
目录写在前面目标检测任务与挑战目标检测方法汇总基础子问题基于DCNN的特征表示主干网络(network backbone) Methods For Improving Object Rep ...
课程四(Convolutional Neural Networks)，第三周（Object detection） —— 0.Learning Goals
Learning Goals: Understand the challenges of Object Localization, Object Detection and Landmark Find ...
论文阅读之: Hierarchical Object Detection with Deep Reinforcement Learning
Hierarchical Object Detection with Deep Reinforcement Learning NIPS 2016 WorkShop Paper : https://a ...

随机推荐

mysql中where条件查询
#进阶2:条件查询 /* 语法: SELECT 查询列表 FROM 表名 WHERE 筛选条件: 分类: 一.按条件表达式筛选条件运算符:> < = <> >= < ...
CYQ.Data 支持 KingbaseES人大金仓数据库
KingbaseES人大金仓数据库介绍: KingbaseES是一种关系型数据库管理系统,也被称为人大金仓数据库.KingbaseES 是北京人大金仓信息技术股份有限公司研发的,具有自主知识产权的通用 ...
vuecli-vite-vue3-init 项目架子快速开发 webpack打包
要vite的开发的快速和 webpack打包开发的时候用vite,可以打包一个本地可以直接双击,不用起服务的代码这个架子的缺点就是 vite和vuecli 两套双配置正式公司项目还是vue ...
Atom 编辑器实时预览 HTML 页面经典方法
为什么需要这样一个工具? 每次预览 HTML 页面,都需要打开各种浏览器:哪怕不是调试,只是为了查看下效果:切换来切换去,各种刷新,感觉有些浪费时间:以前用过 DW 的实时预览,感觉这个功能很赞: ...
linux shell 字体颜色设置
使用 echo -e "\033[0;32;40m" 可以将字体设置成绿色. 这里必须使用echo 的选项 "-e",因为后面需要用到转义序列. 转义序列就是一 ...
dotNet8 全局异常处理
前言异常的处理在我们应用程序中是至关重要的,在 dotNet 中有很多异常处理的机制,比如MVC的异常筛选器, 管道中间件定义try catch捕获异常处理亦或者第三方的解决方案Hellang.Mi ...
Welcome to YARP - 2.3 配置功能 - 配置过滤器(Configuration Filters)
目录 Welcome to YARP - 1.认识YARP并搭建反向代理服务 Welcome to YARP - 2.配置功能 2.1 - 配置文件(Configuration Files) 2.2 ...
[极客大挑战 2019]web部分题解（sql部分已完结，其他部分正在更新，出去吃个饭先）
[极客大挑战 2019]BabySQL 打开环境后有登录界面◕‿◕ 一眼注入,后先试试万能密码: username:admin' or '1'='1 password:1 GG,出大问题,我就会这一招 ...
Vulnhub靶场--EVILBOX: ONE
环境配置靶机连接攻击者主机IP:192.168.47.130 目标主机IP:192.168.47.131 信息搜集扫描目标主机,发现目标主机开放了22.80端口 ┌──(kali㉿kali)-[ ...
Hong Kong Azure / .NET club first meetup - WPF business value in the financial industry
The first meeting of the Hong Kong Azure / .NET Club was held on December 29, 2019 at Starbucks, She ...

few-shot-learning for object detection

few-shot-learning for object detection的更多相关文章

随机推荐

热门专题