few-shot-learning for object detection

github https://github.com/LiuXinyu12378/few-shot-learning-for-object-detection

train.py

from __future__ import print_function

import sys

import time

import torch

import torch.nn as nn

import torch.nn.functional as F

import torch.optim as optim

import torch.backends.cudnn as cudnn

from torchvision import datasets, transforms

from torch.autograd import Variable

from tqdm import tqdm

import dataset

import random

import math

import os

from utils import *

from cfg import parse_cfg, cfg

from darknet import Darknet

import pdb

# Training settings

# The configuration paths are hard-coded below; the commented-out lines show
# the sys.argv-based variant.

# datacfg = sys.argv[1]

# darknetcfg = parse_cfg(sys.argv[2])

# learnetcfg = parse_cfg(sys.argv[3])

datacfg = "cfg/fewyolov3_voc.data"

darknetcfg = parse_cfg("cfg/darknet_yolov3_spp.cfg")

learnetcfg = parse_cfg("cfg/reweighting_net.cfg")

weightfile = "tmp/000050.weights"

# Only the 4th command-line argument is honoured, and only when exactly
# four arguments are given; arguments 1-3 are ignored.
if len(sys.argv) == 5:

    weightfile = sys.argv[4]

data_options = read_data_cfg(datacfg)

# The first entry of each parsed cfg is used as that network's option block.
net_options = darknetcfg[0]

meta_options = learnetcfg[0]

# Configure options

cfg.config_data(data_options)

cfg.config_meta(meta_options)

cfg.config_net(net_options)

# Parameters

metadict = data_options['meta']

# NOTE: trainlist is reassigned further down from dataset.build_dataset().
trainlist = data_options['train']

testlist = data_options['valid']

# NOTE: backupdir is overwritten with cfg.backup in the MAIN section below.
backupdir = data_options['backup']

gpus = data_options['gpus']  # e.g. 0,1,2,3

ngpus = len(gpus.split(','))

num_workers = int(data_options['num_workers'])

batch_size = int(net_options['batch'])

print("batch_size:",batch_size)

max_batches = int(net_options['max_batches'])

learning_rate = float(data_options['learning_rate'])

# momentum and decay are parsed here but are not passed to the optimizer
# constructed at the end of this section.
momentum = float(net_options['momentum'])

decay = float(net_options['decay'])

# Batch thresholds and the matching multipliers read by adjust_learning_rate().
steps = [float(step) for step in data_options['steps'].split(',')]

scales = [float(scale) for scale in data_options['scales'].split(',')]

# Train parameters

use_cuda = True

# Seed from the wall clock, so runs are not reproducible across launches.
seed = int(time.time())

## --------------------------------------------------------------------------

## MAIN

backupdir = cfg.backup

print('logging to ' + backupdir)

if not os.path.exists(backupdir):

    os.makedirs(backupdir)

torch.manual_seed(seed)

if use_cuda:

    os.environ['CUDA_VISIBLE_DEVICES'] = gpus

    torch.cuda.manual_seed(seed)

model = Darknet(darknetcfg, learnetcfg)

# The loss object is owned by the model and shared with the training loop.
region_loss = model.loss

model.print_network()

# Weights are always loaded (the argv guard below is commented out).

# if len(sys.argv) == 5:

model.load_weights(weightfile)

###################################################

### Meta-model parameters

region_loss.seen = model.seen

# When not tuning, resume the batch counter from the model's sample counter.
# With true division (Python 3) this value is a float.
processed_batches = 0 if cfg.tuning else model.seen / batch_size

trainlist = dataset.build_dataset(data_options)

nsamples = len(trainlist)

init_width = model.width

init_height = model.height

# Epoch bounds may be fractional here; they are cast to int just before the
# epoch loop at the bottom of the script.
init_epoch = 0 if cfg.tuning else model.seen / nsamples

max_epochs = max_batches * batch_size / nsamples + 1

max_epochs = int(math.ceil(cfg.max_epoch * 1. / cfg.repeat)) if cfg.tuning else max_epochs

print(cfg.repeat, nsamples, max_batches, batch_size)

print(num_workers)

# Extra DataLoader keyword arguments used by train().
kwargs = {'num_workers': num_workers, 'pin_memory': True} if use_cuda else {}

if use_cuda:

    if ngpus > 1:

        model = torch.nn.DataParallel(model).cuda()

    else:

        model = model.cuda()

optimizer = optim.Adam(model.parameters(), lr=learning_rate)

def adjust_learning_rate(optimizer, processed_batches):
    """Apply the step-based learning-rate schedule and return the new rate.

    Starting from the module-level ``learning_rate``, the rate is multiplied
    by ``scales[i]`` for every ``steps[i]`` that ``processed_batches`` has
    reached (a step without a matching scale counts as a factor of 1). The
    walk stops at the first step that has not been reached yet, or right
    after a step that is hit exactly.

    Args:
        optimizer: object exposing ``param_groups``, an iterable of dicts;
            the ``'lr'`` entry of every group is overwritten.
        processed_batches: number of batches processed so far.

    Returns:
        The learning rate written into every parameter group.
    """
    lr = learning_rate
    for i, step in enumerate(steps):
        if processed_batches < step:
            break
        lr = lr * (scales[i] if i < len(scales) else 1)
        if processed_batches == step:
            break
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
    return lr

def train(epoch):
    """Run one training epoch over the detection set and its meta-inputs.

    Fresh loaders are built for ``trainlist`` and for the meta-dataset, each
    batch is pushed through ``model`` and ``region_loss``, and the optimizer
    is stepped after every batch. Progress is reported on a tqdm bar, and the
    weights are saved whenever ``(epoch + 1)`` is a multiple of
    ``cfg.save_interval``.

    Args:
        epoch: epoch index, used for logging and for naming the weight file.

    Side effects:
        Advances the global ``processed_batches`` counter, updates
        ``region_loss.seen`` and the model's ``seen`` counter, sets
        ``region_loss.input_size`` from each batch, and may write a
        ``.weights`` file under ``backupdir``.
    """
    global processed_batches
    start_time = time.time()

    # Under DataParallel the attributes used below (seen, save_weights) are
    # reached through the wrapped module.
    cur_model = model.module if ngpus > 1 else model

    train_loader = torch.utils.data.DataLoader(
        dataset.listDataset(trainlist, shape=(init_width, init_height),
                            shuffle=False,
                            transform=transforms.Compose([
                                transforms.ToTensor(),
                            ]),
                            train=True,
                            seen=cur_model.seen,
                            batch_size=batch_size,
                            num_workers=num_workers),
        batch_size=batch_size, shuffle=False, **kwargs)

    metaset = dataset.MetaDataset(metafiles=metadict, train=True)
    metaloader = torch.utils.data.DataLoader(
        metaset,
        batch_size=metaset.batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )
    metaloader = iter(metaloader)

    lr = adjust_learning_rate(optimizer, processed_batches)
    logging('epoch %d/%d, processed %d samples, lr %e' % (epoch, max_epochs, epoch * len(train_loader.dataset), lr))

    model.train()
    with tqdm(total=len(train_loader)) as t:
        for data, target in train_loader:
            # Use the built-in next(): it works on every iterator, whereas the
            # Python 2 style ``.next()`` method is not part of the iterator
            # protocol and is missing on current DataLoader iterators.
            metax, mask = next(metaloader)

            adjust_learning_rate(optimizer, processed_batches)
            processed_batches = processed_batches + 1

            if use_cuda:
                # target is deliberately left where it is; only the network
                # inputs are moved here.
                data = data.cuda()
                metax = metax.cuda()
                mask = mask.cuda()

            data, target = Variable(data), Variable(target)
            metax, mask = Variable(metax), Variable(mask)

            optimizer.zero_grad()
            output = model(data, metax, mask)

            # Keep the sample counters of the loss and the model in sync.
            region_loss.seen = region_loss.seen + data.data.size(0)
            cur_model.seen = region_loss.seen
            # Dims 2 and 3 of the batch (presumably height, width) tell the
            # loss the current input resolution.
            region_loss.input_size = (data.data.size(2), data.data.size(3))

            loss,loss_box,loss_conf,loss_cls,cls_acc,recall50,recall75,nProposals = region_loss(output, target)

            t.set_description('Epoch %d' % epoch)
            t.set_postfix(loss=loss.item(), loss_bbox=loss_box,loss_conf=loss_conf,loss_cls=loss_cls,
                          cls_acc=cls_acc, recall50=recall50, recall75=recall75,Proposals=nProposals)
            t.update()

            loss.backward()
            optimizer.step()

        print('')
        elapsed = time.time() - start_time
        logging('training with %f samples/s' % (len(train_loader.dataset) / elapsed))

        if (epoch + 1) % cfg.save_interval == 0:
            logging('save weights to %s/%06d.weights' % (backupdir, epoch + 1))
            cur_model.save_weights('%s/%06d.weights' % (backupdir, epoch + 1))

# The epoch bounds computed during setup may be fractional; range() needs ints.
init_epoch, max_epochs = int(init_epoch), int(max_epochs)
print("init_epoch:", init_epoch)
print("max_epochs:", max_epochs)

# Train from the resume point up to (but excluding) max_epochs.
for epoch in range(init_epoch, max_epochs):
    train(epoch)

region_loss.py

import time

import torch

import math

import torch.nn as nn

import torch.nn.functional as F

import numpy as np

from torch.autograd import Variable

from utils import *

from cfg import cfg

from numbers import Number

from random import random, randint

import pdb

def neg_filter(pred_boxes, target, withids=False):
    """Randomly drop negative rows so positives and negatives stay balanced.

    A row of ``target`` counts as negative when its entries sum to zero.
    Behaviour depends on ``cfg.neg_ratio``:

    * ``'full'``: every row is kept.
    * a number: each negative row is kept with probability
      ``neg_ratio * n_pos / n_neg``; when that value is >= 1, or when there
      are no negative rows at all, every row is kept.

    At least one row always survives the sub-sampling, matching
    ``neg_filter_v2``.

    Args:
        pred_boxes: tensor whose first dimension indexes the same rows as
            ``target``.
        target: 2-D tensor, one row per entry of ``pred_boxes``.
        withids: when true, also return the kept row indices.

    Returns:
        ``(pred_boxes, target)`` or, with ``withids``,
        ``(pred_boxes, target, inds)`` where ``inds`` is a list (nothing
        dropped) or a 1-D NumPy index array.

    Raises:
        NotImplementedError: if ``cfg.neg_ratio`` is neither ``'full'`` nor
            a number.
    """
    assert pred_boxes.size(0) == target.size(0)
    if cfg.neg_ratio == 'full':
        inds = list(range(pred_boxes.size(0)))
    elif isinstance(cfg.neg_ratio, Number):
        flags = torch.sum(target, 1) != 0
        flags = flags.cpu().data.tolist()
        num_pos = sum(flags)
        num_neg = len(flags) - num_pos
        # Without negatives there is nothing to sub-sample, and the ratio
        # below would divide by zero.
        if num_neg == 0 or cfg.neg_ratio * num_pos * 1. / num_neg >= 1:
            inds = list(range(pred_boxes.size(0)))
        else:
            ratio = cfg.neg_ratio * num_pos * 1. / num_neg
            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
            # Never hand an empty batch to the loss.
            if sum(flags) == 0:
                flags[randint(0, len(flags) - 1)] = 1
            # np.nonzero(...)[0] is always 1-D, so indexing keeps the leading
            # dimension even when a single row survives (argwhere().squeeze()
            # would collapse to a 0-d index in that case).
            inds = np.nonzero(flags)[0]
            pred_boxes, target = pred_boxes[inds], target[inds]
    else:
        raise NotImplementedError('neg_ratio not recognized')

    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target

def neg_filter_v2(pred_boxes, target, withids=False):
    """Randomly drop negative rows, always keeping at least one row.

    A row of ``target`` counts as negative when its entries sum to zero.
    Behaviour depends on ``cfg.neg_ratio``:

    * ``'full'``: every row is kept.
    * a number: each negative row is kept with probability
      ``neg_ratio * n_pos / n_neg``; when that value is >= 1, or when there
      are no negative rows at all, every row is kept. If the sampling would
      drop every row, one randomly chosen row is kept instead.

    Args:
        pred_boxes: tensor whose first dimension indexes the same rows as
            ``target``.
        target: 2-D tensor, one row per entry of ``pred_boxes``.
        withids: when true, also return the kept row indices.

    Returns:
        ``(pred_boxes, target)`` or, with ``withids``,
        ``(pred_boxes, target, inds)`` where ``inds`` is a list (nothing
        dropped) or a 1-D NumPy index array.

    Raises:
        NotImplementedError: if ``cfg.neg_ratio`` is neither ``'full'`` nor
            a number.
    """
    assert pred_boxes.size(0) == target.size(0)
    if cfg.neg_ratio == 'full':
        inds = list(range(pred_boxes.size(0)))
    elif isinstance(cfg.neg_ratio, Number):
        flags = torch.sum(target, 1) != 0
        flags = flags.cpu().data.tolist()
        num_pos = sum(flags)
        num_neg = len(flags) - num_pos
        # Without negatives there is nothing to sub-sample, and the ratio
        # below would divide by zero.
        if num_neg == 0 or cfg.neg_ratio * num_pos * 1. / num_neg >= 1:
            inds = list(range(pred_boxes.size(0)))
        else:
            ratio = cfg.neg_ratio * num_pos * 1. / num_neg
            flags = [0 if f == 0 and random() > ratio else 1 for f in flags]
            # Never hand an empty batch to the loss.
            if sum(flags) == 0:
                flags[randint(0, len(flags) - 1)] = 1
            inds = np.nonzero(flags)[0]
            pred_boxes, target = pred_boxes[inds], target[inds]
    else:
        raise NotImplementedError('neg_ratio not recognized')

    if withids:
        return pred_boxes, target, inds
    return pred_boxes, target

def build_targets(pred_boxes, target, conf, anchors, num_anchors, feature_size, input_size, ignore_thresh):
    """Build per-cell training targets for one feature map.

    Every ground-truth box (a 5-value record whose 4th value is > 0) is
    assigned to the grid cell containing its centre and to the anchor with
    the highest width/height IoU. All tensors are allocated on the CUDA
    device.

    Args:
        pred_boxes: predicted boxes, reshaped here to
            ``(nB, nA, H, W, 4)`` — presumably (x, y, w, h) in input pixels;
            confirm against the caller.
        target: ground-truth tensor with ``nB`` rows; each row is a flat
            list of 5-value records, which the code reads as
            (class, x, y, w, h) with coordinates scaled by the feature/input
            size, i.e. presumably normalised to [0, 1].
        conf: predicted objectness, reshaped here to ``(nB, nA, H, W)``.
        anchors: ``(nA, 2)`` tensor of anchor widths and heights.
        num_anchors: number of anchors ``nA``.
        feature_size: ``(H, W)`` of the feature map.
        input_size: ``(height, width)`` of the network input.
        ignore_thresh: anchors whose IoU with a box exceeds this value are
            removed from the no-object mask for that box's cell.

    Returns:
        ``(nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf,
        tcls, detected50, detected75)`` where the masks and targets have
        shape ``(nB, nA, H, W)`` and ``detected50`` / ``detected75`` hold one
        0/1 flag per ground-truth box.
    """
    nB = target.size(0)
    nA = num_anchors
    nH, nW = feature_size[0], feature_size[1]

    obj_mask = torch.cuda.ByteTensor(nB, nA, nH, nW).fill_(0)
    noobj_mask = torch.cuda.ByteTensor(nB, nA, nH, nW).fill_(1)
    tx = torch.zeros(nB, nA, nH, nW).cuda()
    ty = torch.zeros(nB, nA, nH, nW).cuda()
    tw = torch.zeros(nB, nA, nH, nW).cuda()
    th = torch.zeros(nB, nA, nH, nW).cuda()
    tcls = torch.zeros(nB, nA, nH, nW).cuda()
    iou_scores = torch.zeros(nB, nA, nH, nW).cuda()

    tboxes = target.view(-1, 5)
    # Number of box slots per sample, derived from the target itself instead
    # of the previously hard-coded 50 (identical for 50-slot targets).
    boxes_per_sample = tboxes.size(0) // nB if nB else 0
    # A slot is a real box when its width entry is positive.
    nonzero_ind = tboxes[:, 3] > 0
    tboxes = tboxes[nonzero_ind.unsqueeze(1).repeat(1, 5)].view(-1, 5)
    # Batch index of every slot, then restricted to the real boxes.
    ind_B = torch.arange(nB).unsqueeze(1).repeat(1, boxes_per_sample).view(-1).long().cuda()
    ind_B = ind_B[nonzero_ind]

    # Box centres in grid units, box sizes in input-pixel units.
    gx = (tboxes[:, 1] * feature_size[1]).float()
    gy = (tboxes[:, 2] * feature_size[0]).float()
    gw = (tboxes[:, 3] * input_size[1]).float()
    gh = (tboxes[:, 4] * input_size[0]).float()
    aw = anchors[:, 0]
    ah = anchors[:, 1]
    nbox = tboxes.size(0)

    # Compare shapes only: boxes and anchors are both centred at the origin.
    gt_box = torch.cat([torch.zeros(1, nbox).cuda(), torch.zeros(1, nbox).cuda(), gw.unsqueeze(0), gh.unsqueeze(0)], 0)
    anchor_box = torch.cat([torch.zeros(1, nA).cuda(), torch.zeros(1, nA).cuda(), aw.unsqueeze(0), ah.unsqueeze(0)], 0)
    ious = bbox_ious(gt_box.unsqueeze(2).repeat(1, 1, nA), anchor_box.unsqueeze(1).repeat(1, nbox, 1), x1y1x2y2=False)
    _, best_a = ious.max(1)

    # NOTE(review): a centre coordinate of exactly 1.0 would index one cell
    # past the grid; confirm the labels guarantee values below 1.
    gj = gy.long()
    gi = gx.long()

    obj_mask[ind_B, best_a, gj, gi] = 1
    noobj_mask[ind_B, best_a, gj, gi] = 0
    # Anchors that overlap a box well, without being the best match, are
    # excluded from the no-object loss for that cell.
    for i, iou in enumerate(ious):
        over_thresh = iou > ignore_thresh
        if over_thresh.sum():
            noobj_mask[ind_B[i:i + 1], over_thresh.nonzero().squeeze(1), gj[i:i + 1], gi[i:i + 1]] = 0

    # Regression targets: offset inside the cell and log size ratio to the anchor.
    tx[ind_B, best_a, gj, gi] = gx - gx.floor()
    ty[ind_B, best_a, gj, gi] = gy - gy.floor()
    tw[ind_B, best_a, gj, gi] = torch.log(gw / anchors[best_a][:, 0])
    th[ind_B, best_a, gj, gi] = torch.log(gh / anchors[best_a][:, 1])
    tcls[ind_B, best_a, gj, gi] = tboxes[:, 0].float()
    tconf = obj_mask.float()

    pred_boxes = pred_boxes.contiguous().view(nB, nA, nH, nW, 4).cuda()
    conf = conf.contiguous().view(nB, nA, nH, nW).data
    target_boxes = torch.cat([(tboxes[:, 1] * input_size[1]).float().unsqueeze(0),
                              (tboxes[:, 2] * input_size[0]).float().unsqueeze(0),
                              gw.unsqueeze(0),
                              gh.unsqueeze(0)], 0)

    # IoU between each ground-truth box and the prediction at its assigned cell/anchor.
    iou_scores[ind_B, best_a, gj, gi] = bbox_ious(pred_boxes[ind_B, best_a, gj, gi].t(), target_boxes, x1y1x2y2=False)
    conf50 = (conf[ind_B, best_a, gj, gi] > 0.5).float()
    detected50 = (iou_scores[ind_B, best_a, gj, gi] > 0.5).float() * conf50
    detected75 = (iou_scores[ind_B, best_a, gj, gi] > 0.75).float() * conf50

    return nbox, iou_scores, obj_mask, noobj_mask, tx, ty, tw, th, tconf, tcls, detected50, detected75

class RegionLoss(nn.Module):
    """Single-scale YOLO region loss.

    NOTE(review): ``forward`` calls ``build_targets`` with eleven positional
    arguments and unpacks eleven results, whereas the ``build_targets``
    defined in this file takes eight parameters and returns twelve values.
    ``forward`` therefore cannot run against that function as written;
    confirm which ``build_targets`` this class is meant to use before
    relying on it.
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1):
        """Store the loss configuration.

        Args:
            num_classes: number of class scores per anchor.
            anchors: flat list of anchor values (never mutated here).
            num_anchors: number of anchors the flat list describes.
        """
        super(RegionLoss, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        # Integer division: anchor_step is used as a view() dimension, so it
        # must stay an int under Python 3 as well.
        self.anchor_step = len(anchors) // num_anchors
        self.coord_scale = 1
        self.noobject_scale = 1
        self.object_scale = 5
        self.class_scale = 1
        self.thresh = 0.6
        self.seen = 0

    def forward(self, output, target):
        """Compute the summed coordinate, confidence and class loss.

        Args:
            output: network output, viewed here as
                ``(nB, nA, 5 + nC, nH, nW)``.
            target: ground-truth tensor; a 3-D target is flattened to 2-D
                over its last dimension.

        Returns:
            The total loss as a scalar tensor.
        """
        # output : BxAs*(4+1+num_classes)*H*W
        if target.dim() == 3:
            target = target.view(-1, target.size(-1))
        output, target = neg_filter(output, target)

        nB = output.data.size(0)
        nA = self.num_anchors
        nC = self.num_classes
        nH = output.data.size(2)
        nW = output.data.size(3)

        # Split the per-anchor channels: x, y, w, h, objectness, class scores.
        output = output.view(nB, nA, (5 + nC), nH, nW)
        x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).view(nB, nA, nH, nW))
        y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).view(nB, nA, nH, nW))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).view(nB, nA, nH, nW)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).view(nB, nA, nH, nW)
        conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).view(nB, nA, nH, nW))
        # [nB, nA, nC, nW, nH] | (bs, 5, 1, 13, 13)
        cls = output.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda()))
        cls = cls.view(nB * nA, nC, nH * nW).transpose(1, 2).contiguous().view(nB * nA * nH * nW, nC)

        # Decode predictions into grid-unit boxes, one column per cell/anchor.
        pred_boxes = torch.cuda.FloatTensor(4, nB * nA * nH * nW)
        grid_x = torch.linspace(0, nW - 1, nW).repeat(nH, 1).repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        grid_y = torch.linspace(0, nH - 1, nH).repeat(nW, 1).t().repeat(nB * nA, 1, 1).view(nB * nA * nH * nW).cuda()
        anchor_w = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([0])).cuda()
        anchor_h = torch.Tensor(self.anchors).view(nA, self.anchor_step).index_select(1, torch.LongTensor([1])).cuda()
        anchor_w = anchor_w.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        anchor_h = anchor_h.repeat(nB, 1).repeat(1, 1, nH * nW).view(nB * nA * nH * nW)
        # Flatten the 4-D activations so they line up element-wise with the
        # flat grid/anchor vectors.
        pred_boxes[0] = x.data.reshape(-1) + grid_x
        pred_boxes[1] = y.data.reshape(-1) + grid_y
        pred_boxes[2] = torch.exp(w.data.reshape(-1)) * anchor_w
        pred_boxes[3] = torch.exp(h.data.reshape(-1)) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.transpose(0, 1).contiguous().view(-1, 4))

        # NOTE(review): this call does not match the build_targets signature
        # defined in this file (see the class docstring).
        nGT, nCorrect, coord_mask, conf_mask, cls_mask, tx, ty, tw, th, tconf, tcls = build_targets(pred_boxes,
                                                                                                    target.data,
                                                                                                    self.anchors, nA,
                                                                                                    nC,
                                                                                                    nH, nW,
                                                                                                    self.noobject_scale,
                                                                                                    self.object_scale,
                                                                                                    self.thresh,
                                                                                                    self.seen)
        cls_mask = (cls_mask == 1)
        if cfg.metayolo:
            tcls.zero_()
        nProposals = int((conf > 0.25).float().sum().item())

        tx = Variable(tx.cuda())
        ty = Variable(ty.cuda())
        tw = Variable(tw.cuda())
        th = Variable(th.cuda())
        tconf = Variable(tconf.cuda())
        tcls = Variable(tcls.view(-1)[cls_mask].long().cuda())

        coord_mask = Variable(coord_mask.cuda())
        conf_mask = Variable(conf_mask.cuda().sqrt())
        cls_mask = Variable(cls_mask.view(-1, 1).repeat(1, nC).cuda())
        cls = cls[cls_mask].view(-1, nC)

        # Summed (not averaged) losses; reduction='sum' is the current
        # spelling of the deprecated size_average=False.
        loss_x = self.coord_scale * nn.MSELoss(reduction='sum')(x * coord_mask, tx * coord_mask) / 2.0
        loss_y = self.coord_scale * nn.MSELoss(reduction='sum')(y * coord_mask, ty * coord_mask) / 2.0
        loss_w = self.coord_scale * nn.MSELoss(reduction='sum')(w * coord_mask, tw * coord_mask) / 2.0
        loss_h = self.coord_scale * nn.MSELoss(reduction='sum')(h * coord_mask, th * coord_mask) / 2.0
        loss_conf = nn.MSELoss(reduction='sum')(conf * conf_mask, tconf * conf_mask) / 2.0
        loss_cls = self.class_scale * nn.CrossEntropyLoss(reduction='sum')(cls, tcls)
        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        # .item() reads a scalar tensor; indexing a 0-dim tensor with [0]
        # is rejected by current PyTorch.
        print('%d: nGT %d, recall %d, proposals %d, loss: x %f, y %f, w %f, h %f, conf %f, cls %f, total %f' % (
            self.seen, nGT, nCorrect, nProposals, loss_x.item(), loss_y.item(), loss_w.item(), loss_h.item(),
            loss_conf.item(), loss_cls.item(), loss.item()))
        return loss

class RegionLossV2(nn.Module):
    """
    Yolo region loss + Softmax classification across meta-inputs
    """

    def __init__(self, num_classes=0, anchors=[], num_anchors=1, input_size=(832, 832)):
        """Store the loss configuration.

        Args:
            num_classes: number of class scores per anchor.
            anchors: flat list of anchor values; ``forward`` reads it in
                slices of six values per feature scale (never mutated here).
            num_anchors: number of anchors per feature scale.
            input_size: ``(height, width)`` of the network input; the
                training loop overwrites this attribute for every batch.
        """
        super(RegionLossV2, self).__init__()
        self.num_classes = num_classes
        self.anchors = anchors
        self.num_anchors = num_anchors
        self.coord_scale = 1
        self.class_scale = 1
        self.obj_scale = 1
        self.noobj_scale = 100
        self.thresh = 0.5
        self.seen = 0
        self.input_size = input_size
        # Down-sampling factor of each detection feature map.
        self.feature_scale = [32, 16, 8]
        print('class_scale', self.class_scale)

    def forward(self, output, target):
        """Compute the multi-scale detection loss and its monitoring stats.

        Args:
            output: network output, per the original note of shape
                ``(bs*cs, nA*(5+1), N)`` where ``N`` is the total number of
                cells over all feature maps.
            target: ground truth, per the original note of shape
                ``(bs, cs, 50*5)``.

        Returns:
            A tuple ``(loss, box_loss, conf_loss, cls_loss, cls_acc,
            recall50, recall75, nProposals)``; ``loss`` is a tensor, the
            other entries are plain Python numbers.
        """
        # output : (bs*cs, nA*(5+1), N)
        # target : (bs, cs, 50*5)
        bs = target.size(0)
        cs = target.size(1)
        nA = self.num_anchors
        nC = self.num_classes
        N = output.data.size(2)

        # Class scores are compared across the cs meta-inputs of each image:
        # rearrange to one row per (image, anchor, cell) with cs columns.
        cls = output.view(output.size(0), nA, (5 + nC), N)
        cls = cls.index_select(2, Variable(torch.linspace(5, 5 + nC - 1, nC).long().cuda())).squeeze()
        cls = cls.view(bs, cs, nA * N).transpose(1, 2).contiguous().view(bs * nA * N, cs)
        cls_conf = F.softmax(cls, 1)
        _, cls_max_ids = torch.max(cls_conf, 1)
        cls_max_ids = cls_max_ids.data

        # Rearrange target and perform filtering operation
        target = target.view(-1, target.size(-1))
        output, target, inds = neg_filter_v2(output, target, withids=True)
        # How many of the kept rows belong to each of the bs images.
        counts, _ = np.histogram(inds, bins=bs, range=(0, bs * cs))

        nB = output.data.size(0)
        output = output.view(nB, nA, (5 + nC), N)  # (nB, nA, (5+nC), N)
        x = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([0]))).squeeze(2))  # (nB, nA, N)
        y = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([1]))).squeeze(2))
        w = output.index_select(2, Variable(torch.cuda.LongTensor([2]))).squeeze(2)
        h = output.index_select(2, Variable(torch.cuda.LongTensor([3]))).squeeze(2)
        conf = torch.sigmoid(output.index_select(2, Variable(torch.cuda.LongTensor([4]))).squeeze(2))

        # Per-scale cell grids, concatenated along the cell axis.
        pred_boxes = torch.cuda.FloatTensor(4, nB, nA, N)
        grid_x = []
        grid_y = []
        scale = []
        feature_size = []
        for fs in self.feature_scale:
            feature_h = self.input_size[0] // fs
            feature_w = self.input_size[1] // fs
            feature_size.append([feature_h, feature_w])
            grid_x.append(torch.linspace(0, feature_w - 1, feature_w).repeat(feature_h, 1)
                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
            grid_y.append(torch.linspace(0, feature_h - 1, feature_h).repeat(feature_w, 1).t()
                          .repeat(nB * nA, 1, 1).view(nB, nA, feature_h * feature_w).cuda())
            scale.append((torch.ones(nB, nA, feature_h * feature_w) * fs).cuda())
        grid_x = torch.cat(grid_x, 2)  # (nB, nA, N)
        grid_y = torch.cat(grid_y, 2)
        scale = torch.cat(scale, 2)

        # Per-scale anchors. Six anchor values are read per scale, which
        # presumably means 3 anchors x (w, h) — confirm against the cfg.
        anchor_w = []
        anchor_h = []
        for i, (feature_h, feature_w) in enumerate(feature_size):
            cells = feature_h * feature_w
            scale_anchors = torch.Tensor(self.anchors[6 * i:6 * (i + 1)]).view(nA, -1)
            aw = scale_anchors.index_select(1, torch.LongTensor([0])).cuda()
            ah = scale_anchors.index_select(1, torch.LongTensor([1])).cuda()
            anchor_w.append(aw.repeat(nB, cells).view(nB, nA, cells))
            anchor_h.append(ah.repeat(nB, cells).view(nB, nA, cells))
        anchor_w = torch.cat(anchor_w, 2)
        anchor_h = torch.cat(anchor_h, 2)

        # Decode predictions: centres scaled by the stride, sizes by the anchors.
        pred_boxes[0] = (x.data + grid_x) * scale
        pred_boxes[1] = (y.data + grid_y) * scale
        pred_boxes[2] = torch.exp(w.data) * anchor_w
        pred_boxes[3] = torch.exp(h.data) * anchor_h
        pred_boxes = convert2cpu(pred_boxes.permute(1, 2, 3, 0).contiguous())  # (nB, nA, N, 4)

        # Build the targets scale by scale and concatenate along the cell axis.
        nGT = 0
        obj_mask = []
        noobj_mask = []
        tx = []
        ty = []
        tw = []
        th = []
        tconf = []
        tcls = []
        start_N = 0
        detected50 = torch.zeros(0)
        detected75 = torch.zeros(0)
        for imap, (feature_h, feature_w) in enumerate(feature_size):
            cells = feature_h * feature_w
            (nGT, _, obj_mask_temp, noobj_mask_temp, tx_temp, ty_temp, tw_temp, th_temp, tconf_temp,
             tcls_temp, detected50_temp, detected75_temp) = build_targets(
                pred_boxes[:, :, start_N:start_N + cells, :],
                target.data.cuda(),
                conf[:, :, start_N:start_N + cells],
                torch.Tensor(self.anchors[6 * imap:6 * (imap + 1)]).view(nA, -1).cuda(),
                nA,
                feature_size[imap],
                self.input_size,
                self.thresh)
            # The per-box detection flags are accumulated over all scales.
            if not len(detected50):
                detected50 = torch.zeros(nGT).cuda()
            if not len(detected75):
                detected75 = torch.zeros(nGT).cuda()
            detected50 += detected50_temp
            detected75 += detected75_temp
            start_N += cells
            obj_mask.append(obj_mask_temp.view(nB, nA, cells))
            noobj_mask.append(noobj_mask_temp.view(nB, nA, cells))
            tx.append(tx_temp.view(nB, nA, cells))
            ty.append(ty_temp.view(nB, nA, cells))
            tw.append(tw_temp.view(nB, nA, cells))
            th.append(th_temp.view(nB, nA, cells))
            tconf.append(tconf_temp.view(nB, nA, cells))
            tcls.append(tcls_temp.view(nB, nA, cells))

        obj_mask = torch.cat(obj_mask, 2)
        noobj_mask = torch.cat(noobj_mask, 2)
        tx = torch.cat(tx, 2)
        ty = torch.cat(ty, 2)
        tw = torch.cat(tw, 2)
        th = torch.cat(th, 2)
        tconf = torch.cat(tconf, 2)
        tcls = torch.cat(tcls, 2)

        # Take care of class mask: fold the kept rows of every image back
        # into one (nA, N) mask / class map per image.
        idx_start = 0
        cls_mask_list = []
        tcls_list = []
        for count in counts:
            if count == 0:
                cur_mask = torch.zeros(nA, N).cuda()
                cur_tcls = torch.zeros(nA, N).cuda()
            else:
                cur_mask = torch.sum(obj_mask[idx_start:idx_start + count].float(), dim=0)
                cur_tcls = torch.sum(tcls[idx_start:idx_start + count], dim=0)
            cls_mask_list.append(cur_mask)
            tcls_list.append(cur_tcls)
            idx_start += count
        cls_mask = torch.stack(cls_mask_list)  # (bs, nA, N)
        tcls = torch.stack(tcls_list)
        # Only cells claimed by exactly one kept row take part in the class loss.
        cls_mask = (cls_mask == 1)

        # Monitoring statistics.
        detected50 = (detected50 > 0).float()
        detected75 = (detected75 > 0).float()
        recall50 = detected50.sum() / (nGT + 1e-16)
        recall75 = detected75.sum() / (nGT + 1e-16)
        nProposals = int((conf > 0.25).float().sum().item())

        tx = Variable(tx)
        ty = Variable(ty)
        tw = Variable(tw)
        th = Variable(th)
        tconf = Variable(tconf)
        obj_mask = Variable(obj_mask.bool())
        noobj_mask = Variable(noobj_mask.bool())

        cls = cls[Variable(cls_mask.view(-1, 1).repeat(1, cs))].view(-1, cs)
        cls_max_ids = cls_max_ids[cls_mask.view(-1)]
        tcls = Variable(tcls[cls_mask].long())
        cls_acc = float(torch.sum(cls_max_ids == tcls.data)) / (cls_max_ids.numel() + 1e-16)

        ClassificationLoss = nn.CrossEntropyLoss()
        MseLoss = nn.MSELoss()
        BceLoss = nn.BCELoss()

        if bool(obj_mask.any()):
            loss_x = self.coord_scale * MseLoss(x[obj_mask], tx[obj_mask])
            loss_y = self.coord_scale * MseLoss(y[obj_mask], ty[obj_mask])
            loss_w = self.coord_scale * MseLoss(w[obj_mask], tw[obj_mask])
            loss_h = self.coord_scale * MseLoss(h[obj_mask], th[obj_mask])
            loss_conf_obj = BceLoss(conf[obj_mask], tconf[obj_mask])
        else:
            # No ground-truth cell in this batch: a mean over an empty
            # selection is NaN and would poison the total loss. The sum of
            # the empty selection is a zero that stays attached to the graph.
            zero = x[obj_mask].sum()
            loss_x = loss_y = loss_w = loss_h = loss_conf_obj = zero
        loss_conf_noobj = BceLoss(conf[noobj_mask], tconf[noobj_mask])
        loss_conf = self.obj_scale * loss_conf_obj + self.noobj_scale * loss_conf_noobj

        if len(cls):
            loss_cls = self.class_scale * ClassificationLoss(cls, tcls)
        else:
            loss_cls = Variable(torch.Tensor([0]).float().cuda())

        loss = loss_x + loss_y + loss_w + loss_h + loss_conf + loss_cls

        return loss,loss_x.item() + loss_y.item() + loss_w.item() + loss_h.item(),loss_conf.item(),loss_cls.item(),cls_acc,recall50.item(),recall75.item(),nProposals

def select_classes(pred, tgt, ids):
    """Keep the samples whose class is in ``ids`` and relabel them.

    Args:
        pred: 2-D tensor of per-class scores, one row per sample.
        tgt: 1-D tensor of class labels, one per row of ``pred``.
        ids: sequence of class ids to keep; it also selects (and orders) the
            score columns that are returned.

    Returns:
        ``(new_pred, new_tgt)``: the kept rows of ``pred`` restricted to the
        columns in ``ids``, and a NumPy array with each kept sample's
        position of its class inside ``ids``.

    Raises:
        ValueError: if ``ids`` is empty.
    """
    # convert tgt to numpy
    tgt = tgt.cpu().data.numpy()
    id_array = np.asarray(ids)
    # matches[k, j] is True when sample j has class ids[k].
    matches = tgt[np.newaxis, :] == id_array[:, np.newaxis]
    # Select on membership, not on the remapped label being > 0: the latter
    # silently discards every sample of class ids[0], whose new label is 0.
    # np.nonzero(...)[0] stays 1-D even for a single match, so new_pred
    # keeps its two dimensions.
    keep = np.nonzero(matches.any(axis=0))[0]
    new_tgt = matches.argmax(axis=0)[keep]
    new_pred = pred[keep]
    new_pred = new_pred[:, ids]
    return new_pred, new_tgt

few-shot-learning for object detection的更多相关文章

deep learning on object detection
回归工作一周,忙的头晕,看了两三篇文章,主要在写各种文档和走各种办事流程了-- 这次来写写object detection最近看的三篇文章吧.都不是最近的文章,但是是今年的文章,我也想借此让自己赶快熟 ...
论文阅读笔记五十三：Libra R-CNN: Towards Balanced Learning for Object Detection(CVPR2019)
论文原址:https://arxiv.org/pdf/1904.02701.pdf github:https://github.com/OceanPang/Libra_R-CNN 摘要相比模型的结构 ...
Object detection with deep learning and OpenCV
目录 Single Shot Detectors for Object Detection Deep learning-based object detection with OpenCV 这篇文 ...
（转）Awesome Object Detection
Awesome Object Detection 2018-08-10 09:30:40 This blog is copied from: https://github.com/amusi/awes ...
[Arxiv1706] Few-Example Object Detection with Model Communication 论文笔记
p.p1 { margin: 0.0px 0.0px 0.0px 0.0px; font: 13.0px "Helvetica Neue"; color: #042eee } p. ...
object detection 总结
1.基础自己对于YOLOV1,2,3都比较熟悉. RCNN也比较熟悉.这个是自己目前掌握的基础2.第一步看一下2019年的井喷的anchor free的网络3.第二步看一下以往,引用多的网路4. ...
关于目标检测(Object Detection)的文献整理
本文对CV中目标检测子方向的研究,整理了如下的相关笔记(持续更新中): 1. Cascade R-CNN: Delving into High Quality Object Detection 年份: ...
论文学习-深度学习目标检测2014至201901综述-Deep Learning for Generic Object Detection A Survey
目录写在前面目标检测任务与挑战目标检测方法汇总基础子问题基于DCNN的特征表示主干网络(network backbone) Methods For Improving Object Rep ...
课程四(Convolutional Neural Networks)，第三周（Object detection） —— 0.Learning Goals
Learning Goals: Understand the challenges of Object Localization, Object Detection and Landmark Find ...
论文阅读之: Hierarchical Object Detection with Deep Reinforcement Learning
Hierarchical Object Detection with Deep Reinforcement Learning NIPS 2016 WorkShop Paper : https://a ...

随机推荐

（广州南沙）vue知识点整理2021，主要是防止忘记防备快速翻看
/////////////////////////// vue 中使用路由技巧:router //////////////////////////////// 写笔记说明,之前在江门工作 ...
一文学会JDBC实现java和mySQL的数据连接(尚硅谷学习课程代码+笔记+思路总结)
JDBC是指数据库连接技术,用于java连接mySQL等数据库.本文详细介绍了尚硅谷课程中JDBC的学习内容和补充知识. 概述 java语言只提供规范接口,存在于java.sql.javax.sql包 ...
Android Studio导出APP的数据库db文件
原文地址:Android Studio导出APP的数据库db文件 | Stars-One的杂货小窝最近项目开发需要使用到Android内置的Sqlite存数据,但是公司里没有对应的调试环境,只能让现 ...
IO 多路复用原理
IO 多路复用普通情况下,一个进程只能监视一个文件描述符(阻塞),如果使用非阻塞 IO,则会使 CPU 频繁陷入内核和空转,降低效率.而IO 多路复用是操作系统提供的接口,他会帮你同时监视多个 fd ...
面试官：SpringBoot如何优雅停机？
优雅停机(Graceful Shutdown) 是指在服务器需要关闭或重启时,能够先处理完当前正在进行的请求,然后再停止服务的操作. 优雅停机的实现步骤主要分为以下几步: 停止接收新的请求:首先,系统 ...
python面向对象（反射、内置方法、元类）
一反射 # 静态语言:在执行前就定义好数据类型 # var x int=8 # 动态语言:在执行的时候,才识别数据类型 # x = 8 # 什么是反射? # 指的是在程序运行过程中可以"动 ...
记录--Cesium+Vue实战教程——地图导航
这里给大家分享我在网上总结出来的一些知识,希望对大家有所帮助项目效果我们今天要实现的是一个路径规划的功能,有两个输入框.输入起点终点,然后查询,得到规划的路径,效果如下: 我们会用到以下库: Ax ...
记录--Uniapp + TypeScript 配置文档
这里给大家分享我在网上总结出来的一些知识,希望对大家有所帮助 0 目标使用 uniapp + TypeScript 为基础栈进行小程序开发 uniapp 是一个使用 Vue.js 开发所有前端应用 ...
RMI反序列化分析
RMI介绍 RMI全程Remote Method Invocation (远程方法引用),RMI有客户端和服务端,还有一个注册中心,在java中客户端可以通过RMI调用服务端的方法,流程图如下: 服务 ...

few-shot-learning for object detection

few-shot-learning for object detection的更多相关文章

随机推荐

热门专题