VOC数据集生成代码使用说明

# split.py：输入为图片目录（images）和同名的标签 txt 文件；txt 中每行是用逗号分隔的 8 个坐标值（四个顶点的 x、y）。

import os

import numpy as np

import math

import cv2 as cv

import imageio

# split.py
#
# For every .jpg/.png image in `path`:
#   1. rescale it so the short side is 600 px, capping the long side at 1200 px,
#      and save the result as <out_path>/<stem>.jpg;
#   2. read the matching ground-truth file <gt_path>/<stem>.txt, where each line
#      holds eight comma-separated coordinates (x, y of four corners);
#   3. rescale the corners to the resized image, order them by ascending x, and
#      write one line per box to label_tmp/<stem>.txt in the form
#      "tianchi\tx\ty\tx\ty\tx\ty\tx\ty\n".

path = '/home/chendali1/Gsj/prepare_training_data/ICDAR/images_train/'
gt_path = '/home/chendali1/Gsj/prepare_training_data/ICDAR/result_train/'
out_path = 're_image'
label_dir = 'label_tmp'

os.makedirs(out_path, exist_ok=True)

files = os.listdir(path)
files.sort()

for file in files:
    _, basename = os.path.split(file)
    if basename.lower().split('.')[-1] not in ['jpg', 'png']:
        continue
    stem, ext = os.path.splitext(basename)
    gt_file = os.path.join(gt_path, stem + '.txt')
    img_path = os.path.join(path, file)
    print(img_path)

    img = cv.imread(img_path)
    if img is None:
        # OpenCV could not decode the file. Rename it to .gif and retry with
        # imageio (presumably the file is a GIF stored under an image suffix
        # OpenCV does not expect -- TODO confirm against the dataset).
        print('****************************')
        print('Image ' + img_path + ' may be a bad picture!')
        print('****************************')
        newname = os.path.join(path, stem + '.gif')
        os.rename(img_path, newname)
        img_path = newname
        print(img_path)
        print('Try read with imageio.')
        gif = imageio.mimread(img_path)
        if not gif:
            # Nothing was decoded: report it and move on to the next image
            # instead of indexing into an empty result.
            print('****************************')
            print("Image " + img_path + " can't be read!")
            print('****************************')
            continue
        print('Read success!')
        # Only the first frame is used.
        img = cv.cvtColor(gif[0], cv.COLOR_RGB2BGR)

    # Scale factor: short side -> 600, unless that pushes the long side
    # beyond 1200, in which case long side -> 1200.
    img_size = img.shape
    im_size_min = np.min(img_size[0:2])
    im_size_max = np.max(img_size[0:2])
    im_scale = float(600) / float(im_size_min)
    if np.round(im_scale * im_size_max) > 1200:
        im_scale = float(1200) / float(im_size_max)

    re_im = cv.resize(img, None, None, fx=im_scale, fy=im_scale, interpolation=cv.INTER_LINEAR)
    re_size = re_im.shape
    cv.imwrite(os.path.join(out_path, stem) + '.jpg', re_im)

    with open(gt_file, 'r') as f:
        lines = f.readlines()

    label_lines = []
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # Tolerate blank lines (e.g. a trailing newline) in the label file.
            continue
        splitted_line = stripped.split(',')
        coords = [float(v) for v in splitted_line[:8]]

        # Map each corner from the original image to the resized image,
        # truncating to whole pixels.
        pt_x = np.zeros((4, 1))
        pt_y = np.zeros((4, 1))
        for k in range(4):
            pt_x[k, 0] = int(coords[2 * k] / img_size[1] * re_size[1])
            pt_y[k, 0] = int(coords[2 * k + 1] / img_size[0] * re_size[0])

        # Order the corners by ascending x, keeping each y with its x.
        order = np.argsort(pt_x, axis=0)[:, 0]
        sorted_x = pt_x[order, 0]
        sorted_y = pt_y[order, 0]

        fields = ['tianchi']
        for x, y in zip(sorted_x, sorted_y):
            fields.append(str(int(x)))
            fields.append(str(int(y)))
        label_lines.append('\t'.join(fields) + '\n')

    if label_lines:
        os.makedirs(label_dir, exist_ok=True)
        # Write (not append) so that re-running the script does not duplicate
        # the boxes already stored for this image.
        with open(os.path.join(label_dir, stem) + '.txt', 'w') as f:
            f.writelines(label_lines)

# ToVoc.py：上面的 split.py 执行完成后，直接运行本脚本即可生成 VOC 格式的数据集。

from xml.dom.minidom import Document

import cv2

import os

import glob

import shutil

import numpy as np

def generate_xml(name, lines, img_size, class_sets, doncateothers=True):
    """Build a VOC-style annotation document for one image.

    Args:
        name: Image stem; '.jpg' is appended to form the <filename> entry.
        lines: Label lines, whitespace-separated: a class name followed by
            eight coordinates (x0 y0 x1 y1 x2 y2 x3 y3). Blank lines are
            ignored.
        img_size: Indexable as (height, width, depth), e.g. ``img.shape``.
        class_sets: Class names to keep.
        doncateothers: Kept for interface compatibility. Objects whose class
            is not in ``class_sets``, or is 'dontcare', are dropped whatever
            this flag is set to.

    Returns:
        A ``(doc, objs)`` tuple: the minidom ``Document`` and a list of dicts
        with keys 'class', 'box' (float array of the eight coordinates),
        'truncation', 'difficult' and 'occlusion'.
    """
    doc = Document()

    def append_xml_node_attr(child, parent=None, text=None):
        """Append element ``child`` to ``parent`` (the document if None)."""
        ele = doc.createElement(child)
        if text is not None:
            ele.appendChild(doc.createTextNode(text))
        target = doc if parent is None else parent
        target.appendChild(ele)
        return ele

    img_name = name + '.jpg'

    # Header
    annotation = append_xml_node_attr('annotation')
    append_xml_node_attr('folder', parent=annotation, text='tianchi')
    append_xml_node_attr('filename', parent=annotation, text=img_name)

    source = append_xml_node_attr('source', parent=annotation)
    append_xml_node_attr('database', parent=source, text='coco_text_database')
    append_xml_node_attr('annotation', parent=source, text='tianchi')
    append_xml_node_attr('image', parent=source, text='tianchi')
    append_xml_node_attr('flickrid', parent=source, text='')

    owner = append_xml_node_attr('owner', parent=annotation)
    append_xml_node_attr('name', parent=owner, text='ms')

    size = append_xml_node_attr('size', annotation)
    append_xml_node_attr('width', size, str(img_size[1]))
    append_xml_node_attr('height', size, str(img_size[0]))
    append_xml_node_attr('depth', size, str(img_size[2]))
    append_xml_node_attr('segmented', parent=annotation, text='')

    # Objects
    objs = []
    for line in lines:
        splitted_line = line.strip().lower().split()
        if not splitted_line:
            continue
        cls = splitted_line[0]
        # Unknown classes are treated as 'dontcare', and 'dontcare' objects
        # are never written.
        if cls not in class_sets or cls == 'dontcare':
            continue

        # Each coordinate is shifted by one before truncation to int.
        x0, y0, x1, y1, x2, y2, x3, y3 = [
            int(float(v) + 1) for v in splitted_line[1:9]
        ]

        occlusion = 0
        truncation = 0.0
        difficult = 1 if _is_hard(cls, truncation, occlusion, x1, y1, x2, y2) else 0
        truncted = 0 if truncation < 0.5 else 1

        obj = append_xml_node_attr('object', parent=annotation)
        append_xml_node_attr('name', parent=obj, text=cls)
        append_xml_node_attr('pose', parent=obj, text='none')
        append_xml_node_attr('truncated', parent=obj, text=str(truncted))
        append_xml_node_attr('difficult', parent=obj, text=str(int(difficult)))

        # One distinctly named pair of elements per corner.
        bb = append_xml_node_attr('bndbox', parent=obj)
        corners = (
            ('x0', x0), ('y0', y0),
            ('x1', x1), ('y1', y1),
            ('x2', x2), ('y2', y2),
            ('x3', x3), ('y3', y3),
        )
        for tag, value in corners:
            append_xml_node_attr(tag, parent=bb, text=str(value))

        objs.append({
            'class': cls,
            'box': np.asarray([x0, y0, x1, y1, x2, y2, x3, y3], dtype=float),
            'truncation': truncation,
            'difficult': difficult,
            'occlusion': occlusion,
        })

    return doc, objs

def _is_hard(cls, truncation, occlusion, x1, y1, x2, y2):
    """Return True when an object should be flagged as difficult.

    An object is difficult if any of the following holds:
      * y2 - y1 is below 25 and occlusion is at least 2;
      * occlusion is at least 3;
      * truncation exceeds 0.8.

    ``cls``, ``x1`` and ``x2`` are accepted but not consulted.
    """
    short_and_occluded = y2 - y1 < 25 and occlusion >= 2
    return bool(short_and_occluded or occlusion >= 3 or truncation > 0.8)

def build_voc_dirs(outdir):
    """Create the VOC directory skeleton under ``outdir``.

    Directories that already exist are left untouched.

    Args:
        outdir: Root of the dataset, e.g. 'TEXTVOC/VOC2007'.

    Returns:
        A tuple ``(annotations_dir, jpeg_images_dir, imagesets_main_dir)``.
    """
    annotations_dir = os.path.join(outdir, 'Annotations')
    images_dir = os.path.join(outdir, 'JPEGImages')
    main_set_dir = os.path.join(outdir, 'ImageSets', 'Main')

    wanted = (
        outdir,
        annotations_dir,
        os.path.join(outdir, 'ImageSets'),
        os.path.join(outdir, 'ImageSets', 'Layout'),
        main_set_dir,
        os.path.join(outdir, 'ImageSets', 'Segmentation'),
        images_dir,
        os.path.join(outdir, 'SegmentationClass'),
        os.path.join(outdir, 'SegmentationObject'),
    )
    for directory in wanted:
        # exist_ok avoids the check-then-create race of a manual exists() test.
        os.makedirs(directory, exist_ok=True)

    return annotations_dir, images_dir, main_set_dir

if __name__ == '__main__':
    # Convert re_image/*.jpg plus label_tmp/*.txt into a VOC2007-style tree:
    # images go to JPEGImages, one XML per image to Annotations, and the
    # image-set lists to ImageSets/Main.
    _outdir = 'TEXTVOC/VOC2007'
    _draw = bool(0)  # not used anywhere in this script
    _dest_label_dir, _dest_img_dir, _dest_set_dir = build_voc_dirs(_outdir)
    _doncateothers = bool(1)

    for dset in ['train']:
        _labeldir = 'label_tmp'
        _imagedir = 're_image'
        class_sets = ('tianchi', 'dontcare')
        class_sets_dict = dict((k, i) for i, k in enumerate(class_sets))
        allclasses = {}  # class name -> number of objects written

        fs = []  # one "<class>_<dset>.txt" handle per class, indexed like class_sets
        ftrain = None
        try:
            for cls in class_sets:
                fs.append(open(os.path.join(_dest_set_dir, cls + '_' + dset + '.txt'), 'w'))
            ftrain = open(os.path.join(_dest_set_dir, dset + '.txt'), 'w')

            files = glob.glob(os.path.join(_labeldir, '*.txt'))
            files.sort()
            for file in files:
                path, basename = os.path.split(file)
                stem, ext = os.path.splitext(basename)
                with open(file, 'r') as f:
                    lines = f.readlines()

                img_file = os.path.join(_imagedir, stem + '.jpg')
                print(img_file)
                img = cv2.imread(img_file)
                if img is None:
                    # No readable image for this label file: skip it rather
                    # than fail on img.shape.
                    print('Skip ' + img_file + ': image cannot be read')
                    continue
                img_size = img.shape

                doc, objs = generate_xml(stem, lines, img_size, class_sets=class_sets,
                                         doncateothers=_doncateothers)

                cv2.imwrite(os.path.join(_dest_img_dir, stem + '.jpg'), img)
                xmlfile = os.path.join(_dest_label_dir, stem + '.xml')
                with open(xmlfile, 'w') as f:
                    f.write(doc.toprettyxml(indent='    '))

                ftrain.writelines(stem + '\n')

                cls_in_image = set(o['class'] for o in objs)
                for obj in objs:
                    cls = obj['class']
                    # The first occurrence counts as 1.
                    allclasses[cls] = allclasses.get(cls, 0) + 1

                # Each per-class list gets exactly one line per image:
                # " 1" when the class is present, " -1" otherwise.
                for cls in class_sets:
                    flag = ' 1\n' if cls in cls_in_image else ' -1\n'
                    fs[class_sets_dict[cls]].writelines(stem + flag)
        finally:
            # Close (and thereby flush) every list file before it is copied
            # below.
            for handle in fs:
                handle.close()
            if ftrain is not None:
                ftrain.close()

        print('~~~~~~~~~~~~~~~~~~~')
        print(allclasses)
        print('~~~~~~~~~~~~~~~~~~~')

        # The val and trainval splits are plain copies of the train split.
        shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), os.path.join(_dest_set_dir, 'val.txt'))
        shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), os.path.join(_dest_set_dir, 'trainval.txt'))
        for cls in class_sets:
            shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'),
                            os.path.join(_dest_set_dir, cls + '_trainval.txt'))
            shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'),
                            os.path.join(_dest_set_dir, cls + '_val.txt'))

VOC数据集生成代码使用说明的更多相关文章

【Detection】物体识别-制作PASCAL VOC数据集
PASCAL VOC数据集 PASCAL VOC为图像识别和分类提供了一整套标准化的优秀的数据集,从2005年到2012年每年都会举行一场图像识别challenge 默认为20类物体 1 数据集结构 ...
搭建 MobileNet-SSD 开发环境并使用 VOC 数据集训练 TensorFlow 模型
原文地址:搭建 MobileNet-SSD 开发环境并使用 VOC 数据集训练 TensorFlow 模型 0x00 环境 OS: Ubuntu 1810 x64 Anaconda: 4.6.12 P ...
在Ubuntu内制作自己的VOC数据集
一.VOC数据集的简介 PASCAL VOC为图像的识别和分类提供了一整套标准化的优秀数据集,基本上就是目标检测数据集的模板.现在有VOC2007,VOC2012.主要有20个类.而现在主要的模型评估 ...
【工具引入】uiautomatorviewer 查找元素后自动生成代码
缘起公司部门调整PC部门和无线部门合并,原本负责主站PC端自动化的同事需要马上上手安卓,IOS自动化.对于初次接触移动端的测试者来说,跨度还是有点大的.加之人员有些变动,不得不搞个工具降低学习成本, ...
PASCAL VOC数据集分析（转）
PASCAL VOC数据集分析 PASCAL VOC为图像识别和分类提供了一整套标准化的优秀的数据集,从2005年到2012年每年都会举行一场图像识别challenge. 本文主要分析PASCAL V ...
自动化工具制作PASCAL VOC 数据集
自动化工具制作PASCAL VOC 数据集 1. VOC的格式 VOC主要有三个重要的文件夹:Annotations.ImageSets和JPEGImages JPEGImages 文件夹该文件 ...
Eclipse 使用mybatis generator插件自动生成代码
Eclipse 使用mybatis generator插件自动生成代码标签: mybatis 2016-12-07 15:10 5247人阅读评论(0) 收藏举报 .embody{ paddin ...
目标检测：keras-yolo3之制作VOC数据集训练指南
制作VOC数据集指南 Github:https://github.com/hyhouyong/keras-yolo3 LabelImg标注工具(windows环境下):https://github.c ...
VOC数据集目标检测
最近在做与目标检测模型相关的工作,很多都要求VOC格式的数据集. PASCAL VOC挑战赛 (The PASCAL Visual Object Classes )是一个世界级的计算机视觉挑战赛, P ...

随机推荐

修复服务器上出现ImportError: cannot import name main的问题
在服务器上成功升级pip2之后再运行pip2命令出现如下报错信息 Traceback (most recent call last): File "/usr/bin/pip2.7" ...
对象及变量的并发访问(同步方法、同步代码块、对class进行加锁、线程死锁)&内部类的基本用法
主要学习多线程的并发访问,也就是使得线程安全. 同步的单词为synchronized,异步的单词为asynchronized 同步主要就是通过锁的方式实现,一种就是隐式锁,另一种是显示锁Lock,本节 ...
pytroch 0.3 到 0.4版本迁移资料mark
搜了一堆,还是官方资料给力,一份中文,一份英文,maek一下 https://www.pytorchtutorial.com/pytorch-0-4-0-migration-guide/ https: ...
python3-面向对象进阶(内置方法)
面向对象进阶: isinstance和issubclass 反射 __setattr__,__getattr,__delattr__ __setitem__,__getitem,__delitem__ ...
Keepalived详解（一）：Keepalived介绍【转】
一.Keepalived介绍: Keepalived是Linux下一个轻量级的高可用解决方案,它与HeartBeat.RoseHA实现的功能类似,都可以实现服务或者网络的高可用,但是又 ...
Spring MVC的核心控制器DispatcherServlet的作用
关于Spring MVC的核心控制器DispatcherServlet的作用,以下说法错误的是( )? 它负责接收HTTP请求加载配置文件实现业务操作初始化上下应用对象ApplicationC ...
linux mysql 定时备份使用crontab
第一步:在服务器上配置备份目录代码: mkdir /var/lib/mysqlbackup cd /var/lib/mysqlbackup 第二步:编写备份脚本代码: vi dbbackup.sh ...
[MySQL]join的细节
left join,左表返回所有记录,右表只返回跟左表有关联的记录,当右表有N条记录跟左表的某一条记录A关联,那么查询结果会出现N条A记录(相应关联右表的N条记录) right join,右表返回所有 ...
微信小程序—获取用户网络状态和设备的信息
这个是一个简易教程,按照他的步骤下载好了,打开界面看到的效果是如下的:
android7.0以上使用融云即使通讯的坑
一.连接服务器不走connect()方法在android6.0以下,在使用融云sdk时,直接将依赖库引入到项目中即可.但是在7.0及以上时,直接应用会发现消息一直发送不出去,错误提示为dlopen ...

VOC数据集生成代码使用说明

VOC数据集生成代码使用说明的更多相关文章

随机推荐

热门专题