背景

一般在TX2上部署深度学习模型时，都是读取摄像头视频或传入视频文件进行推理，从视频中抽取帧进行目标检测等任务。对于较大的模型，推理速度往往赶不上摄像头或视频的帧率。如果使用单线程处理，即读取一帧、检测一帧，推理会阻塞视频帧的正常读取，表现出来就是摄像头画面有很大的延迟；在实时性要求较高的场景下，这种延迟是难以接受的。因此，采用多线程的方法，将视频读取与深度学习推理放在两个线程里，互不阻塞，从而达到实时的效果。

实现方法

将摄像头的视频读取放入子线程，充当生产者的角色；将推理放入主线程，充当消费者的角色。主线程每推理完一帧，就从子线程取最新的一帧数据继续推理。下图是原博文的一幅图片，描述了两个线程的关系：

程序实现

子线程

"""camera.py

This code implements the Camera class, which encapsulates code to

handle IP CAM, USB webcam or the Jetson onboard camera.  The Camera

class is further extended to take either a video or an image file as

input.

"""

import time

import logging

import threading

import numpy as np

import cv2

def open_cam_rtsp(uri, width, height, latency):
    """Open an RTSP URI (IP CAM) through a GStreamer pipeline.

    The pipeline is assembled stage by stage and handed to OpenCV with
    the GStreamer backend.  'width' and 'height' set the size requested
    in the raw-video caps that follow 'nvvidconv'; 'latency' is passed
    straight to 'rtspsrc'.

    Returns the cv2.VideoCapture object; the caller is expected to check
    isOpened() on it.
    """
    stages = (
        'rtspsrc location={} latency={}'.format(uri, latency),
        'rtph264depay',
        'h264parse',
        'omxh264dec',
        'nvvidconv',
        'video/x-raw, width=(int){}, height=(int){}, '
        'format=(string)BGRx'.format(width, height),
        'videoconvert',
        'appsink',
    )
    # GStreamer elements are chained with ' ! '.
    pipeline = ' ! '.join(stages)
    return cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)

def open_cam_usb(dev, width, height):
    """Open a USB webcam (/dev/video<dev>) through a GStreamer pipeline.

    A GStreamer pipeline is used so that the capture width and height
    can be requested explicitly; without that requirement a plain
    cv2.VideoCapture(dev) would be enough.

    Returns the cv2.VideoCapture object; the caller is expected to check
    isOpened() on it.
    """
    stages = (
        'v4l2src device=/dev/video{}'.format(dev),
        'video/x-raw, width=(int){}, height=(int){}, '
        'format=(string)RGB'.format(width, height),
        'videoconvert',
        'appsink',
    )
    # GStreamer elements are chained with ' ! '.
    pipeline = ' ! '.join(stages)
    return cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)

def open_cam_onboard(width, height):
    """Open the Jetson onboard camera through a GStreamer pipeline.

    The source caps are fixed at 2592x1458, I420, 30/1 framerate; the
    frames are then converted by 'nvvidconv' to the requested 'width'
    and 'height' in BGRx before reaching the appsink.

    On versions of L4T prior to 28.1, you might need to add
    'flip-method=2' into the pipeline string.

    Returns the cv2.VideoCapture object; the caller is expected to check
    isOpened() on it.
    """
    stages = (
        'nvcamerasrc',
        'video/x-raw(memory:NVMM), '
        'width=(int)2592, height=(int)1458, '
        'format=(string)I420, framerate=(fraction)30/1',
        'nvvidconv',
        'video/x-raw, width=(int){}, height=(int){}, '
        'format=(string)BGRx'.format(width, height),
        'videoconvert',
        'appsink',
    )
    # GStreamer elements are chained with ' ! '.
    pipeline = ' ! '.join(stages)
    return cv2.VideoCapture(pipeline, cv2.CAP_GSTREAMER)

def grab_img(cam):
    """Grab frames into 'cam.img_handle'; designed to run in the sub-thread.

    Once started, this keeps running until 'cam.thread_running' is set
    to False (normally by Camera.stop()), or until the source stops
    delivering frames.  'cam.thread_running' is always reset to False on
    the way out, including when an unexpected exception is raised, so a
    consumer looping on that flag is never left waiting on a dead thread.

    Args:
        cam: object exposing 'args.use_image', 'cap', 'img_handle' and
            'thread_running' (a Camera instance).

    Behaviour per source type:
      * use_image: the image was already loaded, so the loop only sleeps
        and yields the CPU.  If no image was loaded, an error is logged
        and the loop ends.
      * otherwise: frames are read from 'cam.cap'.  A failed read is
        logged and ends the loop; the last good frame stays in
        'cam.img_handle' instead of being overwritten with None.
    """
    try:
        if cam.args.use_image:
            while cam.thread_running:
                if cam.img_handle is None:
                    # An exception raised in this thread would only kill
                    # the thread silently, so report and stop instead.
                    logging.error('grab_img(): img_handle is empty in '
                                  'use_image case!')
                    break
                # keep using the same img, no need to update it
                time.sleep(0.01)  # yield CPU to other threads
            return

        # Query the frame rate once.  Some sources report 0 (or a
        # non-finite value) when the rate is unknown; pacing is skipped
        # then instead of dividing by zero.
        fps = cam.cap.get(cv2.CAP_PROP_FPS)
        frame_delay = 1.0 / fps if fps and fps > 0 else 0.0
        logging.debug('grab_img(): fps=%s, sleeping %.4fs per frame',
                      fps, frame_delay)

        while cam.thread_running:
            _, img = cam.cap.read()
            if img is None:
                logging.warning('grab_img(): cap.read() returns None...')
                break
            cam.img_handle = img
            if frame_delay > 0:
                time.sleep(frame_delay)
    finally:
        cam.thread_running = False

class Camera():
    """Camera class which supports reading images from these video sources:

    1. Video file
    2. Image (jpg, png, etc.) file, repeating indefinitely
    3. RTSP (IP CAM)
    4. USB webcam
    5. Jetson onboard camera

    Intended call order: open() -> start() -> read() as often as needed
    -> stop() -> release().  Frames are grabbed by a sub-thread running
    grab_img(); read() just hands out the most recent one.

    Attributes:
        args: parsed command line arguments selecting the source.
        is_opened: True once open() obtained a first image.
        thread_running: True while the grabbing sub-thread should run.
        img_handle: most recent image (None until one is available).
        img_width, img_height: size of the first image, 0 until opened.
        cap: the cv2.VideoCapture, the string 'OK' in the use_image case
            (no capture device needed), or None when not opened.
        thread: the grabbing threading.Thread, or None.
    """

    def __init__(self, args):
        self.args = args
        self.is_opened = False
        self.thread_running = False
        self.img_handle = None
        self.img_width = 0
        self.img_height = 0
        self.cap = None
        self.thread = None

    def open(self):
        """Open camera based on command line arguments.

        On success 'is_opened' is True, 'img_width'/'img_height' are set
        and 'img_handle' already holds the first image, so read() never
        returns None between start() and the sub-thread's first grab.
        On failure 'is_opened' stays False; nothing is raised.
        """
        assert self.cap is None, 'Camera is already opened!'
        args = self.args
        if args.use_file:
            self.cap = cv2.VideoCapture(args.filename)
            # ignore image width/height settings here
        elif args.use_image:
            # 'OK' marks "opened, but there is no capture device"
            self.cap = 'OK'
            self.img_handle = cv2.imread(args.filename)
            # ignore image width/height settings here
            if self.img_handle is not None:
                self.is_opened = True
                self.img_height, self.img_width, _ = self.img_handle.shape
        elif args.use_rtsp:
            self.cap = open_cam_rtsp(
                args.rtsp_uri,
                args.image_width,
                args.image_height,
                args.rtsp_latency
            )
        elif args.use_usb:
            self.cap = open_cam_usb(
                args.video_dev,
                args.image_width,
                args.image_height
            )
        else:  # by default, use the jetson onboard camera
            self.cap = open_cam_onboard(
                args.image_width,
                args.image_height
            )
        if self.cap != 'OK':
            if self.cap.isOpened():
                # Try to grab the 1st image and determine width and height
                _, img = self.cap.read()
                if img is not None:
                    self.img_height, self.img_width, _ = img.shape
                    # Keep the first frame instead of dropping it.
                    self.img_handle = img
                    self.is_opened = True

    def start(self):
        """Start the sub-thread which keeps grabbing images."""
        assert not self.thread_running
        self.thread_running = True
        self.thread = threading.Thread(target=grab_img, args=(self,))
        self.thread.start()

    def stop(self):
        """Ask the sub-thread to finish and wait for it.

        Safe to call when start() was never called, or more than once.
        """
        self.thread_running = False
        if self.thread is not None:
            self.thread.join()
            self.thread = None

    def read(self):
        """Return the most recent image.

        In the use_image case a copy is returned, so the caller may
        modify it without altering the stored image.
        """
        if self.args.use_image:
            return np.copy(self.img_handle)
        else:
            return self.img_handle

    def release(self):
        """Release the capture device; stop() must have been called first.

        Safe to call when open() was never called.  Afterwards the
        camera is back in the "not opened" state and may be opened again.
        """
        assert not self.thread_running
        if self.cap is not None and self.cap != 'OK':
            self.cap.release()
        self.cap = None
        self.is_opened = False

主线程

主线程程序以 TensorFlow Object Detection 的推理部分为主，重点看其中读取摄像头或视频的方法；运行时需要通过命令行参数指定要读取的摄像头或视频：

# coding: utf-8

import numpy as np

import os

import six.moves.urllib as urllib

import sys

import tarfile

import tensorflow as tf

import zipfile

from collections import defaultdict

from io import StringIO

from matplotlib import pyplot as plt

from PIL import Image

import cv2

import time

from PIL import Image

import tensorflow.contrib.tensorrt as trt

from camera import Camera

import argparse

# Set CUDA_VISIBLE_DEVICES to '0' for this process -- presumably to pin
# inference to the first GPU (TODO confirm this is still wanted on TX2).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# This is needed since the notebook is stored in the object_detection folder.
# Appending the parent directory to sys.path is presumably what lets the
# 'object_detection' import that follows resolve.

sys.path.append("..")

from object_detection.utils import ops as utils_ops

# Compare the version numerically.  A plain string comparison orders
# '1.10.0' before '1.4.0' and would wrongly reject TensorFlow 1.10+.
_tf_major_minor = tuple(int(part) for part in tf.__version__.split('.')[:2])
if _tf_major_minor < (1, 4):
    raise ImportError('Please upgrade your tensorflow installation to v1.4.* or later!')

from utils import label_map_util

from utils import visualization_utils as vis_util

# Path to frozen detection graph. This is the actual model that is used for the object detection.

PATH_TO_FROZEN_GRAPH = 'data/ssd_mobilenet_coco_0129/frozen_inference_graph.pb'

# Path to the label map file; its entries are used to add the correct label
# for each box.  (os.path.join with a single argument returns it unchanged.)

PATH_TO_LABELS = os.path.join('data/object-detection.pbtxt')

# Passed as max_num_classes when the label map is converted to categories.
NUM_CLASSES = 12

# Video path without extension, and with it.  The resulting path equals the
# default of the --filename command line option.
VIDEO_NAME = 'data/2018-09-10_162811'

filename = VIDEO_NAME + '.mp4'

def parse_args(argv=None):
    """Parse input arguments.

    Args:
        argv: optional list of argument strings.  When None (the
            default) the arguments are taken from sys.argv, exactly as
            before; passing a list makes the function usable from other
            code and from tests.

    Returns:
        argparse.Namespace with: use_file, use_image, filename, use_rtsp,
        rtsp_uri, rtsp_latency, use_usb, video_dev, image_width,
        image_height and conf_th.  If none of the use_* flags is set the
        Camera class falls back to the Jetson onboard camera.
    """
    desc = ('This script captures and displays live camera video, '
            'and does real-time object detection with TF-TRT model '
            'on Jetson TX2/TX1')
    parser = argparse.ArgumentParser(description=desc)

    # input source selection
    parser.add_argument('--file', dest='use_file',
                        help='use a video file as input (remember to '
                        'also set --filename)',
                        action='store_true')
    parser.add_argument('--image', dest='use_image',
                        help='use an image file as input (remember to '
                        'also set --filename)',
                        action='store_true')
    parser.add_argument('--filename', dest='filename',
                        help='video file name, e.g. test.mp4',
                        default='data/2018-09-10_162811.mp4', type=str)
    parser.add_argument('--rtsp', dest='use_rtsp',
                        help='use IP CAM (remember to also set --uri)',
                        action='store_true')
    # The example URI deliberately uses placeholder credentials; real
    # ones must never be embedded in help text.
    parser.add_argument('--uri', dest='rtsp_uri',
                        help='RTSP URI, e.g. '
                        'rtsp://user:password@192.168.1.64/H.264/ch1/main',
                        default=None, type=str)
    parser.add_argument('--latency', dest='rtsp_latency',
                        help='latency in ms for RTSP [200]',
                        default=200, type=int)
    parser.add_argument('--usb', dest='use_usb',
                        help='use USB webcam (remember to also set --vid)',
                        action='store_true')
    parser.add_argument('--vid', dest='video_dev',
                        help='device # of USB webcam (/dev/video?) [1]',
                        default=1, type=int)

    # capture size and detection threshold
    parser.add_argument('--width', dest='image_width',
                        help='image width [1280]',
                        default=1280, type=int)
    parser.add_argument('--height', dest='image_height',
                        help='image height [720]',
                        default=720, type=int)
    parser.add_argument('--confidence', dest='conf_th',
                        help='confidence threshold [0.3]',
                        default=0.3, type=float)

    return parser.parse_args(argv)

def detect_in_video():
    """Run object detection on frames delivered by the Camera sub-thread.

    Steps:
      1. Parse the command line and load the frozen detection graph and
         the label map.
      2. Open the selected source and start the grabbing sub-thread.
      3. While the sub-thread is alive, take the most recent frame and
         run it through the detection graph.

    The camera thread is always stopped and the device released on the
    way out (normal end of stream, Ctrl-C or an error), so the process
    does not hang on a still-running grabbing thread.

    The detection outputs are only computed here; drawing or otherwise
    consuming them is left to the caller's adaptation of this loop.

    Raises:
        SystemExit: if the camera / video / image could not be opened.
    """
    args = parse_args()

    # Load the frozen graph into its own tf.Graph.
    detection_graph = tf.Graph()
    with detection_graph.as_default():
        od_graph_def = tf.GraphDef()
        with tf.gfile.GFile(PATH_TO_FROZEN_GRAPH, 'rb') as fid:
            od_graph_def.ParseFromString(fid.read())
        tf.import_graph_def(od_graph_def, name='')

    # The label map is loaded up front so a missing/invalid file fails
    # early; 'category_index' is what a visualization step would use.
    label_map = label_map_util.load_labelmap(PATH_TO_LABELS)
    categories = label_map_util.convert_label_map_to_categories(
        label_map, max_num_classes=NUM_CLASSES, use_display_name=True)
    category_index = label_map_util.create_category_index(categories)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    with detection_graph.as_default():
        with tf.Session(graph=detection_graph, config=config) as sess:
            image_tensor = detection_graph.get_tensor_by_name(
                'image_tensor:0')
            fetches = [
                detection_graph.get_tensor_by_name('detection_boxes:0'),
                detection_graph.get_tensor_by_name('detection_scores:0'),
                detection_graph.get_tensor_by_name('detection_classes:0'),
                detection_graph.get_tensor_by_name('num_detections:0'),
            ]

            cam = Camera(args)
            cam.open()
            if not cam.is_opened:
                cam.release()
                sys.exit('Failed to open camera!')

            cam.start()
            try:
                while cam.thread_running:
                    frame = cam.read()
                    if frame is None:
                        # No frame available yet; let the grabbing
                        # thread run instead of crashing in cvtColor.
                        time.sleep(0.01)
                        continue
                    color_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
                    # The graph input is fed a batch of one image.
                    image_np_expanded = np.expand_dims(color_frame, axis=0)
                    (boxes, scores, classes, num) = sess.run(
                        fetches,
                        feed_dict={image_tensor: image_np_expanded})
            finally:
                cam.stop()
                cam.release()

def main():
    """Script entry point: run the camera/video detection loop."""
    detect_in_video()

# Run the detection only when this file is executed as a script, not when
# it is imported as a module.
if __name__ =='__main__':

  main()

Ref

子线程实现

 原博客

TX2之多线程读取视频及深度学习推理的更多相关文章

在TX2上多线程读取视频帧进行caffe推理
参考文章:Multi-threaded Camera Caffe Inferencing TX2之多线程读取视频及深度学习推理背景一般在TX2上部署深度学习模型时,都是读取摄像头视频或者传入视频文 ...
NVIDIA TensorRT高性能深度学习推理
NVIDIA TensorRT高性能深度学习推理 NVIDIA TensorRT 是用于高性能深度学习推理的 SDK.此 SDK 包含深度学习推理优化器和运行时环境,可为深度学习推理应用提供低延迟和高 ...
Papers | 图像/视频增强 + 深度学习
目录 I. ARCNN 1. Motivation 2. Contribution 3. Artifacts Reduction Convolutional Neural Networks (ARCN ...
【ARM-Linux开发】【CUDA开发】【深度学习与神经网络】Jetson Tx2安装相关之三
JetPack(Jetson SDK)是一个按需的一体化软件包,捆绑了NVIDIA®Jetson嵌入式平台的开发人员软件.JetPack 3.0包括对Jetson TX2 , Jetson TX1和J ...
机器学习(Machine Learning)&深度学习(Deep Learning)资料【转】
转自:机器学习(Machine Learning)&深度学习(Deep Learning)资料 <Brief History of Machine Learning> 介绍:这是一 ...
机器学习(Machine Learning)与深度学习(Deep Learning)资料汇总
<Brief History of Machine Learning> 介绍:这是一篇介绍机器学习历史的文章,介绍很全面,从感知机.神经网络.决策树.SVM.Adaboost到随机森林.D ...
TensorRT深度学习训练和部署图示
TensorRT深度学习训练和部署 NVIDIA TensorRT是用于生产环境的高性能深度学习推理库.功率效率和响应速度是部署的深度学习应用程序的两个关键指标,因为它们直接影响用户体验和所提供服务的 ...
MLPerf结果证实至强® 可有效助力深度学习训练
MLPerf结果证实至强可有效助力深度学习训练核心与视觉计算事业部副总裁Wei Li通过博客回顾了英特尔这几年为提升深度学习性能所做的努力. 目前根据英特尔至强可扩展处理器的MLPerf结果显 ...
实战深度学习OpenCV(二)：读取并播放本地或者摄像头的视频
一.读取并播放的代码如下: #include "pch.h" #include <iostream> #include <opencv2/core/core.hp ...

随机推荐

vnpy自动化交易
c++ python 写的自动化交易平台期货ctp
elmah oracle
. <sectionGroup name="elmah"> <section name="security" requirePermissio ...
mybatis ：xml文件中传入参数和if标签结合使用时要点
org.mybatis.spring.MyBatisSystemException: nested exception is org.apache.ibatis.reflection.Reflecti ...
Linux上获取CPU Core个数的实现
Linux上获取CPU Core个数的实现可以通过多种手段取得CPU Core的个数,如: 1) 调用系统提供的函数get_nprocs(),可以在头文件sys/sysinfo.h中发现它 2) 借 ...
Quartus中代码字体大小的调整方法
Quartus中代码大小的调整方法网友 "一纸玫瑰"整理第一步:点击Tools(工具) 第二步:点击Options(选项) 第三步:Text Editor(文本编辑)/Font ...
Java中的http（网络处理）相关的库：HttpClient，HttpCore（转载）
[背景] 最近和之前,折腾了这个: [教程]模拟登陆百度之Java代码版然后,对于Java的HttpClient,有了点了解. 现在整理如下: Java本身没有Http相关的库 Java本身,没有内 ...
.net 任务（Task）
1. Task (任务): 很容易调用 ThreadPool.QueueUserWorkItem 实现异步操作,但是这个技术有许多 .net 引入Task类型来使用任务. 如下几种方式都是实现异步的方 ...
duilib入门简明教程 -- 完整的自绘标题栏(8)
看了前面那么多教程,相信对duilib已有基本映像了,我们就快马加鞭,做出一个完整的自绘标题栏吧~ 看到下面这个效果图,小伙伴们是不是有点惊呆了呢~O(∩_∩)O~ duil ...
solr的schema.xml配置属性解释
schema.xml做什么? SOLR加载数据,创建索引和数据时,核心数据结构的配置文件是schema.xml,该配置文件主要用于配置数据源,字段类型定义,搜索类型定义等.schema.xml的配置直 ...
一个简单的C#+arcgis的非数据库版例子
1.首先新建一个winform的项目. 2.确保C#工具箱包含ESRI的相关控件,如果没有就需要安装SDK. 如果VS中依旧不存在esri控件解决方案如下,以VS2013为例: (1)打开注册表,定位 ...

TX2之多线程读取视频及深度学习推理

背景

实现方法

程序实现

子线程

主线程

Ref

TX2之多线程读取视频及深度学习推理的更多相关文章

随机推荐

热门专题