使用yolo算法对视频进行实时目标跟踪和分割

视频演示

大家好，我是Coding茶水间。今天我来分享一个基于YOLO算法的工具，它可以对视频或图片进行实时目标跟踪和分割。

这个工具使用PyQt5构建界面，核心算法依赖Ultralytics的YOLOv11模型。整个项目简单易上手，适合初学者学习计算机视觉和GUI开发。

所有源码我都会在文章末尾提供。如果你觉得有用，欢迎点赞、收藏和关注我的博客。

有什么问题，可以在评论区留言讨论。

工具概述

这个工具的主要功能是：

- 支持选择本地图片或视频文件。
- 使用YOLOv11模型进行目标检测、跟踪和实例分割。
- 实时显示处理结果，包括边界框、类别标签、置信度和分割掩码。
- 支持保存处理后的图片或视频。

界面使用PyQt5设计，非常简洁，只有四个按钮：选择图片、选择视频、处理和保存结果。运行后，你会看到一个白底窗口，顶部是标题，中间是按钮区，下方是结果展示区域（固定大小800x500），底部有作者信息。

演示效果

视频处理演示

1. 点击“选择视频”按钮，浏览本地文件，选择一段视频（例如公路车辆行驶的视频，支持.mp4、.avi、.mov格式）。
2. 点击“处理”按钮，工具会使用YOLO模型逐帧处理视频。
- 结果：在展示区域实时播放处理后的视频。
- 每辆车会被边界框包围，不同颜色表示不同的跟踪ID。
- 框上方显示类别（如“car”）和置信度（如“0.85”）。
- 车辆被精准分割，背景与目标分离。

3. 处理完成后，点击“保存结果”，弹出保存对话框，输入带扩展名的文件名（例如 result.mp4）即可保存为视频文件。

保存后的视频会保留所有分割和跟踪信息，便于后续查看。

图片处理演示

1. 点击“选择图片”按钮，选择本地图片（支持.png、.jpg、.jpeg格式，例如一张小狗的照片）。
2. 点击“处理”按钮，立即显示处理结果。
- 结果：图片中的目标（如小狗）被精准分割，边界清晰。
- 支持复杂场景，例如边缘人物的边界分割效果良好。

3. 点击“保存结果”，保存处理后的图片。

我测试了多张图片，包括人物和动物，YOLOv11n模型（最小模型）在边缘处理上表现不错。如果你用更大模型如YOLOv11m，精度更高，但可能稍卡（取决于电脑配置）。

代码核心原理

工具的核心在于Ultralytics库的solutions模块，它封装了YOLO的实例分割功能。简单来说：

初始化分割对象：

self.isegment = solutions.InstanceSegmentation(

    show=False,  # do not display the output directly; the result is shown in the PyQt5 UI instead

    model="yolo11n-seg.pt",  # YOLOv11n segmentation model; can be swapped for another one such as yolo11m-seg.pt

)

这里可以指定classes参数，只分割特定类别（如人或车）。

视频处理循环：
- 使用OpenCV的VideoCapture读取视频帧。
- 通过QTimer定时器（每30ms一帧）更新帧。
- 处理时，将帧传入isegment对象：
```
results = self.isegment(frame)

frame = results.plot_im  # the annotated image (boxes, labels and segmentation drawn on it)
```
- 存储处理帧到数组，便于后续保存。
图片处理：
- 直接用cv2.imread读取图片，传入isegment，获取结果并转换为QPixmap显示。
保存逻辑：
- 视频：用cv2.VideoWriter写入处理帧数组。
- 图片：用cv2.imwrite保存结果图像。

如果你想优化：

换成更大的分割模型（如yolo11m-seg.pt）可以提升精度，但需要较好的显卡。

在demo中，可以用while循环简单测试：

import cv2

from ultralytics import solutions

# Minimal demo: run instance segmentation on every frame of a video file and
# let the solution object display each annotated frame (show=True).
cap = cv2.VideoCapture("your_video.mp4")

# Fail loudly instead of silently skipping the loop when the file cannot be opened.
if not cap.isOpened():
    raise SystemExit("无法打开视频文件！")

isegment = solutions.InstanceSegmentation(model="yolo11n-seg.pt", show=True)

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            # End of the video (or a read error): stop processing.
            break
        isegment(frame)  # process the frame; display is handled by show=True
finally:
    # Always free the capture and close any OpenCV window, even on error.
    cap.release()
    cv2.destroyAllWindows()

这是一个最小demo，运行后会弹出OpenCV窗口显示效果。

完整代码

下面是完整代码，直接复制运行（需安装PyQt5、OpenCV和Ultralytics）。环境：Python 3.8及以上。

import sys

import cv2

from PyQt5.QtWidgets import QApplication, QMainWindow, QVBoxLayout, QHBoxLayout, QPushButton, QLabel, QFileDialog, QWidget,QMessageBox

from PyQt5.QtGui import QPixmap, QImage

from PyQt5.QtCore import Qt, QTimer

from ultralytics import solutions

class MainWindow(QMainWindow):
    """Main window: pick an image or a video, run instance segmentation, show and save the result.

    State attributes:
        isPrcVideo: True while the current input is a video, False for an image.
        isprocess: True once the "process" button was pressed for the current
            video, so each displayed frame is passed through the model first.
        im_path: path of the selected image, or None when none was selected.
        img_result: annotated (BGR) image from the last image run, or None.
        video_capture: the open ``cv2.VideoCapture``, or None.
        video_fps: frame rate used for playback and for the saved video.
        processed_frames: annotated (BGR) frames collected while processing a
            video. NOTE: every frame is kept in memory, so long videos can use
            a lot of RAM.
    """

    # Fallback frame rate when the video file does not report a usable FPS.
    DEFAULT_FPS = 30.0

    def __init__(self) -> None:
        """Build the UI, wire the signals and create the segmentation object."""
        super().__init__()
        self.setWindowTitle("使用yolo算法对视频进行实时目标跟踪和分割")
        self.setGeometry(100, 100, 800, 600)
        self.setStyleSheet("background-color: white;")

        # Main layout
        self.main_widget = QWidget()
        self.setCentralWidget(self.main_widget)
        self.main_layout = QVBoxLayout()
        self.main_widget.setLayout(self.main_layout)

        # Title
        self.title_label = QLabel("使用yolo算法对视频进行实时目标跟踪和分割")
        self.title_label.setStyleSheet("font-size: 20px; font-weight: bold; color: #333; margin-bottom: 10px; margin-top: 10px;")
        self.title_label.setAlignment(Qt.AlignCenter)
        self.main_layout.addWidget(self.title_label)

        # Button row
        self.button_layout = QHBoxLayout()
        self.main_layout.addLayout(self.button_layout)

        self.select_image_btn = QPushButton("选择图片")
        self.select_image_btn.setStyleSheet("QPushButton { background-color: #f0f0f0; border: 1px solid #ccc; padding: 10px; font-size: 14px; }")
        self.button_layout.addWidget(self.select_image_btn)

        self.select_video_btn = QPushButton("选择视频")
        self.select_video_btn.setStyleSheet("QPushButton { background-color: #f0f0f0; border: 1px solid #ccc; padding: 10px; font-size: 14px; }")
        self.button_layout.addWidget(self.select_video_btn)

        self.process_btn = QPushButton("处理")
        self.process_btn.setStyleSheet("QPushButton { background-color: #4CAF50; color: white; border: none; padding: 10px; font-size: 14px; }")
        self.button_layout.addWidget(self.process_btn)

        self.save_btn = QPushButton("保存结果")
        self.save_btn.setStyleSheet("QPushButton { background-color: #2196F3; color: white; border: none; padding: 10px; font-size: 14px; }")
        self.button_layout.addWidget(self.save_btn)

        # Display area (fixed size)
        self.original_display = QLabel()
        self.original_display.setAlignment(Qt.AlignCenter)
        self.original_display.setStyleSheet("border: 1px solid #ccc; background-color: #f9f9f9;")
        self.original_display.setFixedSize(800, 500)
        self.main_layout.addWidget(self.original_display)

        # Author line
        self.author_label = QLabel("作者：Coding茶水间")
        self.author_label.setStyleSheet("font-size: 12px; color: #777; margin-top: 10px;")
        self.author_label.setAlignment(Qt.AlignCenter)
        self.main_layout.addWidget(self.author_label)

        # Signal connections
        self.select_image_btn.clicked.connect(self.select_image)
        self.select_video_btn.clicked.connect(self.select_video)
        self.process_btn.clicked.connect(self.process)
        self.save_btn.clicked.connect(self.save_result)

        # Timer that drives video playback: one frame per timeout.
        self.video_timer = QTimer(self)
        self.video_timer.timeout.connect(self.update_frame)

        # Initialise every piece of state up front so no handler can hit an
        # AttributeError regardless of the order the buttons are pressed in.
        self.isprocess = False
        self.isPrcVideo = True
        self.im_path = None
        self.img_result = None
        self.video_capture = None
        self.video_fps = self.DEFAULT_FPS
        self._timer_interval_ms = max(1, round(1000 / self.video_fps))

        self.isegment = solutions.InstanceSegmentation(
            show=False,  # the result is rendered in this window, not by the solution
            model="yolo11n-seg.pt",  # segmentation weights
            # classes=[0, 2],  # optionally restrict segmentation to specific class ids
        )

        # Annotated frames of the video currently being processed.
        self.processed_frames = []

    def _show_bgr_frame(self, frame_bgr) -> None:
        """Convert a BGR image array to a pixmap and show it scaled into the display area."""
        rgb = cv2.cvtColor(frame_bgr, cv2.COLOR_BGR2RGB)
        h, w, ch = rgb.shape
        bytes_per_line = ch * w
        q_image = QImage(rgb.data, w, h, bytes_per_line, QImage.Format_RGB888)
        self.video_pixmap = QPixmap.fromImage(q_image)
        self.original_display.setPixmap(self.video_pixmap.scaled(
            self.original_display.width(), self.original_display.height(),
            Qt.KeepAspectRatio, Qt.SmoothTransformation
        ))

    def select_image(self) -> None:
        """Let the user pick an image file, preview it and switch to image mode."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择图片", "", "Image Files (*.png *.jpg *.jpeg)")
        if not file_path:
            return

        # Stop any running playback, otherwise the next video frame would
        # immediately overwrite the image preview.
        self.video_timer.stop()

        pixmap = QPixmap(file_path)
        self.im_path = file_path
        # Drop the result of a previously processed image so it cannot be
        # saved by mistake for the newly selected one.
        self.img_result = None
        self.original_display.setPixmap(pixmap.scaled(self.original_display.size(), Qt.KeepAspectRatio))
        self.isPrcVideo = False

    def select_video(self) -> None:
        """Let the user pick a video file and start plain (unprocessed) playback."""
        file_path, _ = QFileDialog.getOpenFileName(self, "选择视频", "", "Video Files (*.mp4 *.avi *.mov)")
        if not file_path:
            return

        # Tear down the previous video, if any.
        self.video_timer.stop()
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None

        capture = cv2.VideoCapture(file_path)
        if not capture.isOpened():
            capture.release()
            QMessageBox.warning(self, "错误", "无法打开视频文件！")
            return
        self.video_capture = capture

        # Use the source frame rate for playback and for the saved file; fall
        # back to the default when the container reports 0 or NaN.
        fps = capture.get(cv2.CAP_PROP_FPS)
        if not (fps > 0):
            fps = self.DEFAULT_FPS
        self.video_fps = float(fps)
        self._timer_interval_ms = max(1, round(1000 / self.video_fps))

        # Frames collected from an earlier video must not be saved as the
        # result of this one.
        self.processed_frames = []
        self.isprocess = False
        self.isPrcVideo = True
        self.video_timer.start(self._timer_interval_ms)

    def update_frame(self) -> None:
        """Timer slot: read the next frame, optionally segment it, and display it."""
        if self.video_capture is None:
            self.video_timer.stop()
            return

        ret, frame = self.video_capture.read()
        if not ret:
            # End of the video (or a read error): stop playback.
            self.video_timer.stop()
            return

        if self.isprocess:
            results = self.isegment(frame)
            frame = results.plot_im
            # Keep a copy of the annotated frame for saving later.
            self.processed_frames.append(frame.copy())

        self._show_bgr_frame(frame)

    def process(self) -> None:
        """Run segmentation on the selected input (restart video from frame 0, or process the image)."""
        if self.isPrcVideo:
            if self.video_capture is None or not self.video_capture.isOpened():
                QMessageBox.warning(self, "错误", "请先选择视频！")
                return

            # Rewind to the first frame and restart playback with processing on.
            self.video_timer.stop()
            self.video_capture.set(cv2.CAP_PROP_POS_FRAMES, 0)
            self.processed_frames = []
            self.isprocess = True
            self.video_timer.start(self._timer_interval_ms)
            return

        if not self.im_path:
            QMessageBox.warning(self, "错误", "请先选择图片或视频！")
            return

        im0 = cv2.imread(self.im_path)
        if im0 is None:
            # cv2.imread returns None instead of raising when it cannot read the file.
            QMessageBox.warning(self, "错误", "无法读取图片文件！")
            return

        results = self.isegment(im0)
        self.img_result = results.plot_im
        self._show_bgr_frame(self.img_result)

    def save_result(self) -> None:
        """Save the processed video or image, depending on the current mode."""
        if self.isPrcVideo:
            self._save_video()
        else:
            self._save_image()

    def _save_video(self) -> None:
        """Ask for a target path and write all collected annotated frames to it."""
        if not self.processed_frames:
            QMessageBox.warning(self, "错误", "请先处理视频！")
            return

        file_path, _ = QFileDialog.getSaveFileName(self, "保存视频", "", "Video Files (*.mp4 *.avi *.mov)")
        if not file_path:
            return

        # The frame size for the writer is taken from the first frame.
        height, width = self.processed_frames[0].shape[:2]
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        video_writer = cv2.VideoWriter(file_path, fourcc, self.video_fps, (width, height))
        if not video_writer.isOpened():
            # E.g. unwritable path or a file name without a usable extension.
            video_writer.release()
            QMessageBox.warning(self, "错误", "无法保存视频文件！")
            return

        try:
            for frame in self.processed_frames:
                video_writer.write(frame)
        finally:
            video_writer.release()

        QMessageBox.information(self, "成功", "视频保存成功！")

    def _save_image(self) -> None:
        """Ask for a target path and write the annotated image to it."""
        if not self.im_path:
            QMessageBox.warning(self, "错误", "请先选择图片或视频！")
            return
        if self.img_result is None:
            # An image was selected but "process" has not produced a result yet.
            QMessageBox.warning(self, "错误", "请先处理图片！")
            return

        file_path, _ = QFileDialog.getSaveFileName(self, "保存图片", "", "Image Files (*.png *.jpg *.jpeg)")
        if not file_path:
            return

        try:
            saved = cv2.imwrite(file_path, self.img_result)
        except cv2.error:
            # Raised e.g. when the extension does not map to a known encoder.
            saved = False

        if saved:
            QMessageBox.information(self, "成功", "图片保存成功！")
        else:
            QMessageBox.warning(self, "错误", "图片保存失败！")

    def closeEvent(self, event) -> None:
        """Stop playback and release the video capture when the window closes."""
        self.video_timer.stop()
        if self.video_capture is not None:
            self.video_capture.release()
            self.video_capture = None
        super().closeEvent(event)

if __name__ == "__main__":
    # Create the Qt application, show the main window, then hand control to
    # the Qt event loop and exit with its return code.
    qt_app = QApplication(sys.argv)
    main_window = MainWindow()
    main_window.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)

安装依赖

pip install pyqt5 opencv-python ultralytics
下载YOLO模型：Ultralytics会自动下载"yolo11n-seg.pt"。

总结

这个工具展示了YOLO在实时目标跟踪和分割上的强大能力，代码简洁，易扩展。如果你有GPU，可以试试更大模型提升性能。有问题随时交流！