Github Copilot 实战: 从零开始用AI写一个OCR工具 (3)

源码

https://github.com/densen2014/Blazor100/tree/master/AI/MiOcr

添加一个屏幕截图功能,显示截图起始点,结束点,截图区域,按键ESC取消截图

这里AI就比较中规中矩,很快就能得到我要的功能了.下面只简单贴一下代码

ScreenCaptureWindow.xaml

<Window x:Class="MiOcr.ScreenCaptureWindow"

        xmlns:x="http://schemas.microsoft.com/winfx/2006/xaml"

        xmlns="http://schemas.microsoft.com/winfx/2006/xaml/presentation"

        WindowStyle="None" AllowsTransparency="True" Background="#01000000"

        Topmost="True" ShowInTaskbar="False" WindowState="Maximized">

    <Canvas x:Name="CaptureCanvas">

        <TextBlock x:Name="StartCoordText"

               Foreground="Yellow"

               Background="#80000000"

               FontSize="14"

               Visibility="Collapsed"/>

        <TextBlock x:Name="CurrentCoordText"

               Foreground="Yellow"

               Background="#80000000"

               FontSize="14"

               Visibility="Collapsed"/>

        <TextBlock x:Name="SizeText"

               Foreground="Yellow"

               Background="#80000000"

               FontSize="14"

               Visibility="Collapsed"/>

    </Canvas>

</Window>

ScreenCaptureWindow.xaml.cs

using System.Windows;

using System.Windows.Controls;

using System.Windows.Input;

using System.Windows.Interop;

using System.Windows.Media;

using System.Windows.Media.Imaging;

using System.Windows.Shapes;

namespace MiOcr;

public partial class ScreenCaptureWindow : Window

{

    public Rect SelectedRect { get; private set; }

    public BitmapSource? CapturedImage { get; private set; }

    private System.Windows.Point? _start;

    private Rectangle? _rectShape;

    public ScreenCaptureWindow()

    {

        InitializeComponent();

        MouseLeftButtonDown += OnMouseDown;

        MouseMove += OnMouseMove;

        MouseLeftButtonUp += OnMouseUp;

        Cursor = Cursors.Cross;

        PreviewKeyDown += ScreenCaptureWindow_PreviewKeyDown;

        Focusable = true;

        Loaded += (s, e) => Keyboard.Focus(this);

    }

    private void ScreenCaptureWindow_PreviewKeyDown(object sender, KeyEventArgs e)

    {

        if (e.Key == Key.Escape)

        {

            CapturedImage = null;

            DialogResult = false;

            Close();

        }

    }

    private void PositionTextBlocks(double x, double y, double w, double h)

    {

        double margin = 8;

        double canvasWidth = CaptureCanvas.ActualWidth;

        double canvasHeight = CaptureCanvas.ActualHeight;

        // 先测量文本大小

        StartCoordText.Measure(new Size(double.PositiveInfinity, double.PositiveInfinity));

        SizeText.Measure(new Size(double.PositiveInfinity, double.PositiveInfinity));

        double startW = StartCoordText.DesiredSize.Width;

        double startH = StartCoordText.DesiredSize.Height;

        double sizeW = SizeText.DesiredSize.Width;

        double sizeH = SizeText.DesiredSize.Height;

        // 1. 左上优先

        double startX = x - startW - margin;

        double startY = y - startH - margin;

        if (startX >= 0 && startY >= 0)

        {

            Canvas.SetLeft(StartCoordText, startX);

            Canvas.SetTop(StartCoordText, startY);

            Canvas.SetLeft(SizeText, startX);

            Canvas.SetTop(SizeText, startY + startH + 4);

            return;

        }

        // 2. 右上

        startX = x + w + margin;

        startY = y - startH - margin;

        if (startX + startW <= canvasWidth && startY >= 0)

        {

            Canvas.SetLeft(StartCoordText, startX);

            Canvas.SetTop(StartCoordText, startY);

            Canvas.SetLeft(SizeText, startX);

            Canvas.SetTop(SizeText, startY + startH + 4);

            return;

        }

        // 3. 左下

        startX = x - startW - margin;

        startY = y + h + margin;

        if (startX >= 0 && startY + startH + sizeH + 4 <= canvasHeight)

        {

            Canvas.SetLeft(StartCoordText, startX);

            Canvas.SetTop(StartCoordText, startY);

            Canvas.SetLeft(SizeText, startX);

            Canvas.SetTop(SizeText, startY + startH + 4);

            return;

        }

        // 4. 右下

        startX = x + w + margin;

        startY = y + h + margin;

        if (startX + startW <= canvasWidth && startY + startH + sizeH + 4 <= canvasHeight)

        {

            Canvas.SetLeft(StartCoordText, startX);

            Canvas.SetTop(StartCoordText, startY);

            Canvas.SetLeft(SizeText, startX);

            Canvas.SetTop(SizeText, startY + startH + 4);

            return;

        }

        // 5. 屏幕内兜底

        Canvas.SetLeft(StartCoordText, Math.Max(margin, Math.Min(canvasWidth - startW - margin, x)));

        Canvas.SetTop(StartCoordText, Math.Max(margin, Math.Min(canvasHeight - startH - margin, y)));

        Canvas.SetLeft(SizeText, Math.Max(margin, Math.Min(canvasWidth - sizeW - margin, x)));

        Canvas.SetTop(SizeText, Math.Max(margin, Math.Min(canvasHeight - sizeH - margin, y + startH + 4)));

    }

    private void OnMouseDown(object sender, MouseButtonEventArgs e)

    {

        _start = e.GetPosition(this);

        _rectShape = new Rectangle

        {

            Stroke = Brushes.Red,

            StrokeThickness = 2,

            Fill = new SolidColorBrush(Color.FromArgb(40, 0, 0, 255))

        };

        CaptureCanvas.Children.Add(_rectShape);

        Canvas.SetLeft(_rectShape, _start.Value.X);

        Canvas.SetTop(_rectShape, _start.Value.Y);

        StartCoordText.Text = $"起点: ({(int)_start.Value.X}, {(int)_start.Value.Y})";

        StartCoordText.Visibility = Visibility.Visible;

        CurrentCoordText.Text = $"当前: ({(int)_start.Value.X}, {(int)_start.Value.Y})";

        CurrentCoordText.Visibility = Visibility.Visible;

        SizeText.Text = $"大小: 0 x 0";

        SizeText.Visibility = Visibility.Visible;

        // 初始位置

        PositionTextBlocks(_start.Value.X, _start.Value.Y, 0, 0);

    }

    private void OnMouseMove(object sender, MouseEventArgs e)

    {

        if (_start.HasValue && _rectShape != null)

        {

            var pos = e.GetPosition(this);

            double x = Math.Min(_start.Value.X, pos.X);

            double y = Math.Min(_start.Value.Y, pos.Y);

            double w = Math.Abs(_start.Value.X - pos.X);

            double h = Math.Abs(_start.Value.Y - pos.Y);

            Canvas.SetLeft(_rectShape, x);

            Canvas.SetTop(_rectShape, y);

            _rectShape.Width = w;

            _rectShape.Height = h;

            // 更新当前点坐标

            CurrentCoordText.Text = $"当前: ({(int)pos.X}, {(int)pos.Y})";

            Canvas.SetLeft(CurrentCoordText, pos.X + 2);

            Canvas.SetTop(CurrentCoordText, pos.Y + 2);

            // 更新区域大小

            SizeText.Text = $"大小: {(int)w} x {(int)h}";

            // 动态调整文本位置

            PositionTextBlocks(x, y, w, h);

        }

    }

    private void OnMouseUp(object sender, MouseButtonEventArgs e)

    {

        if (_start.HasValue && _rectShape != null)

        {

            var end = e.GetPosition(this);

            double x = Math.Min(_start.Value.X, end.X);

            double y = Math.Min(_start.Value.Y, end.Y);

            double w = Math.Abs(_start.Value.X - end.X);

            double h = Math.Abs(_start.Value.Y - end.Y);

            SelectedRect = new Rect(x, y, w, h);

            // 隐藏坐标

            StartCoordText.Visibility = Visibility.Collapsed;

            CurrentCoordText.Visibility = Visibility.Collapsed;

            // 隐藏区域大小

            SizeText.Visibility = Visibility.Collapsed;

            // 截图

            CapturedImage = CaptureScreenArea(SelectedRect);

            DialogResult = true;

            Close();

        }

    }

    private BitmapSource CaptureScreenArea(Rect rect)

    {

        double dpiScale = NativeMethods.GetDpiScale(this);

        int x = (int)(rect.X * dpiScale);

        int y = (int)(rect.Y * dpiScale);

        int w = (int)(rect.Width * dpiScale);

        int h = (int)(rect.Height * dpiScale);

        IntPtr hdcSrc = NativeMethods.GetDC(IntPtr.Zero);

        IntPtr hdcDest = NativeMethods.CreateCompatibleDC(hdcSrc);

        IntPtr hBitmap = NativeMethods.CreateCompatibleBitmap(hdcSrc, w, h);

        IntPtr hOld = NativeMethods.SelectObject(hdcDest, hBitmap);

        NativeMethods.BitBlt(hdcDest, 0, 0, w, h, hdcSrc, x, y, 0x00CC0020); // SRCCOPY

        NativeMethods.SelectObject(hdcDest, hOld);

        NativeMethods.DeleteDC(hdcDest);

        NativeMethods.ReleaseDC(IntPtr.Zero, hdcSrc);

        try

        {

            var source = Imaging.CreateBitmapSourceFromHBitmap(

                hBitmap, IntPtr.Zero, Int32Rect.Empty, BitmapSizeOptions.FromEmptyOptions());

            source.Freeze();

            return source;

        }

        finally

        {

            NativeMethods.DeleteObject(hBitmap);

        }

    } 

}

截图api,不走system.draw

NativeMethods.cs

using System.Windows;

namespace MiOcr;

public static class NativeMethods

{

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]

    public static extern bool DeleteObject(IntPtr hObject);

    [System.Runtime.InteropServices.DllImport("user32.dll")]

    public static extern IntPtr GetDC(IntPtr hWnd);

    [System.Runtime.InteropServices.DllImport("user32.dll")]

    public static extern int ReleaseDC(IntPtr hWnd, IntPtr hDC);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]

    public static extern IntPtr CreateCompatibleDC(IntPtr hdc);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]

    public static extern IntPtr CreateCompatibleBitmap(IntPtr hdc, int nWidth, int nHeight);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]

    public static extern IntPtr SelectObject(IntPtr hdc, IntPtr hgdiobj);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]

    public static extern bool BitBlt(IntPtr hdcDest, int nXDest, int nYDest, int nWidth, int nHeight,

        IntPtr hdcSrc, int nXSrc, int nYSrc, int dwRop);

    [System.Runtime.InteropServices.DllImport("gdi32.dll")]

    public static extern bool DeleteDC(IntPtr hdc);

    /// <summary>

    /// 获取指定窗口的 DPI 缩放比例。

    /// </summary>

    /// <param name="window"></param>

    /// <returns></returns>

    public static double GetDpiScale(Window window)

    {

        var source = PresentationSource.FromVisual(window);

        if (source?.CompositionTarget != null)

        {

            return source.CompositionTarget.TransformToDevice.M11; // X 方向缩放

        }

        return 1.0;

    }

}

最后主界面添加按钮

        <Button Width="140" Height="44"

                Background="#4361EE"

                Foreground="White"

                FontWeight="SemiBold"

                FontSize="16"

                HorizontalAlignment="Left"

                VerticalAlignment="Top"

                Margin="180,20,20,20"

                Click="CaptureScreenButton_Click">

            <Button.Effect>

                <DropShadowEffect ShadowDepth="1" BlurRadius="8" Opacity="0.25" Color="#4361EE"/>

            </Button.Effect>

            <Button.Content>

                <StackPanel Orientation="Horizontal" VerticalAlignment="Center">

                    <TextBlock Text=""

                       FontFamily="Segoe MDL2 Assets"

                       FontSize="20"

                       Margin="0,0,8,0"

                       VerticalAlignment="Center"/>

                    <TextBlock Text="截屏"

                       FontSize="16"

                       VerticalAlignment="Center"/>

                </StackPanel>

            </Button.Content>

        </Button>

执行OCR初始化检查模型是否存在, 不存在则回调一个正在初始化提示给UI, 免得用户等待不耐烦

可以通过先检查模型文件是否已存在本地，如果不存在则通知UI“正在初始化/下载模型”，再进行下载。你可以这样实现：

检查模型本地路径是否存在。
如果不存在，调用UI回调（如事件、Action、委托等）提示“正在初始化”。
然后异步下载模型。

假设你用 Action? onStatusChanged 作为回调参数，代码如下：

 // 获取模型本地路径

 var modelInfo = OnlineFullModels.ChineseV3;

 string modelDir = modelInfo.LocalDir;

 if (!Directory.Exists(modelDir) || !File.Exists(Path.Combine(modelDir, "model.pdmodel")))

 {

     onStatusChanged?.Invoke("正在初始化OCR模型，请稍候...");

 }

A: modelInfo.LocalDir; 不存在

A: CreateLocalAsync 方法不存在, 改为异步超时回调UI

这个步骤花了一点时间,因为原本的库就不存在这个功能,所以最终走到这样的代码

明白。你可以通过以下方式实现“异步超时回调UI”：

• 在下载模型前，先启动一个定时任务（如 Task.Delay），如果模型下载还未完成且超时，则回调 UI 提示“正在初始化”。

• 如果模型下载提前完成，则取消提示。

下面是推荐实现方式：

public async Task<(List<string> strings, PaddleOcrResult result)> StartOCR(

    Mat src,

    Action<string>? onStatusChanged = null,

    int timeoutMs = 1500)

{

    PaddleOcrResult result;

    var resultText = new List<string>();

    var modelInfo = OnlineFullModels.ChineseV3;

    // 启动超时提示任务

    using var cts = new CancellationTokenSource();

    var timeoutTask = Task.Delay(timeoutMs, cts.Token)

        .ContinueWith(t =>

        {

            if (!t.IsCanceled)

            {

                onStatusChanged?.Invoke("正在初始化OCR模型，请稍候...");

            }

        });

    // 下载模型（如果已存在会跳过下载）

    FullOcrModel model = await modelInfo.DownloadAsync();

    cts.Cancel(); // 下载完成，取消超时提示

    using (PaddleOcrAll all = new(model)

    {

        AllowRotateDetection = true,

        Enable180Classification = true,

    })

    {

        Stopwatch sw = Stopwatch.StartNew();

        result = all.Run(src);

        Console.WriteLine($"elapsed={sw.ElapsedMilliseconds} ms");

        Console.WriteLine("Detected all texts: \n" + result.Text);

        foreach (PaddleOcrResultRegion region in result.Regions)

        {

            Console.WriteLine($"Text: {region.Text}, Score: {region.Score}, RectCenter: {region.Rect.Center}, RectSize: {region.Rect.Size}, Angle: {region.Rect.Angle}");

            resultText.Add(region.Text);

        }

    }

    src.Dispose();

    return (resultText, result);

}

说明：

• 增加了 timeoutMs 参数（默认1.5秒），可根据需要调整。

• 如果模型下载超过指定时间，UI 会收到“正在初始化OCR模型，请稍候...”提示。

• 如果模型提前下载完成，则不会回调提示。

这样可以优雅地实现异步超时回调UI，无需依赖不存在的 CreateLocalAsync 方法。

StartOCR 处理一下 onStatusChanged 回调, 下载初始化模型反馈友好提示

可以将 PaddleOCRService.StartOCR 的 onStatusChanged 回调参数传递到 UI 层，并在模型下载耗时时友好提示用户。

下面是推荐做法：

PaddleOCRService.cs 已实现超时回调（见前述建议）。
MainWindow.xaml.cs 需要在调用 StartOCR 时传递回调，并在 UI 上显示提示。

步骤一：修改 RunOcrAndDraw，传递 onStatusChanged

将 RunOcrAndDraw 方法修改如下，利用 Dispatcher.Invoke 保证 UI 线程安全：

private async Task RunOcrAndDraw(byte[] imageBytes)

{

    ProcessingText.Visibility = Visibility.Visible;

    ProcessingText.Text = "正在识别...";

    var ocr = new PaddleOCRService();

    var results = await ocr.StartOCR(imageBytes, msg =>

    {

        // 友好提示，确保在UI线程

        Dispatcher.Invoke(() =>

        {

            ProcessingText.Text = msg;

            ProcessingText.Visibility = Visibility.Visible;

        });

    });

    ProcessingText.Visibility = Visibility.Collapsed;

    if (results.strings == null || results.strings.Count == 0)

    {

        OcrTextBox.Text = "未识别到文本";

        return;

    }

    OcrTextBox.Text = string.Join(Environment.NewLine, results.result.Regions.Select(r => r.Text));

    _lastOcrResult = results.result;

    var drawed = DrawOcrResultsOnImage(_currentImage, results.result);

    MainImage.Source = drawed;

}

步骤二：确保 PaddleOCRService.cs 的 StartOCR 支持 onStatusChanged

如前所述，PaddleOCRService.cs 需如下实现：

public async Task<(List<string> strings, PaddleOcrResult result)> StartOCR(

    Mat src,

    Action<string>? onStatusChanged = null,

    int timeoutMs = 1500)

{

    PaddleOcrResult result;

    var resultText = new List<string>();

    var modelInfo = OnlineFullModels.ChineseV3;

    using var cts = new CancellationTokenSource();

    var timeoutTask = Task.Delay(timeoutMs, cts.Token)

        .ContinueWith(t =>

        {

            if (!t.IsCanceled)

            {

                onStatusChanged?.Invoke("正在初始化OCR模型，请稍候...");

            }

        });

    FullOcrModel model = await modelInfo.DownloadAsync();

    cts.Cancel();

    using (PaddleOcrAll all = new(model)

    {

        AllowRotateDetection = true,

        Enable180Classification = true,

    })

    {

        Stopwatch sw = Stopwatch.StartNew();

        result = all.Run(src);

        foreach (PaddleOcrResultRegion region in result.Regions)

        {

            resultText.Add(region.Text);

        }

    }

    src.Dispose();

    return (resultText, result);

}

这样，用户在模型首次下载或初始化时会看到“正在初始化OCR模型，请稍候...”，其余时间显示“正在识别...”，体验更友好。