How can I reduce the long response time of a real-time object detection app built with Core ML?

  1. Symptoms and background
    I'm a Swift beginner trying to use Core ML to build an app that can detect targets in real time.
    I'm currently running into two problems:
    1. Detection is slow: even with the target held still in front of the camera, it takes roughly 30-40 s before any feedback appears.
    2. After one target has been detected, the next one cannot be: once the first target is recognized, the preview freezes and the app never goes on to detect a second target.
    My setup:
    OS: macOS 12.5.1
    IDE: Xcode 14.1
    Language: Swift
  2. The code
```swift
import UIKit
import AVFoundation
import Vision
import CoreML

class ViewController: UIViewController, AVCaptureVideoDataOutputSampleBufferDelegate {

    @IBOutlet weak var cameraView: UIView!
    @IBOutlet weak var faceImageView: UIImageView!
    @IBOutlet weak var resultLabel: UILabel!

    var ciImage: CIImage?
    var captureLayer: AVCaptureVideoPreviewLayer?

    override func viewDidLoad() {
        super.viewDidLoad()
        setupCamera()
    }

    override func viewDidLayoutSubviews() {
        captureLayer?.frame = cameraView.bounds
    }

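    // Create the capture session, attach a preview layer, and start streaming frames to the delegate queue.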
    func setupCamera() {
        let session = AVCaptureSession()
        captureLayer = AVCaptureVideoPreviewLayer(session: session)
        cameraView.layer.addSublayer(captureLayer!)

        guard let device = AVCaptureDevice.default(for: .video) else { return }
        guard let input = try? AVCaptureDeviceInput(device: device) else { return }
        let output = AVCaptureVideoDataOutput()
        output.setSampleBufferDelegate(self, queue: DispatchQueue(label: "camera"))
        session.addInput(input)
        session.addOutput(output)
        session.startRunning()
    }

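    // Delegate callback, invoked on the "camera" queue for every frame the session delivers.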
    func captureOutput(_ output: AVCaptureOutput, didOutput sampleBuffer: CMSampleBuffer, from connection: AVCaptureConnection) {
        if connection.videoOrientation != .portrait {
            connection.videoOrientation = .portrait
            return
        }
        guard let buffer = CMSampleBufferGetImageBuffer(sampleBuffer) else { return }
        ciImage = CIImage(cvImageBuffer: buffer)
        faceDetection(buffer)
    }

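    // Run a Vision face-rectangles request on the frame; the first detected face is cropped, previewed, and classified.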
    func faceDetection(_ buffer: CVImageBuffer) {
        let request = VNDetectFaceRectanglesRequest { (request, error) in
            guard let results = request.results as? [VNFaceObservation] else { return }
            if let image = self.ciImage, let result = results.first {
                let face = self.getFaceCGImage(image: image, face: result)
                if let cg = face {
                    self.showPreview(cgImage: cg)
                    self.scanImage(cgImage: cg)
                }
            }
        }

        let handler = VNImageRequestHandler(cvPixelBuffer: buffer, options: [:])
        try? handler.perform([request])
    }

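    // Crop the detected face out of the frame, scale it to roughly 300x300, and render it to a CGImage.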
    func getFaceCGImage(image: CIImage, face: VNFaceObservation) -> CGImage? {
        let imageSize = image.extent.size

        let box = face.boundingBox.scaledForCropping(to: imageSize)
        guard image.extent.contains(box) else {
            return nil
        }

        let size = CGFloat(300.0)

        let transform = CGAffineTransform(
            scaleX: size / box.size.width,
            y: size / box.size.height
        )
        let faceImage = image.cropped(to: box).transformed(by: transform)

        let ctx = CIContext()
        guard let cgImage = ctx.createCGImage(faceImage, from: faceImage.extent) else {
            assertionFailure()
            return nil
        }
        return cgImage
    }

    private func showPreview(cgImage: CGImage) {
        let uiImage = UIImage(cgImage: cgImage)
        DispatchQueue.main.async {
            self.faceImageView.image = uiImage
        }
    }

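    // Classify the face crop; note that the VNCoreMLModel is created from ArashiClassifier on every call.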
    func scanImage(cgImage: CGImage) {
        let image = CIImage(cgImage: cgImage)

        guard let model = try? VNCoreMLModel(for: ArashiClassifier().model) else { return }
        let request = VNCoreMLRequest(model: model) { request, error in
            guard let results = request.results as? [VNClassificationObservation] else { return }
            guard let mostConfidentResult = results.first else { return }

            DispatchQueue.main.async {
                self.resultLabel.text = mostConfidentResult.identifier
            }
        }
        let requestHandler = VNImageRequestHandler(ciImage: image, options: [:])
        try? requestHandler.perform([request])
    }
}

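// Convert a normalized Vision bounding box into the pixel coordinates of the full image.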
extension CGRect {
    func scaledForCropping(to size: CGSize) -> CGRect {
        return CGRect(
            x: self.origin.x * size.width,
            y: self.origin.y * size.height,
            width: (self.size.width * size.width),
            height: (self.size.height * size.height)
        )
    }
}
```
  3. Run results and error output
    The app runs without errors, but feedback is extremely slow.
  4. What I want to achieve
    I'd like to achieve two things:
    1. Get feedback quickly once a target is detected.
    2. After the first target has been detected, keep detecting further targets and reporting the results.

I'm a beginner too, but at a quick glance the logic of the code looks fine: set up the camera, grab the buffer in the delegate, then run detection.
1: When I worked with the camera, the settings needed for taking a photo could not be reused, otherwise you get a nil-pointer crash, and I don't see new settings being created here, so that may be why your second detection gets stuck (a small illustration follows below).
2: Have you used Instruments' Time Profiler to find out where the time goes? Please post the operation that stalls (a rough manual-timing fallback is also sketched below).
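
On point 1: the code in the question only uses AVCaptureVideoDataOutput, so photo settings may not apply directly, but for reference, AVCapturePhotoSettings cannot be reused across captures; a fresh instance has to be created for every shot. A minimal sketch (the takePhoto(with:delegate:) helper is only an illustration, not something from the original code):

```swift
import AVFoundation

// Illustration of point 1: AVCapturePhotoSettings must not be reused.
// Create a fresh settings object for every capture.
func takePhoto(with output: AVCapturePhotoOutput,
               delegate: AVCapturePhotoCaptureDelegate) {
    let settings = AVCapturePhotoSettings()   // new instance each time
    output.capturePhoto(with: settings, delegate: delegate)
}
```

On point 2: if Instruments is not at hand, a rough fallback is to wrap the suspect calls and print the elapsed time. The measure(_:_:) helper below is hypothetical, not part of any framework; wrapping faceDetection(_:) and scanImage(cgImage:) this way should show which step eats the 30-40 s:

```swift
import Foundation

// Hypothetical timing helper: runs a block of work and prints how long it took.
// Example use inside captureOutput(_:didOutput:from:):
//   measure("faceDetection") { self.faceDetection(buffer) }
func measure<T>(_ label: String, _ work: () throws -> T) rethrows -> T {
    let start = CFAbsoluteTimeGetCurrent()
    defer {
        let elapsed = CFAbsoluteTimeGetCurrent() - start
        print("[\(label)] took \(String(format: "%.3f", elapsed)) s")
    }
    return try work()
}
```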